import type { Node } from 'ultrahtml'
import { TEXT_NODE, parse, render } from 'ultrahtml'
import type { mastodon } from '#types'
import { sanitize } from '#backend/html/sanitize'

/**
 * A tree transform function takes an ultrahtml Node object and returns new content that will replace the given node in the tree.
 *
 * - Returning `null` removes the node from the tree.
 * - Returning a string converts it to a text node.
 * - Returning an array splices all of its entries in place of the node.
 *
 * The input node's children have been transformed before the node itself gets transformed.
 * `root` is the document node that the current transform pass was started on.
 */
type Transform = (node: Node, root: Node) => (Node | string)[] | Node | string | null

/** Options accepted by `parseMastodonHTML`. */
export interface ContentParseOptions {
  /** Extra transforms, run in order after the built-in sanitizer. */
  astTransforms?: Transform[]
  /** NOTE(review): not read by any code visible in this file — confirm whether it is still needed. */
  status?: mastodon.v1.Status
  /** NOTE(review): not read by any code visible in this file — confirm whether it is still needed. */
  inReplyToStatus?: mastodon.v1.Status
}

// Class names allowed on <a> and <span>: anything starting with h-, p-, u-, dt- or e-,
// plus the fixed names mention, hashtag, ellipsis and invisible. Other classes are dropped.
const sanitizerBasicClasses = filterClasses(/^(h-\S*|p-\S*|u-\S*|dt-\S*|e-\S*|mention|hashtag|ellipsis|invisible)$/u)

// Allow-list handed to `sanitize`: each key is an element name and each nested key an attribute,
// mapped to a function that receives the attribute's current value and returns the value to use
// (`keep` passes it through, `set(...)` forces a constant, the filter* helpers validate it).
// Presumably elements and attributes not listed here are removed — see '#backend/html/sanitize'.
const sanitizer = sanitize({
  // Allow basic elements as seen in https://github.com/mastodon/mastodon/blob/17f79082b098e05b68d6f0d38fabb3ac121879a9/lib/sanitize_ext/sanitize_config.rb
  br: {},
  p: {},
  a: {
    href: filterHref(),
    class: sanitizerBasicClasses,
    // rel and target are always overwritten, regardless of what the incoming HTML says
    rel: set('nofollow noopener noreferrer'),
    target: set('_blank'),
    'data-id': keep,
    'data-file': keep,
    'data-type': keep,
    'data-publish-id': keep,
    download: keep
  },
  span: {
    class: sanitizerBasicClasses
  },
  // Allow elements used for code blocks (e.g. ones created from Markdown);
  // <code> may only carry a single-word `language-*` class
  pre: {},
  code: {
    class: filterClasses(/^language-\w+$/)
  },
  // Other elements supported in glitch, as seen in
  // https://github.com/glitch-soc/mastodon/blob/13227e1dafd308dfe1a3effc3379b766274809b3/lib/sanitize_ext/sanitize_config.rb#L75
  abbr: {
    title: keep
  },
  del: {},
  blockquote: {
    cite: filterHref()
  },
  b: {},
  strong: {},
  u: {},
  sub: {},
  sup: {},
  i: {},
  em: {},
  h1: {},
  h2: {},
  h3: {},
  h4: {},
  h5: {},
  ul: {
    'data-media-viewer': keep,
    'data-type': keep
  },
  ol: {
    start: keep,
    reversed: keep
  },
  li: {
    value: keep
  },
  // Media elements.
  // NOTE(review): `src` is kept verbatim here, whereas `href`/`cite` above go through filterHref() —
  // confirm that passing media URLs through unvalidated is intended.
  audio: {
    src: keep,
    'data-id': keep,
    'data-wave': keep,
    'data-duration': keep
  },
  video: {
    src: keep,
    'data-id': keep,
    'data-aspect': keep,
    'data-poster-time': keep,
    'data-poster-url': keep,
    'data-duration': keep
  },
  img: {
    src: keep,
    'data-id': keep,
    'data-aspect': keep
  }
})

/**
 * Parse raw HTML from a Mastodon server into an ultrahtml AST.
 *
 * The parsed tree is run through a fixed pipeline of transforms:
 * 1. the built-in `sanitizer`,
 * 2. any caller-supplied `options.astTransforms`, in the order given,
 * 3. `transformParagraphs`.
 *
 * Because the sanitizer runs first, nodes created by `astTransforms` are NOT
 * sanitized again — custom transforms must only emit trusted content.
 *
 * @param html - Raw HTML string to parse.
 * @param options - Optional settings; only `astTransforms` is read here.
 * @returns The transformed document tree.
 */
export function parseMastodonHTML(html: string, options: ContentParseOptions = {}) {
  const extraTransforms = options.astTransforms || []
  const pipeline: Transform[] = [sanitizer, ...extraTransforms, transformParagraphs]
  return transformSync(parse(html), pipeline)
}

/**
 * Converts raw HTML from a Mastodon server to HTML for the Tiptap editor.
 *
 * The input is parsed and sanitized via `parseMastodonHTML` (default options)
 * and the resulting tree is serialized with ultrahtml's `render`.
 *
 * NOTE(review): the return value is whatever `render` returns; depending on the
 * ultrahtml version this may be a Promise rather than a string — confirm against callers.
 *
 * @param html - Raw HTML string to convert.
 */
export function convertMastodonHTML(html: string) {
  return render(parseMastodonHTML(html))
}

// Transforms (filters, modifies, ...) a parsed HTML tree by running the given chain of
// transform functions one-by-one, each as a complete pass over the tree.
// Within a pass the tree is walked depth-first: a node's children are transformed, and their
// results written back, before the node itself is handed to the transform.
// Returns the result of the last transform applied to the root (or `doc` itself if the chain is empty).
function transformSync(doc: Node, transforms: Transform[]) {
  // Makes a transform result a child of `parent`: strings are wrapped in a new text node,
  // existing nodes just get their parent pointer updated.
  function adopt(item: Node | string, parent: Node) {
    if (typeof item !== 'string') {
      item.parent = parent
      return item
    }
    return { type: TEXT_NODE, value: item, parent }
  }

  function walk(current: Node, transform: Transform, root: Node) {
    if (Array.isArray(current.children)) {
      const replacements: (Node | string)[] = []
      let index = 0
      // `current.children` is deliberately re-read on every step so the walk observes
      // any change a transform makes to the list while it is being traversed.
      while (index < current.children.length) {
        const outcome = walk(current.children[index], transform, root)
        if (Array.isArray(outcome)) {
          // An array result is spliced in place of the child
          replacements.push(...outcome)
        } else if (outcome) {
          // Falsy results (null, empty string) drop the child
          replacements.push(outcome)
        }
        index += 1
      }
      current.children = replacements.map(item => adopt(item, current))
    }
    return transform(current, root)
  }

  // Each pass starts from the tree produced by the previous pass, which is also passed as `root`
  return transforms.reduce(
    (tree, transform) => walk(tree, transform, tree) as Node,
    doc
  )
}

/**
 * Builds an attribute sanitizer for `class` values.
 *
 * The returned function splits the value on whitespace, keeps only the class
 * names matching `allowed`, and joins the survivors with single spaces.
 *
 * @param allowed - Pattern a single class name must match to be kept.
 * @returns A function mapping a raw `class` value to its filtered form. It returns
 *   `undefined` when the input is `undefined` or empty, and an empty string when
 *   no class name passes the filter.
 */
function filterClasses(allowed: RegExp) {
  return (c: string | undefined) => {
    if (!c) {
      return undefined
    }

    return c
      .split(/\s/g)
      .filter((cls) => {
        // `test()` resumes from `lastIndex` on global/sticky patterns, which would make the
        // verdict for one class name depend on the previous one. Always match from the start.
        allowed.lastIndex = 0
        return allowed.test(cls)
      })
      .join(' ')
  }
}

/**
 * Attribute sanitizer that leaves the attribute untouched.
 *
 * @param value - Current attribute value, or `undefined` when the attribute is absent.
 * @returns `value`, unchanged.
 */
function keep(value: string | undefined): string | undefined {
  return value
}

/**
 * Builds an attribute sanitizer that always yields the same value,
 * ignoring whatever the attribute held before.
 *
 * @param value - The value to force the attribute to.
 * @returns A function that returns `value` on every call.
 */
function set(value: string): () => string {
  return function constant() {
    return value
  }
}

/**
 * Builds an attribute sanitizer for link-like attributes (`href`, `cite`).
 *
 * The returned function maps a raw attribute value as follows:
 * - `undefined` stays `undefined`;
 * - values starting with `/` or `.` are treated as relative links and returned unchanged;
 * - values that `new URL()` cannot parse yield `undefined`;
 * - absolute URLs with an allowed protocol are returned in their serialized form;
 * - absolute URLs with any other protocol are replaced by `'#'`.
 */
function filterHref() {
  // Protocols (including the trailing colon, as reported by `URL.protocol`) a link may use
  const allowedProtocols = new Set([
    'http:',
    'https:',
    'dat:',
    'dweb:',
    'ipfs:',
    'ipns:',
    'ssb:',
    'gopher:',
    'xmpp:',
    'magnet:',
    'gemini:'
  ])

  // Parses an absolute URL; yields undefined when the input is not one.
  // Only the TypeError thrown by `new URL` is swallowed, anything else propagates.
  function parseAbsolute(raw: string): URL | undefined {
    try {
      return new URL(raw)
    } catch (err) {
      if (err instanceof TypeError) {
        return undefined
      }
      throw err
    }
  }

  return (href: string | undefined) => {
    if (href === undefined) {
      return undefined
    }

    // Allow relative links
    const firstChar = href.charAt(0)
    if (firstChar === '/' || firstChar === '.') {
      return href
    }

    const parsed = parseAbsolute(href)
    if (parsed === undefined) {
      return undefined
    }

    return allowedProtocols.has(parsed.protocol) ? parsed.toString() : '#'
  }
}

/**
 * Last transform in the `parseMastodonHTML` pipeline.
 *
 * Currently an identity transform: every node is returned unchanged. The
 * paragraph-spacing logic it once carried is kept below, commented out.
 */
function transformParagraphs(node: Node): Node | Node[] {
  // Disabled: for top level paragraphs, inject an empty <p> to preserve status paragraphs in our editor (except for the last one)
  // if (node.parent?.type === DOCUMENT_NODE && node.name === 'p' && node.parent.children.at(-1) !== node) {
  //   return [node, h('p')]
  // }
  return node
}
