import type { Node } from 'ultrahtml'
import { TEXT_NODE, parse } from 'ultrahtml'
import { decode } from 'tiny-decode'

/**
 * Converts an HTML string into Markdown-like plain text.
 *
 * The markup is parsed and the resulting tree is rendered with `treeToText`.
 * This is best-effort: any error thrown along the way is logged via
 * `console.error` and an empty string is returned instead.
 *
 * @param html - Raw HTML markup to convert.
 * @returns The rendered text, or `''` when parsing or rendering throws.
 */
export function htmlToText(html: string) {
  let text = ''
  try {
    text = treeToText(parse(html))
  } catch (failure) {
    console.error(failure)
  }
  return text
}

/**
 * Recursively renders a parsed HTML node as Markdown-like plain text.
 *
 * Rendering rules:
 * - text nodes are entity-decoded;
 * - `br` becomes a newline;
 * - `img` / `audio` / `video` become `![alt](url)` (for `img` the widest
 *   `srcset` candidate is preferred over `src`);
 * - any other element whose children render to an empty string yields `''`;
 * - `p` / `pre` / `div` / `ul` are wrapped in newlines, except `ul` carrying a
 *   `data-media-viewer` attribute, whose content (and that of its `li`
 *   children) is passed through unchanged;
 * - `code` becomes a fenced block inside `pre`, inline backticks otherwise;
 * - `a`, `h1`–`h6`, `li`, `b`/`strong`, `i`/`em` and `del` map to their
 *   Markdown counterparts with whitespace-normalised content;
 * - everything else yields the concatenated text of its children.
 *
 * @param input - Node to render.
 * @returns The text representation of `input` and its descendants.
 */
export function treeToText(input: Node): string {
  if (input.type === TEXT_NODE) {
    return decode(input.value)
  }

  // Nodes without a tag name (presumably the document root, comments, ...)
  // fall through every tag check below and only contribute their children.
  const tag: string = input.name ?? ''

  // These elements are rendered from their attributes alone, so they are
  // handled before descending into children whose text would be discarded.
  if (tag === 'br') {
    return '\n'
  }

  if (tag === 'img') {
    // Fall back to '' so a missing URL is never printed as "undefined".
    const url = extractLargestSizeUrl(input.attributes.srcset) ?? input.attributes.src ?? ''
    return `![${input.attributes.alt ?? ''}](${url})`
  }
  if (tag === 'audio' || tag === 'video') {
    return `![${input.attributes.alt ?? ''}](${input.attributes.src ?? ''})`
  }

  let body = ''
  if ('children' in input) {
    body = (input.children as Node[]).map(n => treeToText(n)).join('')
  }

  if (!body) {
    return ''
  }

  if (['p', 'pre', 'div'].includes(tag)) {
    return '\n' + body + '\n'
  }

  if (tag === 'ul') {
    return input.attributes['data-media-viewer'] ? body : '\n' + body + '\n'
  }

  // The parent may lack an `attributes` object (e.g. when it is not an
  // element), hence the second optional chain.
  if (tag === 'li' && input.parent?.attributes?.['data-media-viewer']) {
    return body
  }

  if (tag === 'code') {
    if (input.parent?.name === 'pre') {
      const lang = input.attributes.class?.replace('language-', '')
      return `\`\`\`${lang || ''}\n` + body + '\n```'
    } else {
      return '`' + body + '`'
    }
  }

  if (tag === 'a') {
    return '[' + cleanText(body) + `](${input.attributes.href ?? ''})`
  }

  // Anchored so that only real heading tags match; an unanchored pattern
  // would also hit names that merely contain "h<digit>" and yield a NaN level.
  const heading = /^h([1-6])$/.exec(tag)
  if (heading) {
    return '\n\n' + '#'.repeat(Number(heading[1])) + ' ' + cleanText(body) + '\n'
  }

  if (tag === 'li') {
    return '- ' + cleanText(body) + '\n'
  }

  if (tag === 'b' || tag === 'strong') {
    return '**' + cleanText(body) + '**'
  }

  if (tag === 'i' || tag === 'em') {
    return '*' + cleanText(body) + '*'
  }

  if (tag === 'del') {
    return '~~' + cleanText(body) + '~~'
  }

  return body
}

/**
 * Normalises inline text before it is wrapped in Markdown syntax.
 *
 * Every literal `&nbsp;` entity is replaced with a space, leading and trailing
 * whitespace is trimmed, and each run of two or more whitespace characters is
 * collapsed into a single space.
 *
 * @param s - Text to normalise.
 * @returns The normalised text.
 */
function cleanText(s: string) {
  // A global regex is required: `replace` with a string pattern only
  // substitutes the first occurrence.
  return s.replace(/&nbsp;/g, ' ').trim().replace(/\s\s+/g, ' ')
}

/**
 * Picks the URL of the widest candidate from an `srcset` attribute value.
 *
 * Only candidates with a width descriptor (`<url> <n>w`) are considered;
 * candidates using other descriptors (such as `2x`) are ignored. When several
 * candidates share the largest width, the first one wins.
 *
 * @param srcset - Raw `srcset` attribute value.
 * @returns The URL with the largest positive width, `null` when no width
 * candidate is found, or `undefined` when `srcset` is empty or missing.
 */
function extractLargestSizeUrl(srcset: string) {
  if (!srcset) {
    return
  }
  // Each match is exactly "<url> <width>w" followed by a comma or the end of
  // the string. There is deliberately no optional leading token: with one, the
  // URL capture could swallow the previous descriptor when candidates are
  // separated by a bare comma ("a.jpg 100w,b.jpg 200w" -> "100w,b.jpg").
  // `\s*` tolerates whitespace before the separator or at the end.
  const candidatePattern = /(\S+)\s+(\d+)w\s*(?:,|$)/g
  let match: RegExpExecArray | null
  let maxWidth = 0
  let largestSizeUrl: string | null = null
  while ((match = candidatePattern.exec(srcset)) !== null) {
    const width = parseInt(match[2], 10)
    if (width > maxWidth) {
      maxWidth = width
      largestSizeUrl = match[1]
    }
  }
  return largestSizeUrl
}
