// PDF to Word — extract text from PDF pages via pdf.js and build a DOCX via the docx library.

// CDN locations of the third-party scripts this tool loads on demand.
// The pdf.js library and its worker are pinned to the same version (3.11.174).
const PDF2W_PDFJS_URL = 'https://cdn.jsdelivr.net/npm/pdfjs-dist@3.11.174/build/pdf.min.js';
const PDF2W_PDFJS_WORKER = 'https://cdn.jsdelivr.net/npm/pdfjs-dist@3.11.174/build/pdf.worker.min.js';
// UMD build of the docx library (pinned to 9.1.1); read later as window.docx.
const PDF2W_DOCX_URL = 'https://cdn.jsdelivr.net/npm/docx@9.1.1/build/index.umd.min.js';

window.TOOL_HANDLERS['pdf-to-word'] = function PdfToWordTool() {
  // The PDF chosen by the user; while null the dropzone is rendered.
  const [file, setFile] = React.useState(null);
  // Set to true by loadLibs once the CDN scripts are available (not read elsewhere in this component).
  const [libsReady, setLibsReady] = React.useState(false);
  // True for the duration of convert(); switches the UI to the loading card.
  const [busy, setBusy] = React.useState(false);
  // Human-readable status line shown on the loading card; '' when idle.
  const [progress, setProgress] = React.useState('');
  // Output of the last successful conversion, or null.
  const [result, setResult] = React.useState(null); // { blob, pageCount, wordCount, preview }
  // Message of the last conversion failure; '' when there is none.
  const [err, setErr] = React.useState('');

  // Ensure pdf.js and the docx library are present on `window`, fetching them
  // from the CDN when either one is missing, then flag the libraries as ready.
  // Rejects with whatever window.loadScript rejects with.
  const loadLibs = async () => {
    const alreadyPresent = window.pdfjsLib && window.docx;

    if (!alreadyPresent) {
      setProgress('Loading pdf.js\u2026');
      await window.loadScript(PDF2W_PDFJS_URL);
      // Point pdf.js at its worker script before any document is opened.
      window.pdfjsLib.GlobalWorkerOptions.workerSrc = PDF2W_PDFJS_WORKER;

      setProgress('Loading docx library\u2026');
      await window.loadScript(PDF2W_DOCX_URL);
    }

    setLibsReady(true);
  };

  // ---- text extraction helpers ----

  /**
   * Rebuild paragraphs from one page of pdf.js text content.
   *
   * Items are grouped into lines by their Y coordinate (transform[5]) and the
   * lines are then merged into paragraphs based on the vertical gap between them.
   *
   * @param {{ items: Array<{ str: string, transform: number[], height?: number, fontName?: string }> }} textContent
   *   Result of page.getTextContent().
   * @returns {Array<{ text: string, fontSize: number, bold: boolean }>}
   *   Paragraphs in top-to-bottom reading order; [] when the page has no text.
   */
  const extractPageText = (textContent) => {
    const LINE_TOLERANCE = 2;         // items within this Y distance share a line
    const DEFAULT_HEIGHT = 12;        // used when an item reports no height
    const PARAGRAPH_GAP_FACTOR = 1.3; // gap > factor * line height starts a new paragraph

    const yOf = (it) => it.transform[5];
    const xOf = (it) => it.transform[4];

    const items = textContent.items.filter((it) => it.str && it.str.trim().length > 0);
    if (items.length === 0) return [];

    // PDF user space has its origin at the bottom-left, so reading order is
    // DESCENDING Y. A strict numeric comparator is used on purpose: a
    // tolerance-based comparator is not transitive and makes sort() unreliable.
    // The tolerance is applied afterwards, when grouping.
    const sorted = items.slice().sort((a, b) => yOf(b) - yOf(a));

    // Group into lines: an item joins the current line while it stays within
    // LINE_TOLERANCE of that line's first (top-most) item.
    const lines = [];
    let curLine = [sorted[0]];
    for (let i = 1; i < sorted.length; i++) {
      const item = sorted[i];
      if (Math.abs(yOf(item) - yOf(curLine[0])) <= LINE_TOLERANCE) {
        curLine.push(item);
      } else {
        lines.push(curLine);
        curLine = [item];
      }
    }
    lines.push(curLine);

    // Build line objects with text and metadata.
    const lineObjs = lines.map((lineItems) => {
      // Left-to-right within the line (on a copy, so `lines` is not mutated).
      const ordered = lineItems.slice().sort((a, b) => xOf(a) - xOf(b));
      const text = ordered.map((it) => it.str).join(' ').replace(/\s+/g, ' ').trim();
      const heights = ordered.map((it) => it.height || DEFAULT_HEIGHT);
      const avgHeight = heights.reduce((sum, h) => sum + h, 0) / heights.length;
      const maxHeight = heights.reduce((max, h) => Math.max(max, h), -Infinity);
      const bold = ordered.some((it) => it.fontName && /bold/i.test(it.fontName));
      return { text, avgHeight, maxHeight, y: yOf(ordered[0]), bold };
    });

    // Group lines into paragraphs: a vertical gap larger than
    // PARAGRAPH_GAP_FACTOR times the previous line's height starts a new one.
    const paragraphs = [];
    let curPara = [lineObjs[0]];
    for (let i = 1; i < lineObjs.length; i++) {
      const prev = lineObjs[i - 1];
      const gap = Math.abs(prev.y - lineObjs[i].y);
      const lineH = prev.avgHeight || DEFAULT_HEIGHT;
      if (gap > lineH * PARAGRAPH_GAP_FACTOR) {
        paragraphs.push(curPara);
        curPara = [lineObjs[i]];
      } else {
        curPara.push(lineObjs[i]);
      }
    }
    paragraphs.push(curPara);

    return paragraphs.map((paraLines) => ({
      text: paraLines.map((l) => l.text).join(' '),
      // The tallest glyph height in the paragraph stands in for its font size.
      fontSize: Math.max(...paraLines.map((l) => l.maxHeight)),
      bold: paraLines.some((l) => l.bold),
    }));
  };

  // Map a paragraph's font size to a heading key by comparing it with the
  // document's median size. Returns 'HEADING_1' | 'HEADING_2' | 'HEADING_3',
  // or null for body text or when either size is missing/zero.
  const detectHeading = (fontSize, medianSize) => {
    if (!fontSize || !medianSize) return null;

    // Minimum size ratio for each level, largest first so the first hit wins.
    const tiers = [
      [1.8, 'HEADING_1'],
      [1.4, 'HEADING_2'],
      [1.15, 'HEADING_3'],
    ];
    const ratio = fontSize / medianSize;
    const match = tiers.find(([minRatio]) => ratio >= minRatio);
    return match ? match[1] : null;
  };

  // ---- main conversion ----

  /**
   * Convert a PDF file to a DOCX blob and publish the outcome through state.
   *
   * Steps: load the CDN libraries, read every page's text with pdf.js, derive
   * the median font size for heading detection, build one docx section per
   * source page, and pack the document into a Blob.
   *
   * Never rejects: failures are reported through setErr.
   *
   * @param {{ arrayBuffer: () => Promise<ArrayBuffer> }} f  The picked PDF file.
   * @returns {Promise<void>}
   */
  const convert = async (f) => {
    setBusy(true); setErr(''); setResult(null);
    let loadingTask = null;
    try {
      await loadLibs();

      setProgress('Reading PDF\u2026');
      const bytes = new Uint8Array(await f.arrayBuffer());
      loadingTask = window.pdfjsLib.getDocument({ data: bytes });
      const doc = await loadingTask.promise;
      const pageCount = doc.numPages;

      // Extract text from all pages
      const allPages = []; // array of arrays of { text, fontSize, bold }
      for (let i = 1; i <= pageCount; i++) {
        setProgress('Extracting text \u2014 page ' + i + ' / ' + pageCount + '\u2026');
        const page = await doc.getPage(i);
        const tc = await page.getTextContent();
        allPages.push(extractPageText(tc));
      }

      // Median font size across all paragraphs is treated as the body size.
      const allSizes = allPages.flat().map((p) => p.fontSize).filter(Boolean);
      allSizes.sort((a, b) => a - b);
      const medianSize = allSizes.length > 0 ? allSizes[Math.floor(allSizes.length / 2)] : 12;

      setProgress('Building DOCX\u2026');

      const { Document, Paragraph, TextRun, Packer, HeadingLevel } = window.docx;

      // XML 1.0 forbids most C0 control characters; PDF text occasionally
      // contains them, and leaving them in can make the .docx unreadable.
      const INVALID_XML_CHARS = /[\u0000-\u0008\u000B\u000C\u000E-\u001F\uFFFE\uFFFF]/g;

      // One section per source page. A section without an explicit type starts
      // on a new page, so no manual page-break paragraph is added: adding one
      // as well would leave an empty page between every pair of pages.
      const sections = allPages.map((paragraphs) => {
        if (paragraphs.length === 0) {
          // Keep a placeholder so a text-less page still occupies a page.
          return { children: [new Paragraph({ children: [new TextRun('')] })] };
        }

        const children = paragraphs.map((para) => {
          const heading = detectHeading(para.fontSize, medianSize);
          const opts = {};
          if (heading) {
            opts.heading = HeadingLevel[heading];
          }

          const runOpts = { text: para.text.replace(INVALID_XML_CHARS, '') };
          if (para.bold && !heading) {
            runOpts.bold = true;
          }
          // Preserve the size of non-heading text that is larger than body text.
          if (!heading && para.fontSize && para.fontSize > medianSize * 1.05) {
            runOpts.size = Math.round(para.fontSize * 2); // docx uses half-points
          }

          opts.children = [new TextRun(runOpts)];
          return new Paragraph(opts);
        });

        return { children };
      });

      const docxDoc = new Document({ sections });
      const blob = await Packer.toBlob(docxDoc);

      // Stats
      const allText = allPages.flat().map((p) => p.text).join(' ');
      const wordCount = allText.split(/\s+/).filter(Boolean).length;
      const preview = allText.slice(0, 500);

      setResult({ blob, pageCount, wordCount, preview });
      setProgress('');
    } catch (e) {
      setErr(e.message || String(e));
      setProgress('');
    } finally {
      if (loadingTask) {
        try {
          // Release the pdf.js worker and parsed document on success and failure.
          await loadingTask.destroy();
        } catch (cleanupError) {
          // Best-effort cleanup: a failure here must not mask the conversion outcome.
        }
      }
      setBusy(false);
    }
  };

  // Dropzone callback: remember the picked file and start converting it.
  const handleFile = (f) => {
    setFile(f);
    // Fire-and-forget: convert() reports its own failures through setErr.
    void convert(f);
  };

  // Save the converted document under the source file's name, with a trailing
  // ".pdf" (any letter case) swapped for ".docx". Does nothing without a result.
  const download = () => {
    if (result) {
      const baseName = file.name.replace(/\.pdf$/i, '');
      window.downloadBlob(result.blob, `${baseName}.docx`);
    }
  };

  // Return the tool to its initial state so another PDF can be picked.
  const reset = () => {
    // Forget the selection and whatever it produced.
    setFile(null);
    setResult(null);
    // Blank out both user-visible messages.
    for (const clearMessage of [setErr, setProgress]) {
      clearMessage('');
    }
  };

  // --- render ---

  if (!file) {
    return (
      <window.Dropzone
        onFile={handleFile}
        title="Drop a PDF here"
        hint="extracts text and builds a .docx"
        accept="application/pdf,.pdf"
      />
    );
  }

  if (busy) {
    return <window.LoadingCard label={progress || 'Processing\u2026'} sub={file.name} />;
  }

  if (err) {
    return (
      <window.ToolError
        error={err}
        hint="Make sure the PDF is not encrypted and contains selectable text."
        onRetry={() => convert(file)}
      />
    );
  }

  if (result) {
    return (
      <div className="mini-tool">
        {/* Stats */}
        <div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr 1fr', gap: 12, marginBottom: 16 }}>
          {[
            ['Pages', result.pageCount],
            ['Words', result.wordCount.toLocaleString()],
            ['Size', window.fmtBytes(result.blob.size)]
          ].map(([label, val]) => (
            <div key={label} style={{
              textAlign: 'center', padding: '14px 8px',
              background: 'var(--id-surface)', borderRadius: 10,
              border: '1px solid var(--id-border)'
            }}>
              <div style={{ fontSize: 22, fontWeight: 700, color: 'var(--id-text)' }}>{val}</div>
              <div className="cmp-meta">{label}</div>
            </div>
          ))}
        </div>

        {/* Text preview */}
        <div className="mini-label">Text preview</div>
        <div style={{
          padding: 14, background: 'var(--id-surface)', border: '1px solid var(--id-border)',
          borderRadius: 10, maxHeight: 200, overflow: 'auto',
          fontSize: 13, lineHeight: 1.6, color: 'var(--id-text-secondary)',
          whiteSpace: 'pre-wrap', wordBreak: 'break-word'
        }}>
          {result.preview}{result.preview.length >= 500 ? '\u2026' : ''}
        </div>

        {/* Actions */}
        <div className="cmp-actions">
          <button className="btn btn-secondary" onClick={reset}>
            <window.Icon name="upload" size={16} /> Another PDF
          </button>
          <button className="btn btn-primary" onClick={download}>
            <window.Icon name="download" size={16} /> Download DOCX
          </button>
        </div>
      </div>
    );
  }

  return null;
};
