// OCR PDF — render each page via pdf.js, run Tesseract OCR on the canvas,
// concatenate results into a downloadable .txt. Everything runs locally.

// Version-pinned CDN locations of the third-party scripts this tool loads on demand.
// The pdf.js main script and its worker script share one pinned build directory.
const OCR_PDF_PDFJS_BASE   = 'https://cdn.jsdelivr.net/npm/pdfjs-dist@3.11.174/build';
const OCR_PDF_PDFJS_URL    = `${OCR_PDF_PDFJS_BASE}/pdf.min.js`;
const OCR_PDF_PDFJS_WORKER = `${OCR_PDF_PDFJS_BASE}/pdf.worker.min.js`;
const OCR_PDF_TESSERACT    = 'https://unpkg.com/tesseract.js@5.1.1/dist/tesseract.min.js';

/**
 * Lazily loads Tesseract.js by injecting a <script> tag, at most once at a time.
 *
 * - If `window.Tesseract` already exists, resolves with it immediately.
 * - If a load is already in flight, returns that same promise so concurrent
 *   callers share a single <script> injection.
 * - If the load fails, the cached promise is cleared so a later call (for
 *   example from a "Retry" button) injects a fresh <script> instead of
 *   receiving the same stale rejection forever.
 *
 * @returns {Promise<object>} Resolves with the `window.Tesseract` global.
 * @throws {Error} (as a rejection) when the script cannot be fetched, or when it
 *   loads but does not define `window.Tesseract`.
 */
function ocrPdfEnsureTesseract() {
  if (window.Tesseract) return Promise.resolve(window.Tesseract);
  if (window.__tesseractLoading) return window.__tesseractLoading;

  window.__tesseractLoading = new Promise((resolve, reject) => {
    // Drop the cached promise before rejecting so the next call can retry.
    const fail = (message) => {
      window.__tesseractLoading = null;
      reject(new Error(message));
    };

    const s = document.createElement('script');
    s.src = OCR_PDF_TESSERACT;
    s.onload = () => {
      if (window.Tesseract) {
        resolve(window.Tesseract);
      } else {
        // The script executed but did not expose the expected global; resolving
        // with `undefined` would only fail later with an unrelated TypeError.
        fail('Tesseract.js loaded but did not define window.Tesseract');
      }
    };
    s.onerror = () => fail('Failed to load Tesseract.js');
    document.head.appendChild(s);
  });
  return window.__tesseractLoading;
}

// Languages offered in the picker, as [code, display name] pairs expanded into
// { code, name } records. `code` is the value handed to Tesseract.createWorker;
// `name` is the label shown in the <select>.
const OCR_PDF_LANGUAGES = [
  ['eng', 'English'],
  ['tur', 'Türkçe'],
  ['deu', 'Deutsch'],
  ['fra', 'Français'],
  ['spa', 'Español'],
  ['ita', 'Italiano'],
  ['nld', 'Nederlands'],
  ['por', 'Português'],
  ['rus', 'Русский'],
].map(([code, name]) => ({ code, name }));

window.TOOL_HANDLERS['ocr-pdf'] = function OcrPdfTool() {
  const [file, setFile] = React.useState(null);
  const [lang, setLang] = React.useState('eng');
  const [pages, setPages] = React.useState([]); // { num, text, confidence }
  const [busy, setBusy] = React.useState(false);
  const [status, setStatus] = React.useState('');
  const [progress, setProgress] = React.useState(0);
  const [err, setErr] = React.useState('');

  const run = async (f, code) => {
    setBusy(true); setErr(''); setPages([]); setProgress(0);
    let worker = null;
    try {
      setStatus('Loading pdf.js…');
      await window.loadScript(OCR_PDF_PDFJS_URL);
      window.pdfjsLib.GlobalWorkerOptions.workerSrc = OCR_PDF_PDFJS_WORKER;

      setStatus('Loading OCR engine…');
      const Tesseract = await ocrPdfEnsureTesseract();
      worker = await Tesseract.createWorker(code, 1, {
        logger: (m) => {
          if (m.status === 'recognizing text' && typeof m.progress === 'number') {
            setStatus(`Recognizing… ${Math.round(m.progress * 100)}%`);
          }
        },
      });

      setStatus('Reading PDF…');
      const bytes = new Uint8Array(await f.arrayBuffer());
      const doc = await window.pdfjsLib.getDocument({ data: bytes }).promise;
      const total = doc.numPages;
      const out = [];

      for (let i = 1; i <= total; i++) {
        setStatus(`Page ${i} / ${total} — rendering…`);
        const page = await doc.getPage(i);
        // Use ~200 DPI equivalent for good OCR accuracy
        const viewport = page.getViewport({ scale: 2.2 });
        const canvas = document.createElement('canvas');
        canvas.width = viewport.width;
        canvas.height = viewport.height;
        const ctx = canvas.getContext('2d');
        ctx.fillStyle = '#ffffff';
        ctx.fillRect(0, 0, canvas.width, canvas.height);
        await page.render({ canvasContext: ctx, viewport }).promise;

        setStatus(`Page ${i} / ${total} — recognizing…`);
        const blob = await new Promise((resolve) => canvas.toBlob(resolve, 'image/png'));
        const { data } = await worker.recognize(blob);
        out.push({ num: i, text: (data.text || '').trim(), confidence: Math.round(data.confidence || 0) });
        setPages([...out]);
        setProgress(Math.round((i / total) * 100));
      }
      setStatus('Done');
    } catch (e) {
      setErr(e.message || String(e));
      setStatus('');
    } finally {
      if (worker) { try { await worker.terminate(); } catch {} }
      setBusy(false);
    }
  };

  const handleFile = (f) => { setFile(f); run(f, lang); };
  const changeLang = (code) => { setLang(code); if (file) run(file, code); };

  const allText = React.useMemo(
    () => pages.map((p) => `--- Page ${p.num} ---\n${p.text}`).join('\n\n'),
    [pages],
  );
  const avgConfidence = React.useMemo(() => {
    if (!pages.length) return 0;
    return Math.round(pages.reduce((s, p) => s + p.confidence, 0) / pages.length);
  }, [pages]);

  const download = () => {
    const blob = new Blob([allText], { type: 'text/plain;charset=utf-8' });
    window.downloadBlob(blob, (file?.name || 'ocr').replace(/\.pdf$/i, '') + '.txt');
  };
  const copy = () => navigator.clipboard.writeText(allText);
  const reset = () => { setFile(null); setPages([]); setErr(''); setStatus(''); setProgress(0); };

  if (!file) return (
    <div>
      <window.Dropzone onFile={handleFile} title="Drop a PDF here" hint="each page is rendered then OCR'd locally" accept="application/pdf,.pdf" />
      <div className="cmp-meta" style={{ textAlign: 'center', marginTop: 14 }}>
        First run downloads the OCR engine (~2 MB) and language data. Nothing leaves your browser.
      </div>
    </div>
  );

  return (
    <div className="mini-tool">
      <div className="mini-row">
        <div className="mini-field" style={{ flex: 1 }}>
          <label className="mini-label">Language</label>
          <select className="mini-input" value={lang} onChange={(e) => changeLang(e.target.value)} disabled={busy}>
            {OCR_PDF_LANGUAGES.map((l) => <option key={l.code} value={l.code}>{l.name}</option>)}
          </select>
        </div>
        <div className="mini-field" style={{ flex: 2 }}>
          <label className="mini-label">Progress</label>
          <div className="pw-bar"><div className="pw-fill" style={{ width: progress + '%', background: 'var(--id-brand-blue)' }} /></div>
          <div className="cmp-meta" style={{ marginTop: 6 }}>
            {status || (pages.length ? `${pages.length} page${pages.length === 1 ? '' : 's'} · avg ${avgConfidence}% confidence` : '')}
          </div>
        </div>
      </div>

      {err && <div style={{ marginTop: 10 }}><window.ToolError error={err} onRetry={() => file && run(file, lang)} /></div>}

      {pages.length > 0 && (
        <>
          <div className="mini-label" style={{ marginTop: 16 }}>Extracted text</div>
          <textarea
            className="mini-input mini-textarea"
            style={{ minHeight: 260, fontFamily: 'ui-monospace, monospace', fontSize: 13, whiteSpace: 'pre-wrap' }}
            value={allText}
            readOnly
          />
        </>
      )}

      <div className="cmp-actions">
        <button className="btn btn-secondary" onClick={reset} disabled={busy}>
          <window.Icon name="upload" size={16} /> Another PDF
        </button>
        <button className="btn btn-secondary" onClick={copy} disabled={!allText || busy}>
          <window.Icon name="doc" size={16} /> Copy text
        </button>
        <button className="btn btn-primary" onClick={download} disabled={!allText || busy}>
          <window.Icon name="download" size={16} /> Download .txt
        </button>
      </div>
    </div>
  );
};
