// PDF to Excel — extract text from PDF pages via pdf.js, detect table structure
// by clustering text item positions, and write a real .xlsx workbook via SheetJS.

// CDN locations of the third-party scripts this tool loads on demand, each
// pinned to an exact version. The pdf.js worker URL carries the same version
// as the pdf.js library URL; keep the two in step when upgrading
// (NOTE(review): pdf.js is expected to reject a mismatched worker — confirm).
const PDFJS_URL_XL = 'https://cdn.jsdelivr.net/npm/pdfjs-dist@3.11.174/build/pdf.min.js';
const PDFJS_WORKER_XL = 'https://cdn.jsdelivr.net/npm/pdfjs-dist@3.11.174/build/pdf.worker.min.js';
// SheetJS build used to assemble and serialize the .xlsx workbook.
const XLSX_URL_XL = 'https://cdn.jsdelivr.net/npm/xlsx@0.18.5/dist/xlsx.full.min.js';

window.TOOL_HANDLERS['pdf-to-excel'] = function PdfToExcelTool() {
  // The file handed to handleFile; while null the component renders only the dropzone.
  const [file, setFile] = React.useState(null);
  // True for the duration of run(); controls whether the progress section is shown.
  const [busy, setBusy] = React.useState(false);
  // Human-readable description of the current extraction step ('' when idle).
  const [status, setStatus] = React.useState('');
  // Progress bar fill, as a percentage from 0 to 100.
  const [progress, setProgress] = React.useState(0);
  // Message of the last extraction failure ('' when there is none).
  const [err, setErr] = React.useState('');
  // Extraction result, one entry per PDF page; null until a run succeeds.
  const [pageData, setPageData] = React.useState(null);   // array of { rows: string[][], cols: number }
  // Download option: one combined sheet (true) or one sheet per page (false).
  const [mergePages, setMergePages] = React.useState(false);
  // Which page the preview table is currently showing.
  const [previewPage, setPreviewPage] = React.useState(0); // index into pageData

  // --- Column detection helpers ---

  // Group 1-D coordinates into clusters and return each cluster's mean.
  //
  // Values are visited in ascending order. A value joins the current cluster
  // when it lies no more than `tolerance` above that cluster's running mean;
  // otherwise it starts a new cluster. The input array is left untouched.
  //
  //   positions - array of numbers (any order)
  //   tolerance - maximum distance from the running mean to stay in a cluster
  //   returns   - array of cluster means, ascending
  function clusterPositions(positions, tolerance) {
    const ordered = [...positions].sort((lo, hi) => lo - hi);
    const centers = [];
    // Sum and size of the cluster currently being extended (the last center).
    let runTotal = 0;
    let runSize = 0;

    for (const value of ordered) {
      const tail = centers.length - 1;
      if (runSize > 0 && value - centers[tail] <= tolerance) {
        runTotal += value;
        runSize += 1;
        centers[tail] = runTotal / runSize;
      } else {
        runTotal = value;
        runSize = 1;
        centers.push(value);
      }
    }
    return centers;
  }

  // Return the index of the entry in `colBounds` closest to `x`.
  // Ties go to the lower index; an empty `colBounds` yields 0.
  function nearestCol(x, colBounds) {
    let winner = 0;
    let smallestGap = Math.abs(x - colBounds[0]);
    colBounds.forEach((bound, idx) => {
      const gap = Math.abs(x - bound);
      // Strict comparison keeps the earliest index when distances are equal.
      if (gap < smallestGap) {
        smallestGap = gap;
        winner = idx;
      }
    });
    return winner;
  }

  // Turn one page's text items into a rectangular grid of strings.
  //
  //   items   - array of objects with `str` (text) and `transform` (6-element
  //             array; indices 4 and 5 are read as the item's x and y)
  //   returns - string[][], one inner array per detected row, top row first,
  //             all rows the same width; [] when there is no visible text
  //
  // Rows are found by grouping items whose y lies within Y_TOLERANCE of the
  // first item of the row. Columns are found by clustering every item's x
  // with clusterPositions; each item goes to the nearest column center.
  // Several items landing in the same cell are joined left-to-right with a
  // single space.
  function extractTable(items) {
    // Maximum y distance from a row's first item for another item to join it.
    const Y_TOLERANCE = 3;
    // Tolerance handed to clusterPositions when grouping x positions.
    const X_TOLERANCE = 10;

    if (!items || items.length === 0) return [];

    // Drop items without visible text so they cannot create rows or columns.
    const entries = items
      .filter(it => it.str && it.str.trim())
      .map(it => ({ x: it.transform[4], y: it.transform[5], text: it.str }));

    if (entries.length === 0) return [];

    // Group into rows by Y (PDF Y is bottom-up, so sort descending for top-to-bottom)
    const ySorted = [...entries].sort((a, b) => b.y - a.y);
    const rows = [];
    let currentRow = [ySorted[0]];
    let currentY = ySorted[0].y;
    for (let i = 1; i < ySorted.length; i++) {
      const entry = ySorted[i];
      if (Math.abs(entry.y - currentY) <= Y_TOLERANCE) {
        currentRow.push(entry);
      } else {
        rows.push(currentRow);
        currentRow = [entry];
        currentY = entry.y;
      }
    }
    rows.push(currentRow);

    // Column centers. `entries` is non-empty here, so there is at least one.
    const colBounds = clusterPositions(entries.map(e => e.x), X_TOLERANCE);

    // Build the 2D array, one fixed-width row of cells per detected row.
    const result = rows.map((row) => {
      const cells = new Array(colBounds.length).fill('');
      // Visit items left-to-right so text sharing a cell concatenates in reading order.
      const leftToRight = [...row].sort((a, b) => a.x - b.x);
      for (const item of leftToRight) {
        const ci = nearestCol(item.x, colBounds);
        cells[ci] = cells[ci] ? cells[ci] + ' ' + item.text : item.text;
      }
      return cells;
    });

    // Trim trailing columns that are empty in every row (always keep one).
    let width = 1;
    for (const cells of result) {
      for (let c = cells.length - 1; c >= width; c--) {
        if (cells[c].trim()) { width = c + 1; break; }
      }
    }
    return result.map(cells => cells.slice(0, width));
  }

  // --- Main extraction ---

  // Extract every page of `f` into pageData.
  //
  //   f - the PDF to read; its bytes are obtained through f.arrayBuffer()
  //
  // Loads pdf.js and SheetJS, opens the document, runs extractTable on each
  // page's text content, and stores one { rows, cols } entry per page.
  // Failures are reported through `err` rather than thrown, so the returned
  // promise always resolves.
  const run = async (f) => {
    setBusy(true); setErr(''); setPageData(null); setPreviewPage(0); setProgress(0);
    // Declared outside the try so the finally block can release it on every
    // path, including a getDocument() rejection.
    let loadingTask = null;
    try {
      setStatus('Loading libraries\u2026');
      await Promise.all([
        window.loadScript(PDFJS_URL_XL),
        window.loadScript(XLSX_URL_XL),
      ]);
      window.pdfjsLib.GlobalWorkerOptions.workerSrc = PDFJS_WORKER_XL;

      setStatus('Reading PDF\u2026');
      const bytes = new Uint8Array(await f.arrayBuffer());
      loadingTask = window.pdfjsLib.getDocument({ data: bytes });
      const doc = await loadingTask.promise;
      const allPages = [];

      for (let i = 1; i <= doc.numPages; i++) {
        setStatus('Extracting page ' + i + ' / ' + doc.numPages + '\u2026');
        setProgress(Math.round((i / doc.numPages) * 100));
        const page = await doc.getPage(i);
        const content = await page.getTextContent();
        const rows = extractTable(content.items);
        // Widest row on the page; a reduce avoids spreading a large array into Math.max.
        const cols = rows.reduce((widest, r) => Math.max(widest, r.length), 0);
        allPages.push({ rows, cols });
      }

      setPageData(allPages);
      setStatus('');
      setProgress(100);
    } catch (e) {
      // Tolerate non-Error throwables (including null/undefined).
      setErr((e && e.message) || String(e));
      setStatus('');
    } finally {
      if (loadingTask) {
        // Release the document and its worker. The extracted rows are already
        // plain data, so nothing still depends on the document. Cleanup is
        // best-effort: a failure here must not replace the real outcome.
        loadingTask.destroy().catch(() => {});
      }
      setBusy(false);
    }
  };

  // Dropzone callback: remember the chosen file and start extracting it
  // straight away. Calls without a file are ignored.
  const handleFile = (f) => {
    if (f) {
      setFile(f);
      run(f);
    }
  };

  // --- Build and download XLSX ---

  // Build an .xlsx workbook from pageData and hand it to window.downloadBlob.
  // With mergePages set, every page's rows go into a single 'All Pages' sheet;
  // otherwise each page becomes its own 'Page N' sheet (an empty page gets one
  // blank cell). The download is named after the PDF with '.pdf' replaced by '.xlsx'.
  const download = () => {
    if (!pageData || pageData.length === 0) return;

    const xlsx = window.XLSX;
    const workbook = xlsx.utils.book_new();

    if (mergePages) {
      // Flatten one level: pages -> rows.
      const everyRow = pageData.flatMap((page) => page.rows);
      xlsx.utils.book_append_sheet(workbook, xlsx.utils.aoa_to_sheet(everyRow), 'All Pages');
    } else {
      pageData.forEach((page, idx) => {
        const sheetRows = page.rows.length > 0 ? page.rows : [['']];
        xlsx.utils.book_append_sheet(workbook, xlsx.utils.aoa_to_sheet(sheetRows), 'Page ' + (idx + 1));
      });
    }

    const payload = xlsx.write(workbook, { bookType: 'xlsx', type: 'array' });
    const blob = new Blob([payload], { type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' });
    const stem = file.name.replace(/\.pdf$/i, '');
    window.downloadBlob(blob, stem + '.xlsx');
  };

  // --- Stats ---

  // Summary figures for the stats strip; all stay 0 until pageData exists.
  //   totalRows     - rows summed over every page
  //   maxCols       - widest page, in columns
  //   nonEmptyPages - pages that produced at least one row
  let totalRows = 0;
  let maxCols = 0;
  let nonEmptyPages = 0;
  if (pageData) {
    for (const { rows, cols } of pageData) {
      totalRows += rows.length;
      maxCols = Math.max(maxCols, cols);
      if (rows.length > 0) nonEmptyPages += 1;
    }
  }

  // --- Render ---

  if (!file) {
    return (
      <window.Dropzone
        onFile={handleFile}
        title="Drop a PDF here"
        hint="extract tables \u2192 .xlsx"
        accept="application/pdf,.pdf"
      />
    );
  }

  // Page currently selected for the preview; null when there is no page data
  // or previewPage has no entry in it.
  const preview = pageData && pageData[previewPage] ? pageData[previewPage] : null;
  // The preview renders at most the first 20 rows of the selected page.
  const previewRows = preview ? preview.rows.slice(0, 20) : [];

  // Main view: file info first, then the progress, error and results sections,
  // each rendered only when its condition holds.
  return (
    <div className="mini-tool">
      {/* File info */}
      <div style={{ padding: 12, background: 'var(--id-surface-alt)', borderRadius: 10, marginBottom: 14 }}>
        <strong>{file.name}</strong>
        <div className="cmp-meta">{window.fmtBytes(file.size)}</div>
      </div>

      {/* Progress: shown only while busy */}
      {busy && (
        <div style={{ marginBottom: 14 }}>
          <div className="cmp-meta" style={{ marginBottom: 6 }}>{status}</div>
          <div style={{ height: 6, borderRadius: 3, background: 'var(--id-border)', overflow: 'hidden' }}>
            <div style={{
              height: '100%', width: progress + '%', borderRadius: 3,
              background: 'var(--id-accent, #4f46e5)', transition: 'width 0.3s ease'
            }} />
          </div>
        </div>
      )}

      {/* Error: retry re-runs extraction on the same file */}
      {err && (
        <window.ToolError
          error={err}
          hint="Make sure the PDF is not encrypted or corrupted."
          onRetry={() => run(file)}
        />
      )}

      {/* Results: require page data and no current error */}
      {pageData && !err && (
        <>
          {/* Stats */}
          <div style={{
            display: 'flex', gap: 12, flexWrap: 'wrap', marginBottom: 14,
            padding: '10px 14px', background: 'var(--id-surface-alt)', borderRadius: 10,
          }}>
            <div>
              <div style={{ fontSize: 20, fontWeight: 700, color: 'var(--id-text)' }}>{pageData.length}</div>
              <div className="cmp-meta">page{pageData.length !== 1 ? 's' : ''}</div>
            </div>
            <div>
              <div style={{ fontSize: 20, fontWeight: 700, color: 'var(--id-text)' }}>{totalRows}</div>
              <div className="cmp-meta">row{totalRows !== 1 ? 's' : ''}</div>
            </div>
            <div>
              <div style={{ fontSize: 20, fontWeight: 700, color: 'var(--id-text)' }}>{maxCols}</div>
              <div className="cmp-meta">column{maxCols !== 1 ? 's' : ''}</div>
            </div>
            {nonEmptyPages < pageData.length && (
              <div>
                <div style={{ fontSize: 20, fontWeight: 700, color: 'var(--id-text-muted, #999)' }}>{pageData.length - nonEmptyPages}</div>
                <div className="cmp-meta">empty</div>
              </div>
            )}
          </div>

          {/* Options */}
          <div className="mini-row" style={{ marginBottom: 14, display: 'flex', alignItems: 'center', gap: 10 }}>
            <label className="mini-label" style={{ display: 'flex', alignItems: 'center', gap: 6, cursor: 'pointer', userSelect: 'none' }}>
              <input
                type="checkbox"
                checked={mergePages}
                onChange={(e) => setMergePages(e.target.checked)}
              />
              Merge all pages into one sheet
            </label>
          </div>

          {/* Page selector for preview: only offered when there is more than one page */}
          {pageData.length > 1 && (
            <div style={{ display: 'flex', gap: 6, flexWrap: 'wrap', marginBottom: 10 }}>
              {pageData.map((_, i) => (
                <button
                  key={i}
                  className={'filter-pill' + (i === previewPage ? ' active' : '')}
                  onClick={() => setPreviewPage(i)}
                >
                  Page {i + 1}
                  {pageData[i].rows.length === 0 ? ' (empty)' : ''}
                </button>
              ))}
            </div>
          )}

          {/* Preview table: the first row gets header styling */}
          {previewRows.length > 0 ? (
            <div style={{ overflow: 'auto', border: '1px solid var(--id-border)', borderRadius: 10, marginBottom: 14, maxHeight: 360 }}>
              <table style={{ width: '100%', borderCollapse: 'collapse', fontSize: 12 }}>
                <tbody>
                  {previewRows.map((row, ri) => (
                    <tr key={ri}>
                      {row.map((cell, ci) => (
                        <td key={ci} style={{
                          padding: '6px 10px',
                          borderBottom: '1px solid var(--id-border)',
                          background: ri === 0 ? 'var(--id-surface-alt)' : 'transparent',
                          fontWeight: ri === 0 ? 700 : 400,
                          whiteSpace: 'nowrap',
                        }}>{cell}</td>
                      ))}
                    </tr>
                  ))}
                </tbody>
              </table>
              {preview.rows.length > 20 && (
                <div className="cmp-meta" style={{ padding: 8 }}>&hellip; and {preview.rows.length - 20} more rows</div>
              )}
            </div>
          ) : (
            <div className="cmp-meta" style={{ marginBottom: 14, padding: '16px 0', textAlign: 'center' }}>
              No text content found on this page.
            </div>
          )}

          {/* Actions: reset to the dropzone, or download (disabled when no rows were extracted) */}
          <div className="cmp-actions">
            <button className="btn btn-secondary" onClick={() => { setFile(null); setPageData(null); setErr(''); }}>
              <window.Icon name="upload" size={16} /> Another PDF
            </button>
            <button className="btn btn-primary" onClick={download} disabled={totalRows === 0}>
              <window.Icon name="download" size={16} /> Download XLSX
            </button>
          </div>
        </>
      )}
    </div>
  );
};
