import os
import uuid
import json
import subprocess
import shutil
import glob as globmod
import tempfile


# Target formats accepted by the public ``convert`` entry point (lower-case,
# without a leading dot).
SUPPORTED_FORMATS = set(
    "doc docx html odt pdf ppt pptx rtf txt xlsx".split()
)

# Identity mapping from each supported format to itself, in alphabetical
# order.  NOTE(review): not referenced in the visible part of this module;
# presumably the extension handed to LibreOffice -- confirm against callers.
LIBREOFFICE_FORMAT_MAP = {fmt: fmt for fmt in sorted(SUPPORTED_FORMATS)}

# Filter name associated with each supported format.
# NOTE(review): not referenced in the visible part of this module; the values
# look like LibreOffice export filter names -- confirm before relying on them.
LIBREOFFICE_FILTER_MAP = dict(
    doc="MS Word 97",
    docx="MS Word 2007 XML",
    html="HTML (StarWriter)",
    odt="writer8",
    pdf="writer_pdf_Export",
    ppt="MS PowerPoint 97",
    pptx="Impress MS PowerPoint 2007 XML",
    rtf="Rich Text Format",
    txt="Text",
    xlsx="Calc MS Excel 2007 XML",
)

# LibreOffice PDF export filter to use for a given *source* extension
# (lower-case, without the leading dot).  The filter has to match the
# application that opens the source: Writer, Impress or Calc.
PDF_EXPORT_FILTERS = {
    # Text documents (opened by Writer)
    "doc": "writer_pdf_Export",
    "docx": "writer_pdf_Export",
    "html": "writer_pdf_Export",
    "odt": "writer_pdf_Export",
    "rtf": "writer_pdf_Export",
    "txt": "writer_pdf_Export",
    # Presentations (opened by Impress)
    "ppt": "impress_pdf_Export",
    "pptx": "impress_pdf_Export",
    # Spreadsheets (opened by Calc).  Every extension in SPREADSHEET_INPUTS
    # needs an entry here so a lookup by source extension yields the Calc
    # exporter; "xls" used to be missing.
    "xls": "calc_pdf_Export",
    "xlsx": "calc_pdf_Export",
}

# Source extensions (with leading dot) grouped by document family.
# NOTE(review): these three sets are not referenced in the visible part of
# this module.
PRESENTATION_INPUTS = {".ppt", ".pptx"}
SPREADSHEET_INPUTS = {".xls", ".xlsx"}
WRITER_INPUTS = {".doc", ".docx", ".odt", ".rtf", ".txt", ".html"}


def _parse_url_entry(url_entry):
    """Normalise one item of the ``urls`` list into a file-object dict.

    Args:
        url_entry: Either a dict (returned unchanged), a string holding a
            JSON object (returned decoded), or a plain string that is taken
            to be a file path.

    Returns:
        A dict for dict / string input.  For a plain path the dict has the
        keys ``path`` (the string as given), ``name`` (base name without its
        extension) and ``ext`` (extension including the leading dot, possibly
        empty).  ``None`` for any other input type.
    """
    if isinstance(url_entry, dict):
        return url_entry
    if not isinstance(url_entry, str):
        return None

    try:
        decoded = json.loads(url_entry)
    except (ValueError, TypeError):
        # json.JSONDecodeError is a ValueError: the string is not JSON at all.
        decoded = None
    if isinstance(decoded, dict):
        return decoded

    # Anything that is not a JSON *object* is a path.  This includes strings
    # that happen to be valid JSON scalars or arrays (e.g. a file literally
    # named "123" or "true"), which must not be dropped silently.
    stem = os.path.splitext(os.path.basename(url_entry))[0]
    extension = os.path.splitext(url_entry)[1]
    return {"path": url_entry, "name": stem, "ext": extension}


def _libreoffice_convert(input_path, output_dir, target_format, pdf_filter=None):
    """Convert one file with a headless LibreOffice process.

    Each call runs LibreOffice against its own throw-away user profile
    directory, which is removed again when the call finishes.

    Args:
        input_path: File to convert.
        output_dir: Directory passed to ``--outdir``.
        target_format: Output extension without a dot (e.g. ``"pdf"``).
        pdf_filter: Optional export filter name.  When given, ``--convert-to``
            receives ``"<target_format>:<pdf_filter>"``.

    Returns:
        Path of the converted file, or ``None`` when no output file was
        found.  The process exit status is not consulted; success is judged
        only by the presence of an output file.

    Raises:
        RuntimeError: The LibreOffice process exceeded the 300 s timeout.
    """
    user_profile = os.path.join("/tmp", f"lo_profile_{uuid.uuid4().hex}")
    os.makedirs(user_profile, exist_ok=True)

    # With a filter the --convert-to value takes the form "format:FilterName"
    # (e.g. "pdf:writer_pdf_Export").
    convert_to_arg = f"{target_format}:{pdf_filter}" if pdf_filter else target_format

    cmd = [
        "libreoffice",
        "--headless",
        "--norestore",
        "--nolockcheck",
        f"-env:UserInstallation=file://{user_profile}",
        "--convert-to", convert_to_arg,
        "--outdir", output_dir,
        # An absolute path can never start with "-", so a hostile file name
        # cannot be mistaken for a command-line option.
        os.path.abspath(input_path),
    ]

    try:
        subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=300,
            env={**os.environ, "HOME": "/tmp"},
        )
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError(
            "Conversion timed out — the file may be too large or complex."
        ) from exc
    finally:
        shutil.rmtree(user_profile, ignore_errors=True)

    # LibreOffice names its output "<input stem>.<target_format>".  Prefer
    # that exact file so other files with the same extension in output_dir
    # can never be returned by mistake.
    stem = os.path.splitext(os.path.basename(input_path))[0]
    expected = os.path.join(output_dir, f"{stem}.{target_format}")
    if os.path.exists(expected):
        return expected

    # Fallback: any file with the target extension.  The directory part is
    # escaped so glob metacharacters in it are taken literally, and the
    # matches are sorted so the choice is deterministic.
    pattern = os.path.join(globmod.escape(output_dir), f"*.{target_format}")
    candidates = sorted(globmod.glob(pattern))
    return candidates[0] if candidates else None


def _convert_via_pdf_intermediate(input_path, output_dir, source_ext, target_format):
    """Convert a file in two steps: source -> PDF -> ``target_format``.

    Args:
        input_path: File to convert.
        output_dir: Directory receiving the PDF and the final output.
        source_ext: Source extension, with or without a leading dot; used to
            look up the PDF export filter in ``PDF_EXPORT_FILTERS``.
        target_format: Output extension without a dot.

    Returns:
        Path of the final file (the PDF itself when ``target_format`` is
        ``"pdf"``), or ``None`` when either step produced no output.
    """
    pdf_filter = PDF_EXPORT_FILTERS.get(source_ext.lstrip(".").lower())
    pdf_path = _libreoffice_convert(input_path, output_dir, "pdf", pdf_filter)
    if not pdf_path:
        return None

    if target_format == "pdf":
        return pdf_path

    try:
        return _libreoffice_convert(pdf_path, output_dir, target_format)
    finally:
        # The PDF is only a stepping stone: remove it whether the second
        # step succeeded, produced nothing, or raised (e.g. on timeout).
        try:
            os.remove(pdf_path)
        except OSError:
            pass


def _convert_pdf_to_pptx(input_path, output_dir, target_format):
    """
    Convert PDF → PPTX (or PPT) by:
      1. Rasterising every page with pdftoppm (poppler) at 150 DPI
      2. Assembling those images into a PPTX with python-pptx, one page per
         slide, scaled to fit the slide without changing the aspect ratio
      3. If target is .ppt, further convert the PPTX via LibreOffice

    Falls back to a plain LibreOffice conversion when python-pptx or pdftoppm
    is unavailable, when pdftoppm exits non-zero, or when it yields no images.

    Args:
        input_path: PDF file to convert.
        output_dir: Directory receiving the presentation.
        target_format: ``"pptx"`` or ``"ppt"``.

    Returns:
        Path of the presentation, or ``None`` when pdftoppm timed out or no
        output could be produced.
    """
    try:
        from pptx import Presentation
        from pptx.util import Inches
    except ImportError:
        return _libreoffice_convert(input_path, output_dir, target_format)

    pdftoppm_bin = shutil.which("pdftoppm")
    if not pdftoppm_bin:
        return _libreoffice_convert(input_path, output_dir, target_format)

    with tempfile.TemporaryDirectory() as page_dir:
        prefix = os.path.join(page_dir, "page")
        cmd = [
            pdftoppm_bin,
            "-jpeg",
            "-r", "150",
            input_path,
            prefix,
        ]
        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300,
                env={**os.environ, "HOME": "/tmp"},
            )
        except subprocess.TimeoutExpired:
            return None
        if result.returncode != 0:
            return _libreoffice_convert(input_path, output_dir, target_format)

        page_images = sorted(globmod.glob(os.path.join(page_dir, "*.jpg")))
        if not page_images:
            return _libreoffice_convert(input_path, output_dir, target_format)

        prs = Presentation()
        prs.slide_width = Inches(10)
        prs.slide_height = Inches(7.5)
        blank_layout = prs.slide_layouts[6]

        for img_path in page_images:
            slide = prs.slides.add_slide(blank_layout)
            # Insert at native size first: that is the only way to learn the
            # page's proportions without an extra imaging dependency.
            picture = slide.shapes.add_picture(img_path, left=0, top=0)
            if picture.width and picture.height:
                # Largest uniform scale at which the page still fits the
                # slide; stretching to the slide would distort any page that
                # is not 4:3 (portrait A4/Letter, 16:9 decks, ...).
                scale = min(
                    prs.slide_width / picture.width,
                    prs.slide_height / picture.height,
                )
                fitted_width = int(picture.width * scale)
                fitted_height = int(picture.height * scale)
            else:
                fitted_width = prs.slide_width
                fitted_height = prs.slide_height
            picture.width = fitted_width
            picture.height = fitted_height
            # Centre the page on the slide.
            picture.left = (prs.slide_width - fitted_width) // 2
            picture.top = (prs.slide_height - fitted_height) // 2

        base_name = os.path.splitext(os.path.basename(input_path))[0]
        pptx_path = os.path.join(output_dir, f"{base_name}.pptx")
        prs.save(pptx_path)

    if not os.path.exists(pptx_path):
        return None

    if target_format == "ppt":
        ppt_path = _libreoffice_convert(pptx_path, output_dir, "ppt")
        # The PPTX was only an intermediate for the legacy format.
        try:
            os.remove(pptx_path)
        except OSError:
            pass
        return ppt_path

    return pptx_path


def _convert_pdf_to_docx_or_doc(input_path, output_dir, target_format):
    """Convert a PDF to DOCX or DOC, preferring ``ebook-convert``.

    ``ebook-convert`` is probed with ``--version`` and, when usable, asked to
    write ``<stem>.docx`` into ``output_dir``.  For a ``"doc"`` target that
    DOCX is then converted with LibreOffice and removed.  If the tool is
    missing, cannot be launched, times out or fails, the PDF is handed
    directly to LibreOffice instead.

    Args:
        input_path: PDF file to convert.
        output_dir: Directory receiving the output.
        target_format: ``"docx"`` or ``"doc"``.

    Returns:
        Path of the converted file, or ``None`` when nothing was produced.
    """
    base_name = os.path.splitext(os.path.basename(input_path))[0]
    docx_out = os.path.join(output_dir, f"{base_name}.docx")
    # Remember whether the file was already there so the clean-up below only
    # ever deletes something this call created.
    docx_preexisting = os.path.exists(docx_out)

    try:
        probe = subprocess.run(
            ["ebook-convert", "--version"],
            capture_output=True, text=True, timeout=5
        )
        if probe.returncode == 0:
            run = subprocess.run(
                ["ebook-convert", input_path, docx_out],
                capture_output=True, text=True, timeout=300,
                env={**os.environ, "HOME": "/tmp"}
            )

            if run.returncode == 0 and os.path.exists(docx_out):
                if target_format == "doc":
                    doc_path = _libreoffice_convert(docx_out, output_dir, "doc")
                    try:
                        os.remove(docx_out)
                    except OSError:
                        pass
                    return doc_path
                return docx_out
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers a missing binary (FileNotFoundError) as well as one
        # that exists but cannot be executed; either way use the fallback.
        pass

    # A failed or timed-out ebook-convert run may leave a partial DOCX.
    # Remove it so the fallback cannot mistake it for its own output.
    if not docx_preexisting:
        try:
            os.remove(docx_out)
        except OSError:
            pass

    return _libreoffice_convert(input_path, output_dir, target_format)


def _get_intermediate_format(source_ext, target_format):
    """Return the stepping-stone format for a two-step conversion, if any.

    Args:
        source_ext: Source extension; leading dots are stripped and the
            comparison is case-insensitive.
        target_format: Target format, compared exactly as given.

    Returns:
        ``"odt"`` or ``"pdf"`` when a route is defined for the pair,
        otherwise ``None``.
    """
    # (source extensions, target formats, intermediate), checked in order.
    routes = (
        (("html",), ("docx", "doc", "rtf"), "odt"),
        (("html",), ("ppt", "pptx"), "pdf"),
        (("ppt", "pptx"), ("docx", "doc"), "pdf"),
        (("xls", "xlsx"), ("docx", "doc"), "pdf"),
    )
    normalized = source_ext.lstrip(".").lower()
    for sources, targets, stepping_stone in routes:
        if normalized in sources and target_format in targets:
            return stepping_stone
    return None


def _remove_quietly(path):
    """Delete *path*, ignoring any OS-level failure (best-effort clean-up)."""
    try:
        os.remove(path)
    except OSError:
        pass


def _dispatch_conversion(input_path, output_dir, source_ext, target_format):
    """Run the conversion route for one source/target pair.

    ``source_ext`` is lower-case with a leading dot; ``target_format`` is
    lower-case without one.  Returns the produced path or ``None``.
    """
    if source_ext == ".pdf" and target_format in ("doc", "docx"):
        return _convert_pdf_to_docx_or_doc(input_path, output_dir, target_format)

    if source_ext == ".pdf" and target_format == "html":
        # PDF -> DOCX first, then DOCX -> HTML.
        docx_path = _convert_pdf_to_docx_or_doc(input_path, output_dir, "docx")
        if not docx_path or not os.path.exists(docx_path):
            return None
        html_path = _libreoffice_convert(docx_path, output_dir, "html")
        _remove_quietly(docx_path)
        return html_path

    if source_ext == ".pdf" and target_format in ("pptx", "ppt"):
        return _convert_pdf_to_pptx(input_path, output_dir, target_format)

    if source_ext in (".ppt", ".pptx") and target_format in ("doc", "docx", "odt", "rtf", "txt"):
        return _convert_via_pdf_intermediate(input_path, output_dir, source_ext, target_format)

    if source_ext in (".xls", ".xlsx") and target_format in ("doc", "docx", "odt"):
        return _convert_via_pdf_intermediate(input_path, output_dir, source_ext, target_format)

    if source_ext in (".xls", ".xlsx") and target_format == "xlsx":
        return _libreoffice_convert(input_path, output_dir, target_format)

    if source_ext == ".html" and target_format in ("ppt", "pptx"):
        # HTML -> PDF first (writer_pdf_Export), then PDF -> PPTX/PPT.
        pdf_path = _libreoffice_convert(input_path, output_dir, "pdf", "writer_pdf_Export")
        if not pdf_path or not os.path.exists(pdf_path):
            return None
        slides_path = _convert_pdf_to_pptx(pdf_path, output_dir, target_format)
        _remove_quietly(pdf_path)
        return slides_path

    if target_format == "pdf":
        # Use the family-specific exporter when the source type is known.
        # For anything else pass no filter and let LibreOffice choose one:
        # forcing the Writer exporter onto a document that opens in another
        # application makes the export fail.
        pdf_filter = PDF_EXPORT_FILTERS.get(source_ext.lstrip("."))
        return _libreoffice_convert(input_path, output_dir, "pdf", pdf_filter)

    # Generic route: try directly, then through an intermediate format.
    output_path = _libreoffice_convert(input_path, output_dir, target_format)
    if output_path:
        return output_path

    intermediate = _get_intermediate_format(source_ext, target_format)
    if not intermediate:
        return None
    inter_path = _libreoffice_convert(input_path, output_dir, intermediate)
    if not inter_path or not os.path.exists(inter_path):
        return None
    output_path = _libreoffice_convert(inter_path, output_dir, target_format)
    _remove_quietly(inter_path)
    return output_path


def _convert_single(file_object, target_format, options, config):
    """Convert one uploaded file and return the path of the result.

    The result is written to a freshly created, randomly named directory
    below the upload directory and is named ``<name>.<target_format>``.

    Args:
        file_object: Dict with ``path`` (required) and optional ``name`` and
            ``ext``.  Missing ``name`` / ``ext`` are derived from the path.
        target_format: Target format without a dot; case-insensitive.
        options: Accepted for interface compatibility; not used here.
        config: Mapping; ``UPLOAD_DIR`` (default ``"static/uploads"``) is the
            base directory for inputs and outputs.

    Returns:
        Path of the converted file.

    Raises:
        ValueError: ``file_object`` has no ``path``.
        FileNotFoundError: The input exists neither below the upload
            directory nor at ``path`` itself.
        RuntimeError: The conversion produced no output (or timed out).
    """
    upload_dir = config.get("UPLOAD_DIR", "static/uploads")
    target_format = target_format.lower()

    file_path = file_object.get("path", "")
    file_name = file_object.get("name", "")
    file_ext = file_object.get("ext", "")

    if not file_path:
        raise ValueError("No file path provided")

    # NOTE(review): file_path is used as given, first relative to upload_dir
    # and then on its own, so it may point outside upload_dir.  Confirm that
    # callers only pass trusted or sanitised paths.
    input_path = os.path.join(upload_dir, file_path)
    if not os.path.exists(input_path):
        input_path = file_path
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"File not found: {os.path.basename(file_path)}")

    # Derive name / ext from the actual file path if not supplied by caller.
    if not file_ext:
        file_ext = os.path.splitext(input_path)[1]  # e.g. ".docx"
    if not file_name:
        file_name = os.path.splitext(os.path.basename(input_path))[0]

    # The routing below compares against dotted, lower-case extensions, so a
    # caller-supplied "PDF" or "docx" must be normalised first.
    source_ext = file_ext.lower()
    if source_ext and not source_ext.startswith("."):
        source_ext = f".{source_ext}"

    # The display name becomes a file name inside output_dir; drop any
    # directory components so the result cannot be moved elsewhere.
    file_name = (
        os.path.basename(file_name)
        or os.path.splitext(os.path.basename(input_path))[0]
    )

    output_dir = os.path.join(upload_dir, uuid.uuid4().hex)
    os.makedirs(output_dir, exist_ok=True)

    try:
        output_path = _dispatch_conversion(input_path, output_dir, source_ext, target_format)
        if not output_path or not os.path.exists(output_path):
            raise RuntimeError(f"Conversion failed for {os.path.basename(file_path)}")

        final_path = os.path.join(output_dir, f"{file_name}.{target_format}")
        if output_path != final_path:
            # os.replace overwrites an existing destination in one step.
            os.replace(output_path, final_path)
    except Exception:
        # Do not leave an empty or half-filled job directory behind.
        shutil.rmtree(output_dir, ignore_errors=True)
        raise

    return final_path


def convert(urls, target_format, options, config):
    """Convert a batch of files to ``target_format``.

    Args:
        urls: Iterable of entries understood by ``_parse_url_entry`` (dicts,
            JSON-object strings or plain paths).  A single string or dict is
            treated as a one-item batch; ``None`` as an empty batch.
        target_format: Target format without a dot; case-insensitive.  Must
            be one of ``SUPPORTED_FORMATS``.
        options: Optional dict.  A truthy ``"tts"`` entry delegates the whole
            call to ``converters.tts_converter.convert``.
        config: Mapping forwarded to the per-file conversion.

    Returns:
        On success (at least one file converted)::

            {"error": False, "results": [...], "output_path": <first result>,
             "errors": [...] or None}

        Otherwise ``{"error": True, "message": <text>}``.  This function does
        not raise for ordinary exceptions; they are reported through the
        ``message`` field.
    """
    try:
        # TTS — delegate to tts_converter
        if isinstance(options, dict) and options.get("tts"):
            from converters import tts_converter
            return tts_converter.convert(urls, target_format, options, config)

        target_format_lower = target_format.lower()

        if target_format_lower not in SUPPORTED_FORMATS:
            return {"error": True, "message": f"Unsupported target format: {target_format}"}

        # Iterating a bare string would visit it character by character and
        # iterating a dict would visit its keys, so wrap a lone entry.
        if urls is None:
            urls = []
        elif isinstance(urls, (str, dict)):
            urls = [urls]

        results = []
        errors = []

        for url_entry in urls:
            file_object = _parse_url_entry(url_entry)
            if not file_object:
                continue

            file_path = file_object.get("path", "")
            # "empty" is skipped exactly like a missing path.
            if not file_path or file_path == "empty":
                continue

            try:
                results.append(
                    _convert_single(file_object, target_format_lower, options, config)
                )
            except Exception as e:
                # One bad file must not abort the rest of the batch.
                errors.append(f"Failed to convert {os.path.basename(file_path)}: {str(e)}")

        if not results:
            if errors:
                return {"error": True, "message": "; ".join(errors)}
            return {"error": True, "message": "No files were provided for conversion."}

        return {
            "error": False,
            "results": results,
            "output_path": results[0],
            "errors": errors or None,
        }

    except Exception as e:
        return {"error": True, "message": f"Conversion failed: {str(e)}"}