import mimetypes
import os
import urllib.parse
import urllib.request

import weasyprint as wp

from paste.core.logging import echo_log


class Html2Pdf:
    """
    Convert HTML content into a PDF document using WeasyPrint.

    Both entry points are classmethods; the class holds no state.
    """

    @classmethod
    def custom_url_fetcher(cls, url, timeout=30, **kwargs):
        """
        Fetch a resource referenced by the HTML being rendered.

        ``file://`` URLs are resolved against the local filesystem here;
        every other URL is delegated to WeasyPrint's default fetcher with
        the given timeout.

        :param url: URL of the resource to load
        :param timeout: timeout in seconds forwarded to the default fetcher
        :param kwargs: extra keyword arguments forwarded to the default fetcher
        :return: for ``file://`` URLs, a dict with the keys ``mime_type``,
            ``encoding``, ``filename`` and ``file_obj`` (an open binary file
            handle); otherwise whatever the default fetcher returns
        :raises ValueError: if a ``file://`` URL does not point to an
            existing regular file
        """
        # Handle file:// URLs locally.
        if url.startswith('file://'):
            parsed = urllib.parse.urlparse(url)
            path = urllib.request.url2pathname(parsed.path)

            # isfile() rather than exists(): a directory also "exists" but
            # would make open() below fail with an OSError instead of the
            # ValueError raised for unusable paths.
            if not os.path.isfile(path):
                raise ValueError(f"File not found: {path}")

            _mime_type, _ = mimetypes.guess_type(path)
            if not _mime_type:
                # Unknown extension: fall back to a generic binary type.
                _mime_type = 'application/octet-stream'

            # NOTE(review): 'binary' is not a character-set name; confirm
            # against the WeasyPrint fetcher contract whether this key
            # should carry a charset or be omitted.
            # The handle is deliberately returned open for the consumer to
            # read; presumably WeasyPrint closes it after use - confirm.
            return {
                'mime_type': _mime_type,
                'encoding': 'binary',
                'filename': os.path.basename(path),
                'file_obj': open(path, 'rb'),
            }

        # Everything else goes through WeasyPrint's fetcher, using the
        # timeout requested by the caller (30 seconds by default here).
        return wp.default_url_fetcher(url, timeout=timeout, **kwargs)

    @classmethod
    def write_pdf(cls, content, output_pdf=None, base_url=""):
        """
        Render an HTML string to PDF.

        :param content: HTML source as a string
        :param output_pdf: target passed to WeasyPrint's ``write_pdf``
            (a PDF file path); defaults to ``None``
        :param base_url: base URL handed to ``weasyprint.HTML``
        :return: the value returned by WeasyPrint's ``HTML.write_pdf``
        :raises Exception: any error raised during conversion is logged
            and re-raised unchanged
        """
        try:
            # Build the document with the custom fetcher, then render it.
            _html = wp.HTML(string=content, url_fetcher=cls.custom_url_fetcher, base_url=base_url)
            _bytes = _html.write_pdf(output_pdf)
            if output_pdf:
                echo_log(f"PDF 已成功生成在: {output_pdf}.")
            else:
                echo_log("PDF 已成功生成.")
            return _bytes
        except Exception as e:
            echo_log(f"转换失败: {e}")
            # Bare raise re-raises the active exception as-is.
            raise