import mimetypes
import os
import urllib.parse
import urllib.request

import weasyprint as wp

from paste.core.logging import echo_log


class Html2Pdf:
    """Convert HTML content into a PDF document using WeasyPrint."""

    @classmethod
    def custom_url_fetcher(cls, url, timeout=30, **kwargs):
        """Fetch a resource referenced by the HTML being rendered.

        ``file://`` URLs are resolved against the local filesystem directly;
        every other URL is delegated to ``wp.default_url_fetcher`` with the
        given timeout.

        :param url: URL of the resource to load.
        :param timeout: Timeout forwarded to ``wp.default_url_fetcher``
            (30 unless the caller overrides it).
        :param kwargs: Extra keyword arguments forwarded unchanged to
            ``wp.default_url_fetcher``.
        :return: For ``file://`` URLs, a dict with the keys ``mime_type``,
            ``encoding``, ``filename`` and ``file_obj``; otherwise whatever
            ``wp.default_url_fetcher`` returns.
        :raises ValueError: If a ``file://`` URL points to a path that does
            not exist.
        """
        if url.startswith('file://'):
            parsed = urllib.parse.urlparse(url)
            # url2pathname converts the URL path component into a local
            # filesystem path.
            path = urllib.request.url2pathname(parsed.path)
            if not os.path.exists(path):
                raise ValueError(f"File not found: {path}")

            mime_type, _ = mimetypes.guess_type(path)
            return {
                # Fall back to a generic binary type when the extension is
                # unknown to ``mimetypes``.
                'mime_type': mime_type or 'application/octet-stream',
                # NOTE(review): 'binary' is not a character-set name; kept
                # as-is for compatibility -- confirm whether this key should
                # be omitted instead.
                'encoding': 'binary',
                'filename': os.path.basename(path),
                # The handle is returned open on purpose: the consumer of the
                # fetch result reads from it. Presumably WeasyPrint closes it
                # after use -- TODO confirm.
                'file_obj': open(path, 'rb'),
            }

        # Everything else goes through WeasyPrint's stock fetcher, with our
        # own timeout value applied.
        return wp.default_url_fetcher(url, timeout=timeout, **kwargs)

    @classmethod
    def write_pdf(cls, content, output_pdf=None, base_url=""):
        """Render an HTML string to PDF.

        :param content: HTML markup as a string.
        :param output_pdf: Destination passed to WeasyPrint's ``write_pdf``.
            Any falsy value (``None``, ``""``) means "do not write a file".
        :param base_url: Base URL handed to ``wp.HTML`` for the document.
        :return: Whatever ``HTML.write_pdf`` returns for the given target
            (the PDF bytes when no target is given).
        :raises Exception: Any error raised during conversion is logged and
            re-raised unchanged.
        """
        # The success log below treats every falsy ``output_pdf`` as "no
        # output file"; normalise it so WeasyPrint sees the same thing and an
        # empty string is not mistaken for a file path.
        target = output_pdf or None

        try:
            document = wp.HTML(
                string=content,
                url_fetcher=cls.custom_url_fetcher,
                base_url=base_url,
            )
            pdf_bytes = document.write_pdf(target)
        except Exception as e:
            echo_log(f"转换失败: {e}")
            # Bare ``raise`` re-raises the active exception as-is.
            raise
        else:
            # Success logging lives outside the ``try`` body so that a logging
            # problem is never reported as a conversion failure.
            if target:
                echo_log(f"PDF 已成功生成在: {output_pdf}.")
            else:
                echo_log("PDF 已成功生成.")
            return pdf_bytes