首次提交
This commit is contained in:
@@ -0,0 +1,185 @@
|
||||
import base64
|
||||
import binascii
|
||||
import datetime
|
||||
import decimal
|
||||
import json
|
||||
import re
|
||||
import zlib
|
||||
from typing import Union
|
||||
|
||||
import numpy as np
|
||||
from pandas._libs.tslibs.nattype import NaTType
|
||||
|
||||
from paste.db.baseadapter import BaseAdapter
|
||||
from paste.db.basemodel import LOCAL_DATETIME_FORMAT, LOCAL_DATE_FORMAT, LOCAL_TIME_FORMAT
|
||||
|
||||
|
||||
class JsonDumpsEncoder(json.JSONEncoder):
    """
    JSON encoder that converts a number of special types while dumping to a string.
    """

    def default(self, obj):
        """
        Convert a value the standard encoder cannot serialise.

        :param obj: the value handed over by ``json`` for conversion
        :return: a JSON-serialisable replacement for ``obj``
        """
        # The NaT check is deliberately performed before the date/time checks.
        if isinstance(obj, NaTType):
            return ''

        # numpy containers and scalars become plain Python values.
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, (np.floating, decimal.Decimal)):
            return float(obj)

        # Bytes are decoded as UTF-8; undecodable bytes are dropped.
        if isinstance(obj, bytes):
            return obj.decode(encoding='utf-8', errors='ignore')

        # datetime must be tested before date because datetime subclasses date.
        if isinstance(obj, datetime.datetime):
            return obj.strftime(LOCAL_DATETIME_FORMAT)
        if isinstance(obj, datetime.date):
            return obj.strftime(LOCAL_DATE_FORMAT)
        if isinstance(obj, datetime.time):
            return obj.strftime(LOCAL_TIME_FORMAT)

        # Project adapters are serialised through their own dictionary form.
        if isinstance(obj, BaseAdapter):
            return obj.to_dict()

        # Anything else: let the base class raise its usual TypeError.
        return super().default(obj)
|
||||
|
||||
|
||||
class BaseX:
    """
    Base-N decoding helpers that can detect the encoding of the input.

    The decoder is picked from the characters used by the data, and
    :meth:`auto_decode_unzip` additionally tries a zlib decompression of the
    decoded result.
    """

    Type_Base16 = 'b16'
    Type_Base32 = 'b32'
    Type_Base64 = 'b64'
    Type_Base85 = 'b85'

    # Decoder callables keyed by encoding name.
    Decoders = {
        Type_Base16: base64.b16decode,
        Type_Base32: base64.b32decode,
        Type_Base64: base64.b64decode,
        Type_Base85: base64.b85decode,
    }

    # Alphabet patterns, tried in this order (smallest alphabet first); the
    # first pattern matching the whole string wins.  In the Base85 pattern the
    # "-" is escaped so that "+", "-" and ";" are literals rather than a
    # "+" to ";" range, which would also admit ",", ".", "/" and ":".
    _Detectors = (
        (Type_Base16, re.compile(r"^[0-9A-F=]+$")),
        (Type_Base32, re.compile(r"^[A-Z2-7=]+$")),
        (Type_Base64, re.compile(r"^[A-Za-z0-9+/=]+$")),
        (Type_Base85, re.compile(r"^[0-9A-Za-z!#$%&()*+\-;<=>?@^_`{|}~]+$")),
    )

    @classmethod
    def base_x_detect(cls, data: Union[bytes, str]):
        """
        Detect which built-in Base encoding the data appears to use.

        The return value maps to the encodings as follows::

            1. b16: Base16
            2. b32: Base32
            3. b64: Base64
            4. b85: Base85

        Detection only looks at the characters used, so a string made of
        characters shared by several alphabets is reported as the first
        matching encoding in the order above.

        :param data: Base encoded data, either bytes or a string
        :return: lower-case encoding name, or ``None`` when the data is not
            text or matches none of the alphabets
        """
        if isinstance(data, bytes):
            try:
                data = data.decode()
            except UnicodeDecodeError:
                return None

        # Anything that is not text at this point cannot be matched.
        if not isinstance(data, str):
            return None

        for _name, _pattern in cls._Detectors:
            if _pattern.match(data) is not None:
                return _name
        return None

    @classmethod
    def base_x_decode(cls, data: Union[bytes, str], base_type: str = None):
        """
        Decode Base encoded data, detecting the encoding when necessary.

        ``base_type`` maps to the encodings as follows::

            1. b16: Base16
            2. b32: Base32
            3. b64: Base64
            4. b85: Base85

        If decoding fails the original ``data`` is returned.  When no decoder
        applies, the text form of the input is returned (for a bytes input
        that is its UTF-8 decoded string).

        :param data: Base encoded data, either bytes or a string
        :param base_type: encoding name, e.g. ``b64`` forces Base64; any value
            not present in :attr:`Decoders` triggers auto-detection
        :return: decoded bytes, or the data as described above
        """
        if isinstance(data, bytes):
            try:
                _text = data.decode()
            except UnicodeDecodeError:
                # Not text at all, so it cannot be Base encoded.
                return data
        else:
            _text = data

        if base_type not in cls.Decoders:
            # Detect the Base encoding in use.
            base_type = cls.base_x_detect(_text)

        _decoder = cls.Decoders.get(base_type)
        if _decoder is None:
            return _text

        try:
            return _decoder(_text)
        except ValueError:
            # binascii.Error and UnicodeDecodeError are ValueError subclasses;
            # b85decode (bad character / overflow) and b64decode (non-ASCII
            # text) raise plain ValueError, which must be handled as well.
            return data

    @classmethod
    def auto_decode_unzip(cls, data: Union[bytes, str], base_type: str = None):
        """
        Try an automatic Base decode followed by a zlib decompression::

            1. If the data can be Base decoded it is decoded, otherwise it is
               kept as returned by :meth:`base_x_decode`.
            2. If the result can be decompressed with ``zlib.decompress`` it
               is decompressed, otherwise the result of step 1 is kept.

        ``base_type`` maps to the encodings as follows::

            1. b16: Base16
            2. b32: Base32
            3. b64: Base64
            4. b85: Base85

        :param data: Base encoded data, either bytes or a string
        :param base_type: encoding name, e.g. ``b64`` forces Base64
        :return: the decoded (and, where possible, decompressed) data
        """
        # Step 1: Base decode.
        _res_data = cls.base_x_decode(data, base_type)

        try:
            # Step 2: zlib decompression.
            return zlib.decompress(_res_data)
        except (zlib.error, TypeError):
            # Not a zlib stream (or not bytes-like): keep the step 1 result.
            return _res_data
|
||||
@@ -0,0 +1,125 @@
|
||||
"""
|
||||
基础分页程序,处理分页计算,后续应当扩展其功能。
|
||||
"""
|
||||
|
||||
|
||||
class Pagination:
    """
    Pagination calculator: page count, validated page number and row offset.
    """

    def __init__(self, row_count: int):
        """
        Initialise the pagination.

        :param row_count: total number of rows
        """
        # Row offset computed by the last call to offset().
        self._offset = 0
        # Total number of pages; stays -1 until pages() has been called.
        self._pages = -1
        # Current page number.
        self._page_number = 1
        # Total number of rows.
        self.row_count = row_count
        # Rows shown per page, 20 by default.
        self.page_size = 20

    @property
    def page_count(self):
        """
        Number of pages.  Only meaningful after :meth:`pages` was called::

            >>> self.pages()
            >>> self.page_count

        :return: number of pages
        """
        return self._pages

    @property
    def page_number(self):
        """
        Current page number.  Only meaningful after :meth:`number` was called::

            >>> self.number(3)
            >>> self.page_number

        :return: page number
        """
        return self._page_number

    @property
    def offset_size(self):
        """
        Row offset computed by the last call to :meth:`offset`.
        """
        return self._offset

    def pages(self, page_size: int = 20):
        """
        Compute the number of pages.

        :param page_size: rows per page; values outside [1, 1000] are clamped
            into that range.  Defaults to 20.
        :return: the computed number of pages (always at least 1)
        """
        page_size = min(max(page_size, 1), 1000)
        self.page_size = page_size

        if self.row_count <= 0:
            # No rows (or an invalid negative count) still yields one page so
            # that page numbers and offsets never become zero or negative.
            self._pages = 1
        else:
            # Ceiling division; avoids the float comparison that goes wrong
            # for very large row counts.
            self._pages = -(-self.row_count // page_size)

        return self._pages

    def number(self, page_number: int):
        """
        Clamp a page number into the valid range [1, page count].

        :param page_number: requested page number
        :return: the valid page number
        """
        _pages = self.pages(self.page_size)
        self._page_number = min(max(page_number, 1), _pages)
        return self._page_number

    def offset(self, page_number: int):
        """
        Compute the row offset of a page.

        :param page_number: page number (not validated here)
        :return: row offset
        """
        self._offset = self.page_size * (page_number - 1)
        return self._offset

    def paging(self, page_number: int = 1, page_size: int = 20):
        """
        Run the whole calculation; supports method chaining.

        :param page_number: page number
        :param page_size: rows per page
        :return: self
        """
        self.pages(page_size=page_size)
        self.offset(self.number(page_number))
        return self
|
||||
@@ -0,0 +1,63 @@
|
||||
import mimetypes
|
||||
import os
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
||||
import weasyprint as wp
|
||||
|
||||
from paste.core.logging import echo_log
|
||||
|
||||
|
||||
class Html2Pdf:
    """
    Convert HTML content into a PDF file.
    """

    @classmethod
    def custom_url_fetcher(cls, url, timeout=30, **kwargs):
        """
        URL fetcher that serves ``file://`` URLs itself and passes an explicit
        timeout to the default fetcher for everything else.

        :param url: URL of the resource to load
        :param timeout: timeout forwarded to the default fetcher
        :param kwargs: extra arguments forwarded to the default fetcher
        :return: fetch result dictionary
        :raises ValueError: when a ``file://`` URL points to a missing path
        """
        # Handle file:// URLs locally.
        if url.startswith('file://'):
            parsed = urllib.parse.urlparse(url)
            path = urllib.request.url2pathname(parsed.path)

            if not os.path.exists(path):
                raise ValueError(f"File not found: {path}")

            _mime_type, _ = mimetypes.guess_type(path)
            if not _mime_type:
                _mime_type = 'application/octet-stream'

            # The open handle is returned to the caller, which owns it from
            # here on.  NOTE(review): presumably closed by WeasyPrint - confirm.
            return {
                'mime_type': _mime_type,
                'encoding': 'binary',
                'filename': os.path.basename(path),
                'file_obj': open(path, 'rb'),
            }

        # Everything else goes through the default fetcher with the timeout.
        return wp.default_url_fetcher(url, timeout=timeout, **kwargs)

    @classmethod
    def write_pdf(cls, content, output_pdf=None, base_url=""):
        """
        Convert HTML into a PDF.

        :param content: HTML string
        :param output_pdf: path of the PDF file to write; empty by default
        :param base_url: base URL used to resolve relative references
        :return: the value returned by the underlying ``write_pdf`` call
        :raises Exception: any conversion error is logged and re-raised
        """
        try:
            # HTML to PDF.
            _html = wp.HTML(string=content, url_fetcher=cls.custom_url_fetcher, base_url=base_url)
            _bytes = _html.write_pdf(output_pdf)
            if output_pdf:
                echo_log(f"PDF 已成功生成在: {output_pdf}.")
            else:
                echo_log("PDF 已成功生成.")
            return _bytes
        except Exception as e:
            echo_log(f"转换失败: {e}")
            # Bare raise keeps the original traceback intact.
            raise
|
||||
@@ -0,0 +1,126 @@
|
||||
"""
|
||||
雪花 ID 生成程序。
|
||||
"""
|
||||
|
||||
import time
|
||||
import logging
|
||||
|
||||
|
||||
# Bit widths of the fields inside the 64-bit ID
WORKER_ID_BITS = 5
DATACENTER_ID_BITS = 5
SEQUENCE_BITS = 12

# Largest value each identifier field can hold (all bits set)
MAX_WORKER_ID = (1 << WORKER_ID_BITS) - 1  # 2**5-1 0b11111
MAX_DATACENTER_ID = (1 << DATACENTER_ID_BITS) - 1

# Left-shift amount of each field
WORKER_ID_SHIFT = SEQUENCE_BITS
DATACENTER_ID_SHIFT = WORKER_ID_SHIFT + WORKER_ID_BITS
TIMESTAMP_LEFT_SHIFT = DATACENTER_ID_SHIFT + DATACENTER_ID_BITS

# Mask that wraps the sequence number around
SEQUENCE_MASK = (1 << SEQUENCE_BITS) - 1

# Twitter epoch timestamp in milliseconds
TW_EPOCH = 1288834974657

# Shared IdWorker instance, created lazily
ID_WORKER = None
|
||||
|
||||
|
||||
class InvalidSystemClock(Exception):
    """
    Raised when the system clock is found to have moved backwards.
    """
|
||||
|
||||
|
||||
class IdWorker(object):
    """
    Generator of Snowflake IDs.
    """

    @classmethod
    def get_id_worker(cls, datacenter_id=1, worker_id=1, sequence=0):
        """
        Return the shared generator, creating it on first use.

        The arguments are only used when the shared instance does not exist yet.

        :param datacenter_id: data centre (machine region) ID
        :param worker_id: machine ID
        :param sequence: starting sequence number
        :return: the shared ``IdWorker`` instance
        """
        global ID_WORKER
        if ID_WORKER is not None:
            return ID_WORKER
        ID_WORKER = IdWorker(datacenter_id, worker_id, sequence)
        return ID_WORKER

    def __init__(self, datacenter_id, worker_id, sequence=0):
        """
        Initialise the generator.

        :param datacenter_id: data centre (machine region) ID
        :param worker_id: machine ID
        :param sequence: starting sequence number
        :raises ValueError: when an ID lies outside its allowed range
        """
        # Range checks, worker first.
        if worker_id < 0 or worker_id > MAX_WORKER_ID:
            raise ValueError('worker_id值越界')
        if datacenter_id < 0 or datacenter_id > MAX_DATACENTER_ID:
            raise ValueError('datacenter_id值越界')

        self.worker_id = worker_id
        self.datacenter_id = datacenter_id
        self.sequence = sequence

        # Timestamp used for the previously generated ID.
        self.last_timestamp = -1

    @staticmethod
    def _gen_timestamp():
        """
        Current time as an integer number of milliseconds.

        :return: int timestamp
        """
        return int(time.time() * 1000)

    def get_id(self):
        """
        Produce a new ID.

        :return: the new Snowflake ID
        :raises InvalidSystemClock: when the clock is behind the last timestamp used
        """
        now = self._gen_timestamp()

        # Clock moved backwards: refuse to generate.
        if now < self.last_timestamp:
            logging.error(f"时钟正在向后倒转。拒绝请求直至 {self.last_timestamp}.")
            raise InvalidSystemClock

        if now != self.last_timestamp:
            # New millisecond: restart the sequence.
            self.sequence = 0
        else:
            # Same millisecond: advance the sequence, waiting for the next
            # millisecond once it wraps around to zero.
            self.sequence = (self.sequence + 1) & SEQUENCE_MASK
            if self.sequence == 0:
                now = self._til_next_millis(self.last_timestamp)

        self.last_timestamp = now

        time_part = (now - TW_EPOCH) << TIMESTAMP_LEFT_SHIFT
        datacenter_part = self.datacenter_id << DATACENTER_ID_SHIFT
        worker_part = self.worker_id << WORKER_ID_SHIFT
        return time_part | datacenter_part | worker_part | self.sequence

    def _til_next_millis(self, last_timestamp):
        """
        Spin until the clock has passed ``last_timestamp``.

        :param last_timestamp: timestamp that must be exceeded
        :return: the first timestamp greater than ``last_timestamp``
        """
        while True:
            current = self._gen_timestamp()
            if current > last_timestamp:
                return current
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke run: print one freshly generated ID.
    print(IdWorker(1, 1, 0).get_id())
|
||||
@@ -0,0 +1,704 @@
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
import svgwrite
|
||||
from svgwrite.container import Group
|
||||
from svgwrite.path import Path
|
||||
from svgwrite.shapes import Rect
|
||||
from svgwrite.text import Text
|
||||
|
||||
from paste.util import ustr
|
||||
|
||||
|
||||
class TextRect(Group):
    """
    A rectangle with a text label, grouped into one SVG element.

    Sizes are approximations derived from the font size and the number of
    characters; no real text measurement is performed.
    """

    def __init__(self, text, insert, text_extra: dict = None, rect_extra: dict = None, **extra):
        """
        Build the group with its rectangle and text elements.

        :param text: text to display
        :param insert: (x, y) position of the rectangle
        :param text_extra: extra attributes for the text element
        :param rect_extra: extra attributes for the rectangle element
        :param extra: extra attributes for the group itself
        """
        # Initialise the parent group.
        super().__init__(**extra)

        # Text to display.
        self.text = text
        # Extra attributes of the group.
        self.extra = extra
        # Extra attributes of the frame; copied so the caller's dict is untouched.
        self.rectExtra = dict(rect_extra) if rect_extra is not None else {}
        # Extra attributes of the text; copied because font_size writes into it.
        self.textExtra = dict(text_extra) if text_extra is not None else {}
        # Position of the whole box, i.e. of the frame.
        self.rectInsert = insert

        # Position of the text.  Evaluating it reads font_size, which stores
        # the resolved 'font-size' in textExtra before the Text is created.
        self.textInsert = self.text_pos

        # Create the text and the rectangle.
        self.textElement = Text(self.text, insert=self.textInsert, **self.textExtra)
        self.rectElement = Rect(insert=self.rectInsert, size=self.rect_size, **self.rectExtra)

        # Rectangle first so the text is drawn on top of it.
        self.add(self.rectElement)
        self.add(self.textElement)

    @property
    def font_size(self):
        """
        Font size taken from the style, 14px when none is given.

        The value is looked up in the text attributes first and the group
        attributes second; the resolved value is written back into the text
        attributes.  Only the leading number is used and any fractional part
        is truncated; the unit is ignored.

        :return: font size as an integer
        :raises ValueError: when the value does not start with a number
        """
        _font_size = self.textExtra.get('font-size', self.extra.get('font-size', f"{14}px"))
        self.textExtra['font-size'] = _font_size
        # Take the leading number only: stripping every non-digit would turn
        # "14.5px" into 145.
        _match = re.match(r'\s*(\d+(?:\.\d*)?|\.\d+)', str(_font_size))
        if _match is None:
            raise ValueError(f"Cannot parse font-size: {_font_size!r}")
        return int(float(_match.group(1)))

    @property
    def text_width(self):
        """
        Approximate text width: characters counted by ``ustr.str_q_count``
        take one font size each, all others half a font size.
        """
        total = len(self.text)
        q_count = ustr.str_q_count(self.text)
        return q_count * self.font_size + (total - q_count) * self.font_size * 0.5

    @property
    def text_height(self):
        """
        Approximate text height.
        """
        return self.font_size * 1.2

    @property
    def rect_width(self):
        """
        Frame width.
        """
        return self.text_width + self.font_size * 1.5

    @property
    def rect_height(self):
        """
        Frame height.
        """
        return self.text_height * 1.9

    @property
    def rect_size(self):
        """
        Frame size as (width, height).
        """
        return self.rect_width, self.rect_height

    @property
    def text_pos(self):
        """
        Text position as (x, y): horizontally centred inside the frame.
        """
        return \
            self.rectInsert[0] + (self.rect_width - self.text_width) * 0.5, \
            self.rectInsert[1] + self.text_height * 1.25

    def reposition(self, position: tuple):
        """
        Move the box.

        :param position: new (x, y) position of the frame
        """
        self.rectInsert = position
        self.rectElement.attribs['x'] = self.rectInsert[0]
        self.rectElement.attribs['y'] = self.rectInsert[1]

        self.textInsert = self.text_pos
        self.textElement.attribs['x'] = self.textInsert[0]
        self.textElement.attribs['y'] = self.textInsert[1]

    def point_bottom(self):
        """
        Midpoint of the bottom edge.
        """
        return self.rectInsert[0] + self.rect_width / 2, self.rectInsert[1] + self.rect_size[1]

    def point_top(self):
        """
        Midpoint of the top edge.
        """
        return self.rectInsert[0] + self.rect_width / 2, self.rectInsert[1]

    def point_left(self):
        """
        Midpoint of the left edge.
        """
        return self.rectInsert[0], self.rectInsert[1] + self.rect_height / 2

    def point_right(self):
        """
        Midpoint of the right edge.
        """
        return self.rectInsert[0] + self.rect_width, self.rectInsert[1] + self.rect_height / 2

    @classmethod
    def horizontal_path(cls, start: tuple, end: tuple, **extra):
        """
        Build a connector that leaves the start point horizontally.

        :param start: start coordinates
        :param end: end coordinates
        :param extra: extra path attributes
        :return: path object
        """
        # Control point: halfway in x, at the height of the start point.
        _p_control = [
            (start[0] + end[0]) * 0.5,
            start[1]
        ]

        # Midpoint between start and end.
        _p_center = [
            (start[0] + end[0]) * 0.5,
            (start[1] + end[1]) * 0.5
        ]

        _path = Path(**extra)

        _path.push(['M', start])
        _path.push(['Q', _p_control + _p_center])
        _path.push(['T', end])

        return _path

    @classmethod
    def vertical_path(cls, start: tuple, end: tuple, **extra):
        """
        Build a connector that leaves the start point vertically.

        :param start: start coordinates
        :param end: end coordinates
        :param extra: extra path attributes
        :return: path object
        """
        # Control point: at the x of the start point, halfway in y.
        _p_control = [
            start[0],
            (start[1] + end[1]) * 0.5
        ]

        # Midpoint between start and end.
        _p_center = [
            (start[0] + end[0]) * 0.5,
            (start[1] + end[1]) * 0.5
        ]

        _path = Path(**extra)

        _path.push(['M', start])
        _path.push(['Q', _p_control + _p_center])
        _path.push(['T', end])

        return _path

    def choose_point(self, sibling: 'TextRect'):
        """
        Choose the points used to connect this box with another one.

        The start point (tuple) lies on this box, the end point (tuple) on the
        target box.  The third value is ``False`` when the side edges
        (left/right) are connected and ``True`` when the top/bottom edges are
        connected; :meth:`connect` draws a vertical path for ``True``.

        :param sibling: target box
        :return: start point, end point, flag as described above
        """
        # Target entirely to the right: connect right edge to its left edge.
        if self.point_right()[0] < sibling.point_left()[0]:
            return self.point_right(), sibling.point_left(), False

        # Target entirely to the left: connect left edge to its right edge.
        if self.point_left()[0] > sibling.point_right()[0]:
            return self.point_left(), sibling.point_right(), False

        # Horizontal overlap: connect through the top/bottom edges.
        if self.point_bottom()[1] > sibling.point_top()[1]:
            return self.point_top(), sibling.point_bottom(), True
        return self.point_bottom(), sibling.point_top(), True

    def connect(self, sibling: 'TextRect', **extra):
        """
        Build the path connecting this box with another one.

        :param sibling: target box
        :param extra: extra path attributes
        :return: connecting path
        """
        _start, _end, _via_top_bottom = self.choose_point(sibling)
        if _via_top_bottom:
            return self.vertical_path(_start, _end, **extra)
        return self.horizontal_path(_start, _end, **extra)
|
||||
|
||||
|
||||
class RelationGraph:
    """
    SVG relation graph.

    Renders a central title box surrounded by one box per entry of
    ``row_list``, each connected to the title.
    """

    def __init__(self, filename: str = 'noname.svg'):
        """
        Create the drawing and the default styles.

        :param filename: name of the SVG file used by the drawing
        """
        self.filename = filename

        # Canvas width.
        self.width = 800
        # Canvas height.
        self.height = 600
        # Vertical room around the title used by the surrounding boxes.
        self.vhSpace = 170
        # Horizontal gap between the title and the side columns.
        self.lrSpace = 100

        # Title text style.
        self.titleTextExtra = {
            'font-size': '16px', 'fill': 'rgb(255, 255, 255)'
        }

        # Title frame style.
        self.titleRectExtra = {
            'rx': 10, 'ry': 10, 'fill': 'rgb(233, 72, 41)', 'fill-opacity': 1, 'stroke': 'rgb(233, 72, 41)'
        }

        # Regular text style.
        self.textExtra = {
            'font-size': '14px', 'fill': 'rgb(255, 255, 255)'
        }

        # Regular frame style.
        self.rectExtra = {
            'rx': 10, 'ry': 10, 'fill': 'rgb(65, 130, 164)', 'fill-opacity': 1, 'stroke': 'rgb(65, 130, 164)'
        }

        # Connector style.
        self.pathExtra = {
            'fill': 'none', 'stroke': 'rgb(65, 130, 164)'
        }

        # Main drawing object.
        self.drawing = svgwrite.Drawing(filename=self.filename)
        # Attributes of the image (the drawing's own attribute mapping).
        self.attribs = self.drawing.attribs
        # Shortcut to the drawing's save method.
        self.save = self.drawing.save

        self.attribs.update({
            'width': self.width, 'height': self.height
        })

        # Title box, created by draw().
        self.titleTr: Optional[TextRect] = None

    @staticmethod
    def _split_sides(boxes: list) -> tuple:
        """Split the boxes into (top, left, right, bottom) groups."""
        _half = (len(boxes) + 1) // 2
        _top, _left, _right, _bottom = [], boxes[:_half], boxes[_half:], []
        # With many boxes two are moved above and, later, two below the title.
        if len(boxes) >= 12:
            _top, _left = _left[:2], _left[2:]
        if len(boxes) >= 14:
            _bottom, _right = _right[-2:], _right[:-2]
        return _top, _left, _right, _bottom

    def _place_pair(self, boxes: list, above: bool) -> None:
        """Place boxes left and right of the title centre line, above or below it."""
        _anchor = self.titleTr.point_top() if above else self.titleTr.point_bottom()
        for _i, _tr in enumerate(boxes):
            # First box goes left of the centre line, the others right of it.
            if _i == 0:
                _x = _anchor[0] - _tr.rect_width - 15
            else:
                _x = _anchor[0] + 15
            if above:
                _y = _anchor[1] - self.vhSpace - _tr.rect_height - 15
            else:
                _y = _anchor[1] + self.vhSpace + _tr.rect_height + 15
            _tr.reposition((_x, _y))

    def _place_column(self, boxes: list, on_left: bool) -> None:
        """Stack boxes in a column beside the title, kept 20 units inside the canvas."""
        _title = self.titleTr
        _count = len(boxes)
        _top = _title.point_top()[1] - self.vhSpace
        for _tr in boxes:
            _w = _tr.rect_width
            _h = _tr.rect_height
            _space = _title.point_bottom()[1] - _title.point_top()[1] + self.vhSpace * 2 + _h

            # Spread the boxes evenly over the available height.
            _margin = 0
            if _count > 1:
                _margin = (_space - _count * _h) / (_count - 1)

            if on_left:
                _left = _title.point_left()[0] - _w - self.lrSpace
                if _left < 20:
                    _left = 20
            else:
                _left = _title.point_right()[0] + self.lrSpace
                if _left + _w > self.width - 20:
                    _left = self.width - _w - 20

            _tr.reposition((_left, _top))
            _top += _h + _margin

    def draw(self, title: str, row_list: list[dict]):
        """
        Draw the graph.

        :param title: title text
        :param row_list: list of data dictionaries; each must contain the keys
            short_name, count, unit_name, unit_uscid and enterprise_id
        :return: self
        """
        # Create the title box and centre it (slightly above the middle).
        self.titleTr = TextRect(
            text=title, insert=(0, 0), text_extra=self.titleTextExtra, rect_extra=self.titleRectExtra,
            debug=False
        )
        self.titleTr.reposition((
            (self.width - self.titleTr.rect_width) * 0.5, (self.height - self.titleTr.rect_height) * 0.5 - 20
        ))
        self.drawing.add(self.titleTr)

        # Create every box first so that sizes are known; the data needed by
        # consumers of the SVG is attached as data-* attributes.
        _tr_list: list[TextRect] = []
        for _row in row_list:
            _text = f"{_row['short_name']} ({_row['count']})"
            _tr = TextRect(
                text=_text, insert=(0, 0), rect_extra=self.rectExtra, **self.textExtra, **{
                    'debug': False,
                    'data-name': _row['unit_name'],
                    'data-uscid': _row['unit_uscid'],
                    'data-enterprise-id': _row['enterprise_id'],
                }
            )
            _tr_list.append(_tr)

        _top_list, _lft_list, _rit_list, _btm_list = self._split_sides(_tr_list)

        # Position the boxes around the title.
        self._place_pair(_top_list, above=True)
        self._place_pair(_btm_list, above=False)
        self._place_column(_lft_list, on_left=True)
        self._place_column(_rit_list, on_left=False)

        # Connectors are added before the boxes so the boxes are drawn on top.
        for _tr in _tr_list:
            self.drawing.add(self.titleTr.connect(_tr, **self.pathExtra))

        for _tr in _tr_list:
            self.drawing.add(_tr)

        return self
|
||||
|
||||
|
||||
class EnterpriseGraph:
    """
    SVG enterprise summary graph.

    Renders a central title box surrounded by one box per entry of
    ``data_item``, each connected to the title.
    """

    def __init__(self, filename: str = 'noname.svg'):
        """
        Create the drawing and the default styles.

        :param filename: name of the SVG file used by the drawing
        """
        self.filename = filename

        # Canvas width.
        self.width = 800
        # Canvas height.
        self.height = 300
        # Vertical room around the title used by the surrounding boxes.
        self.vhSpace = 50
        # Horizontal gap between the title and the side columns.
        self.lrSpace = 100

        # Title text style.
        self.titleTextExtra = {
            'font-size': '16px', 'fill': 'rgb(255, 255, 255)'
        }

        # Title frame style.
        self.titleRectExtra = {
            'rx': 10, 'ry': 10, 'fill': 'rgb(233, 72, 41)', 'fill-opacity': 1, 'stroke': 'rgb(233, 72, 41)'
        }

        # Regular text style.
        self.textExtra = {
            'font-size': '14px', 'fill': 'rgb(255, 255, 255)'
        }

        # Regular frame style.
        self.rectExtra = {
            'rx': 10, 'ry': 10, 'fill': 'rgb(65, 130, 164)', 'fill-opacity': 1, 'stroke': 'rgb(65, 130, 164)'
        }

        # Connector style.
        self.pathExtra = {
            'fill': 'none', 'stroke': 'rgb(65, 130, 164)'
        }

        # Main drawing object.
        self.drawing = svgwrite.Drawing(filename=self.filename)
        # Attributes of the image (the drawing's own attribute mapping).
        self.attribs = self.drawing.attribs
        # Shortcut to the drawing's save method.
        self.save = self.drawing.save

        self.attribs.update({
            'width': self.width, 'height': self.height
        })

        # Title box, created by draw().
        self.titleTr: Optional[TextRect] = None

    @staticmethod
    def _split_sides(boxes: list) -> tuple:
        """Split the boxes into (top, left, right, bottom) groups."""
        _half = (len(boxes) + 1) // 2
        _top, _left, _right, _bottom = [], boxes[:_half], boxes[_half:], []
        # With many boxes two are moved above and, later, two below the title.
        if len(boxes) >= 12:
            _top, _left = _left[:2], _left[2:]
        if len(boxes) >= 14:
            _bottom, _right = _right[-2:], _right[:-2]
        return _top, _left, _right, _bottom

    def _place_pair(self, boxes: list, above: bool) -> None:
        """Place boxes left and right of the title centre line, above or below it."""
        _anchor = self.titleTr.point_top() if above else self.titleTr.point_bottom()
        for _i, _tr in enumerate(boxes):
            # First box goes left of the centre line, the others right of it.
            if _i == 0:
                _x = _anchor[0] - _tr.rect_width - 15
            else:
                _x = _anchor[0] + 15
            if above:
                _y = _anchor[1] - self.vhSpace - _tr.rect_height - 15
            else:
                _y = _anchor[1] + self.vhSpace + _tr.rect_height + 15
            _tr.reposition((_x, _y))

    def _place_column(self, boxes: list, on_left: bool) -> None:
        """Stack boxes in a column beside the title, kept 20 units inside the canvas."""
        _title = self.titleTr
        _count = len(boxes)
        _top = _title.point_top()[1] - self.vhSpace
        for _tr in boxes:
            _w = _tr.rect_width
            _h = _tr.rect_height
            _space = _title.point_bottom()[1] - _title.point_top()[1] + self.vhSpace * 2 + _h

            # Spread the boxes evenly over the available height.
            _margin = 0
            if _count > 1:
                _margin = (_space - _count * _h) / (_count - 1)

            if on_left:
                _left = _title.point_left()[0] - _w - self.lrSpace
                if _left < 20:
                    _left = 20
            else:
                _left = _title.point_right()[0] + self.lrSpace
                if _left + _w > self.width - 20:
                    _left = self.width - _w - 20

            _tr.reposition((_left, _top))
            _top += _h + _margin

    def draw(self, title: str, data_item: dict):
        """
        Draw the graph.

        :param title: title text
        :param data_item: data dictionary mapping a display name to its value
        :return: self
        """
        # Create the title box and centre it (slightly above the middle).
        self.titleTr = TextRect(
            text=title, insert=(0, 0), text_extra=self.titleTextExtra, rect_extra=self.titleRectExtra,
            debug=False
        )
        self.titleTr.reposition((
            (self.width - self.titleTr.rect_width) * 0.5, (self.height - self.titleTr.rect_height) * 0.5 - 20
        ))
        self.drawing.add(self.titleTr)

        # Create every box first so that sizes are known.
        _tr_list: list[TextRect] = []
        for _key, _val in data_item.items():
            _text = f"{_key}:{_val}"
            _tr = TextRect(
                text=_text, insert=(0, 0), rect_extra=self.rectExtra, **self.textExtra, **{
                    'debug': False,
                }
            )
            _tr_list.append(_tr)

        _top_list, _lft_list, _rit_list, _btm_list = self._split_sides(_tr_list)

        # Position the boxes around the title.
        self._place_pair(_top_list, above=True)
        self._place_pair(_btm_list, above=False)
        self._place_column(_lft_list, on_left=True)
        self._place_column(_rit_list, on_left=False)

        # Connectors are added before the boxes so the boxes are drawn on top.
        for _tr in _tr_list:
            self.drawing.add(self.titleTr.connect(_tr, **self.pathExtra))

        for _tr in _tr_list:
            self.drawing.add(_tr)

        return self
|
||||
@@ -0,0 +1,164 @@
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from paste.core import config
|
||||
|
||||
|
||||
class TailRead:
    """
    Reverse ("tail") reader for files that grow by appending, such as logs.

    The first read walks backwards from the end of the file so only the last
    few lines are loaded. Later reads go forward from the position returned
    by the previous read and therefore return only the newly appended data.

    The file handle stays open for the lifetime of the object; call
    :meth:`close` or use the instance as a context manager to release it.
    """

    # Number of bytes fetched per backwards step in readTail().
    _CHUNK_SIZE = 8192

    @classmethod
    def logReader(cls, log_fn: str = None):
        """
        Build a reader for the log file named in the configuration.

        :param log_fn: log file name; when None the value of the
            ``logger.filename`` configuration entry is used
        :return: a reader bound to that file
        """
        if log_fn is None:
            log_fn = config.get_config('logger.filename')
        return cls(fn=log_fn)

    def __init__(self, fn: str):
        # Name of the file being read.
        self.file_name = fn
        # Binary file handle, kept open across reads.
        self.file_io = open(self.file_name, 'rb')
        # Position reported by the most recent read (None before any read).
        self.current_position: Optional[int] = None

        # Park the cursor on the last byte (offset 0 for empty/1-byte files).
        self.file_io.seek(max(self.size() - 1, 0))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def close(self):
        """
        Close the underlying file handle. Safe to call more than once.
        """
        self.file_io.close()

    def size(self):
        """
        Return the current size of the file on disk.

        :return: file size in bytes
        """
        return os.path.getsize(self.file_name)

    def readTail(self, lines: int = 100):
        """
        Read up to ``lines`` lines backwards, starting at the byte under the cursor.

        The byte under the cursor is treated as the newest byte to return.
        Afterwards the cursor is placed just behind that byte.

        :param lines: maximum number of lines to return
        :return: tuple ``(data, position)`` where ``position`` is the offset
            right after the returned data
        """
        _c_pos = self.file_io.tell()

        _byte = self.file_io.read(1)
        if _byte == b'':
            # Nothing at or after the cursor (e.g. an empty file).
            self.current_position = _c_pos
            self.file_io.seek(_c_pos)
            return b'', self.current_position

        _end = _c_pos + 1
        # A trailing newline terminates the last line instead of starting a
        # new one, so one extra newline has to be consumed to get `lines` lines.
        _wanted = lines + 1 if _byte == b'\n' else lines

        _pieces = []
        _pos = _end
        while _pos > 0 and _wanted > 0:
            _step = min(self._CHUNK_SIZE, _pos)
            _pos -= _step
            self.file_io.seek(_pos)
            _chunk = self.file_io.read(_step)

            # Consume newlines from the right-hand side of the chunk.
            _cut = len(_chunk)
            while _wanted > 0:
                _cut = _chunk.rfind(b'\n', 0, _cut)
                if _cut < 0:
                    break
                _wanted -= 1

            if _wanted == 0:
                # `_cut` is the newline that precedes the oldest wanted line;
                # it is not part of the result.
                _pieces.append(_chunk[_cut + 1:])
            else:
                _pieces.append(_chunk)

        _buffer = b''.join(reversed(_pieces))

        self.current_position = _end
        self.file_io.seek(self.current_position)
        return _buffer, self.current_position

    def readLines(self, lines: int = 100, crt_pos: int = None):
        """
        Read up to ``lines`` lines, backwards or forwards depending on ``crt_pos``.

        Without ``crt_pos`` the last lines of the file are read (reverse
        mode). With ``crt_pos`` reading continues forward from that offset,
        so repeated calls only transfer the increment.

        :param lines: maximum number of lines to read
        :param crt_pos: offset to continue from, or None for a reverse read
        :return: tuple ``(data, position)``
        """
        if crt_pos is None:
            # Reverse read from the last byte; offset 0 for an empty file so
            # that seek() never receives a negative offset.
            self.file_io.seek(max(self.size() - 1, 0))
            _buffer, crt_pos = self.readTail(lines)
        else:
            # Forward read from the given offset.
            self.file_io.seek(crt_pos)
            _parts = []
            while lines:
                _bytes = self.file_io.readline()
                if _bytes == b'':
                    # End of file reached.
                    break
                lines -= 1
                _parts.append(_bytes)
            _buffer = b''.join(_parts)
            crt_pos = self.file_io.tell()

        self.current_position = crt_pos
        return _buffer, self.current_position

    def read(self, lines: int = 200, crt_pos: int = None):
        """
        Read file data, choosing the direction from ``crt_pos``.

        1. On the first call ``crt_pos`` should be None: the last ``lines``
           lines are read backwards.
        2. With ``crt_pos`` the file size is checked first. If the file grew,
           the increment is read forward; if it shrank, the returned position
           is None so that the next call starts over with a reverse read.
        3. If the file size equals ``crt_pos`` an empty byte string is
           returned and the position is unchanged.

        :param lines: maximum number of lines to read, 200 by default
        :param crt_pos: position returned by the previous call, or None
        :return: tuple ``(data, position)``
        """
        _buffer = b''
        if crt_pos is None:
            _buffer, crt_pos = self.readLines(lines, crt_pos=None)
        else:
            _log_size = self.size()
            if _log_size > crt_pos:
                # The file grew (even by a single byte): read the increment.
                _buffer, crt_pos = self.readLines(lines, crt_pos=crt_pos)
            elif _log_size < crt_pos:
                # The file shrank (truncated/rotated): reset the position.
                crt_pos = None

        self.current_position = crt_pos
        return _buffer, self.current_position
|
||||
@@ -0,0 +1,40 @@
|
||||
from typing import Any, Optional, Dict
|
||||
|
||||
|
||||
def get_with_default(dict_obj: dict, key: Any, default: Optional[Any] = None):
    """
    Look up ``key`` and fall back to ``default`` whenever the result is None.

    Unlike ``dict.get``, which returns a stored None as-is, this helper
    returns ``default`` both when the key is missing and when the stored
    value is None.

    :param dict_obj: dictionary to read from
    :param key: key to look up
    :param default: value returned instead of None
    """
    found = dict_obj.get(key)
    return default if found is None else found
|
||||
|
||||
|
||||
def get_by_path(dict_obj: Dict[str, Any], path: str, default: Optional[Any] = None):
    """
    Fetch a value from nested dictionaries by a dotted key path.

    Every path element except the last one must resolve to a dictionary;
    otherwise ``default`` is returned.

    :param dict_obj: outermost dictionary
    :param path: key path, elements separated by "."
    :param default: value returned when the path cannot be followed or the
        last key is missing
    :return: the value stored under the last key, or ``default``
    """
    *parents, leaf = path.split(".")

    node = dict_obj
    for name in parents:
        node = node.get(name, None)
        if not isinstance(node, dict):
            return default

    return node.get(leaf, default)
|
||||
@@ -0,0 +1,293 @@
|
||||
import base64
|
||||
import datetime
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
import unicodedata
|
||||
from typing import Optional, IO, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
# Magic-number table used by inspect_type(): type name -> byte prefixes.
# A tuple key means the prefix is shared by several formats and a second,
# content-based heuristic decides between them.
file_types = {
    'jpeg': (b'\xFF\xD8\xFF', b'\xff\xd8\xff'),
    'png': (b'\x89PNG',),
    'gif': (b'GIF8',),
    'bmp': (b'BM',),
    'tiff': (b'II*\x00', b'MM\x00*'),
    'webp': (b'RIFF\x00\x00\x00\x00WEBP',),
    'ico': (b'\x00\x00\x01\x00',),
    'psd': (b'8BPS',),
    'svg': (b'<svg', b'<?xml'),

    'mp3': (b'\x49\x44\x33', b'\xFF\xFB\x50'),
    'm4a': (b'\x00\x00\x00\x20\x66\x74\x79\x70\x4D\x34\x41\x20\x00\x00\x00\x00',),
    'wav': (b'RIFF\x00\x00\x00\x00WAVE',),
    'flac': (b'fLaC',),
    'aac': (b'\xFF\xF1', b'\xFF\xF9'),
    'aiff': (b'FORM\x00\x00\x00\x00AIFF',),
    'au': (b'.snd',),

    'mov': (b'\x00\x00\x0F', b'\x00\x00\x77', b'\x6D\x6F\x6F\x76', b'\x6d\x64\x61\x74'),
    'mp4': (b'\x00\x00\x00\x14', b'\x00\x00\x00\x18', b'\x00\x00\x00\x1C', b'\x00\x00\x00\x20'),
    'mpg': (b'\x00\x00\x01\xB3', b'\x00\x00\x01\xBA'),
    'avi': (b'RIFF\x00\x00\x00\x00AVI',),
    'mkv': (b'\x1A\x45\xDF\xA3',),
    'wmv': (b'\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C',),
    'flv': (b'FLV\x01',),
    '3gp': (b'\x00\x00\x00\x18\x66\x74\x79\x70\x33\x67\x70',),

    'pdf': (b'\x25PDF',),
    ('doc', 'ppt', 'xls'): (b'\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1',),
    ('docx', 'pptx', 'xlsx'): (b'PK\x03\x04\n\x00\x00\x00\x00\x00\x87N',),
    'rtf': (b'{\\rtf',),
    'txt': (b'\xEF\xBB\xBF', b'\xFE\xFF', b'\xFF\xFE'),

    'zip': (b'PK\x03\x04',),
    'rar': (b'Rar!\x1A\x07\x00',),
    'tar': (b'\x75\x73\x74\x61\x72\x00\x00\x00',),
    'gz': (b'\x1F\x8B',),
    '7z': (b'7z\xBC\xAF\x27\x1C',),
}

# Chunked containers: bytes 4-7 hold a size field that differs per file, so
# the format tag at bytes 8-11 is compared separately from the 4-byte magic.
# The table entries above with a zeroed size field only match when that
# field really is zero.
_container_types = {
    b'RIFF': {b'WEBP': 'webp', b'WAVE': 'wav', b'AVI ': 'avi'},
    b'FORM': {b'AIFF': 'aiff'},
}

# Number of leading bytes that are inspected.
_INSPECT_BYTES = 1024 * 2


def inspect_type(file_data: Optional[bytes] = None, file_io: Optional[IO] = None):
    """
    Detect a file type from its leading bytes.

    At most the first 2048 bytes are inspected. RIFF/FORM containers are
    recognised by their format tag; every other type is recognised by the
    longest matching prefix in ``file_types``, so a specific signature wins
    over a shorter one that it starts with (ties keep table order).

    :param file_data: file content (only the leading bytes are used)
    :param file_io: binary file object to read from when ``file_data``
        is not given
    :return: type name such as ``'png'``, or ``''`` when nothing matches
    """
    assert file_data is not None or file_io is not None, '至少传入文件内容或文件输入输出对象之一.'

    if file_data is None:
        # Only a file object was passed: read the leading bytes from it.
        file_data = file_io.read(_INSPECT_BYTES)
    file_data = file_data[:_INSPECT_BYTES]

    # Container formats first: magic at 0-3, format tag at 8-11.
    _tags = _container_types.get(bytes(file_data[:4]), {})
    file_type = _tags.get(bytes(file_data[8:12]), '')

    if not file_type:
        _best_len = 0
        for _key, _signatures in file_types.items():
            for _bs in _signatures:
                # Strictly longer wins, so equal lengths keep table order.
                if len(_bs) > _best_len and file_data.startswith(_bs):
                    file_type = _key
                    _best_len = len(_bs)

    if isinstance(file_type, tuple):
        # Shared signature: decide by keywords found in the inspected bytes.
        if file_type[0] == 'doc':
            file_type = _heuristic_office_type(file_data)
        elif file_type[0] == 'docx':
            file_type = _heuristic_office_x_type(file_data)

    return file_type
|
||||
|
||||
|
||||
def _heuristic_office_type(data: bytes) -> str:
    """
    Guess whether legacy Office data is a .doc, .xls or .ppt file.

    The decision is a plain substring search for well-known stream names
    (UTF-16LE and ASCII spellings). Groups are tried in the order doc, xls,
    ppt and the first group with a hit wins.

    :param data: leading bytes of the file
    :return: ``'doc'``, ``'xls'``, ``'ppt'`` or ``""`` when nothing matches
    """
    markers = (
        ('doc', (
            b'W\x00o\x00r\x00d\x00D\x00o\x00c\x00u\x00m\x00e\x00n\x00t',
            b'W\x00o\x00r\x00d',
            b'WordDocument',
            b'Word',
        )),
        ('xls', (
            b'W\x00o\x00r\x00k\x00b\x00o\x00o\x00k',
            b'B\x00o\x00o\x00k',
            b'Workbook',
            b'Book',
        )),
        ('ppt', (
            b'P\x00o\x00w\x00e\x00r\x00P\x00o\x00i\x00n\x00t',
            b'PowerPoint',
        )),
    )

    for kind, needles in markers:
        if any(needle in data for needle in needles):
            return kind

    # Nothing recognised: report "unknown" instead of guessing.
    return ""
|
||||
|
||||
|
||||
def _heuristic_office_x_type(data: bytes) -> str:
    """
    Guess whether Office Open XML data is a .docx, .xlsx or .pptx file.

    Nothing is unpacked or parsed: the function only searches the raw bytes
    for three marker strings, tried in the order docx, xlsx, pptx.

    :param data: leading bytes of the file
    :return: ``'docx'``, ``'xlsx'``, ``'pptx'`` or ``""`` when nothing matches
    """
    markers = {
        b'word/PK': 'docx',
        b'xl/PK': 'xlsx',
        b'ppt/PK': 'pptx',
    }

    for needle, kind in markers.items():
        if needle in data:
            return kind

    # Nothing recognised: report "unknown" instead of guessing.
    return ""
|
||||
|
||||
|
||||
def get_file_info(file_path):
    """
    Return a human-readable size and the ``getctime`` timestamp of a file.

    The size is scaled to KB, MB or GB (1024-based) with two decimals; the
    timestamp is formatted as ``YYYY-MM-DD HH:MM:SS``.

    :param file_path: path of the file
    :return: tuple ``(size_text, time_text)``
    """
    stamp = datetime.datetime.fromtimestamp(os.path.getctime(file_path))
    time_text = stamp.strftime('%Y-%m-%d %H:%M:%S')

    amount = os.path.getsize(file_path)
    unit = ''
    for unit in ('KB', 'MB', 'GB'):
        amount = amount / 1024
        if amount < 1024:
            break

    return f"{amount:.2f} {unit}", time_text
|
||||
|
||||
|
||||
def read_to_buffer(file) -> bytes:
    """
    Load a file into a byte string using binary read-only mode.

    :param file: path of the file to read
    :return: the file content
    """
    assert os.path.isfile(file), 'File not found: %s' % file
    with open(file, 'rb') as stream:
        # The `with` block closes the stream; no explicit close is needed.
        return stream.read(os.path.getsize(file))
|
||||
|
||||
|
||||
def sanitize_filename(filename: str) -> str:
    """
    Turn an arbitrary string into a file name that is safe on Windows, Linux and macOS.

    Steps, in order:

    1. NFKC-normalise the text so look-alike characters (e.g. full-width
       slashes) are handled like their ASCII forms.
    2. Replace forbidden characters, control characters, spaces and ``#``
       with ``_``.
    3. Strip leading/trailing dots and spaces.
    4. Use ``unnamed_file`` if nothing is left.
    5. Prefix ``_`` when the part before the first dot is a Windows reserved
       device name (``CON``, ``NUL``, ``COM0``-``COM9``, ``LPT0``-``LPT9`` ...).
       This runs after stripping, so ``.CON`` cannot end up as ``CON``.
    6. Truncate to 255 characters.

    :param filename: requested file name
    :return: sanitised file name
    """
    win_reserved = frozenset(
        ["CON", "PRN", "AUX", "NUL", "CLOCK$"]
        + [f"COM{i}" for i in range(10)]
        + [f"LPT{i}" for i in range(10)]
    )

    # 1. Unicode normalisation (defends against look-alike characters).
    filename = unicodedata.normalize("NFKC", filename)

    # 2. \ / : * ? " < > | control characters, space and # become "_".
    safe_name = re.sub(r'[\\/:*?"<>|\x00-\x20#]', '_', filename)

    # 3. Leading/trailing dots and spaces cause invisible problems.
    safe_name = safe_name.strip(". ")

    # 4. The input consisted only of dots/spaces.
    if not safe_name:
        return "unnamed_file"

    # 5. Windows reserved device names are matched case-insensitively and
    #    regardless of extension (CON.txt -> _CON.txt).
    if safe_name.split(".")[0].upper() in win_reserved:
        safe_name = f"_{safe_name}"

    # 6. Common file-system limit for a single name.
    return safe_name[:255]
|
||||
|
||||
|
||||
def check_and_create_dir(file_path, mode=0o777, exist_ok=False):
    """
    Make sure the directory that will contain ``file_path`` exists.

    A path without a directory component (a bare file name) refers to the
    current directory, so there is nothing to create.

    :param file_path: path of a file
    :param mode: permission bits passed to ``os.makedirs``
    :param exist_ok: passed to ``os.makedirs``; it only matters when the
        directory appears between the existence check and the creation
    """
    _directory = os.path.dirname(file_path)
    if not _directory:
        # os.makedirs('') would raise FileNotFoundError.
        return

    if not os.path.exists(_directory):
        os.makedirs(_directory, mode, exist_ok)
|
||||
|
||||
|
||||
def load_image_from_base64(base64_str):
    """
    Decode a Base64 image into an RGB numpy array.

    A data-URI style header (everything up to the first comma) is dropped
    before decoding. The image is verified with Pillow and converted to RGB
    when it uses another mode.

    :param base64_str: Base64 encoded image data, optionally with a header
    :return: image as a numpy array
    :raises ValueError: when the data cannot be decoded or is not a valid image
    """
    try:
        # Drop the header (if any) and decode to raw bytes.
        payload = base64_str.split(",")[1] if "," in base64_str else base64_str
        raw = base64.b64decode(payload)

        # verify() checks integrity; the image is opened again afterwards
        # for the actual conversion.
        Image.open(io.BytesIO(raw)).verify()
        picture = Image.open(io.BytesIO(raw))

        rgb = picture if picture.mode == "RGB" else picture.convert("RGB")
        return np.array(rgb)
    except Exception as e:
        raise ValueError(f"无效的 Base64 图像数据: {e}")
|
||||
|
||||
|
||||
def load_png_from_base64(base64_str):
    """
    Decode a Base64 PNG into a four-channel (BGRA) numpy array.

    A data-URI style header (everything up to the first comma) is dropped
    before decoding. Images without an alpha channel, including
    single-channel grayscale images, get a fully opaque alpha channel.

    :param base64_str: Base64 encoded PNG data, optionally with a header
    :return: BGRA image as a numpy array, or None when decoding fails
        (the error is printed)
    """
    try:
        # 0. Drop a "data:image/png;base64," style header if present.
        if isinstance(base64_str, str) and "," in base64_str:
            base64_str = base64_str.split(",", 1)[1]

        # 1. Decode the Base64 text.
        img_data = base64.b64decode(base64_str)

        # 2. Wrap the bytes in a numpy array for OpenCV.
        np_array = np.frombuffer(img_data, np.uint8)

        # 3. IMREAD_UNCHANGED keeps an existing alpha channel.
        img = cv2.imdecode(np_array, cv2.IMREAD_UNCHANGED)

        # 4. imdecode returns None for undecodable data.
        if img is None:
            raise ValueError("无法解码图像数据")

        # 5. Add an opaque alpha channel when there is none.
        if img.ndim == 2:
            # Grayscale images have no channel axis at all.
            print("警告: 图像没有Alpha通道,将添加全不透明Alpha通道")
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGRA)
        elif img.shape[2] != 4:
            print("警告: 图像没有Alpha通道,将添加全不透明Alpha通道")
            img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)

        return img

    except Exception as e:
        print(f"读取Base64图像时出错: {str(e)}")
        return None
|
||||
@@ -0,0 +1,58 @@
|
||||
from matplotlib import pyplot as plt
|
||||
from matplotlib.font_manager import fontManager, FontProperties
|
||||
|
||||
|
||||
def get_fonts():
    """
    Return the preferred fonts that are installed, in order of preference.

    :return: list of font names, a subset of the preference list below
    """
    # Names of all fonts known to matplotlib's font manager.
    installed = {entry.name for entry in fontManager.ttflist}

    # Preferred fonts, highest priority first.
    preferred = (
        'PingFang SC', 'Hiragino Sans GB', 'Heiti SC', 'SimSong', 'SimHei',
        'WenQuanYi Micro Hei', 'WenQuanYi Zen Hei', 'Source Han Sans SC',
        'Noto Sans CJK', 'Noto Sans CJK SC', 'DejaVu Sans'
    )

    # Filtering the ordered tuple keeps the priority order.
    return [name for name in preferred if name in installed]
|
||||
|
||||
|
||||
def get_font_metrics(font_name='Microsoft YaHei', font_size=11, dpi=72):
    """
    Measure average character widths of a font with matplotlib.

    Five Latin and five Chinese characters are rendered and their average
    width is taken. The returned widths are doubled on purpose to leave
    generous room.

    :param font_name: font family name
    :param font_size: font size
    :param dpi: rendering resolution in pixels per inch
    :return: tuple ``(latin_width_cm, chinese_width_cm)``
    """
    fig = plt.figure(figsize=(10, 2), dpi=dpi)
    try:
        ax = fig.add_subplot(111)
        ax.axis('off')

        font = FontProperties(family=font_name, size=font_size)

        # Latin sample: average over 5 characters.
        text_en = ax.text(0.1, 0.5, 'aaaaa', fontproperties=font)
        fig.canvas.draw()
        en_width_px = text_en.get_window_extent().width / 5

        # Chinese sample: average over 5 characters.
        text_cn = ax.text(0.1, 0.5, '中中中中中', fontproperties=font)
        fig.canvas.draw()
        cn_width_px = text_cn.get_window_extent().width / 5
    finally:
        # Always release the figure, even when drawing fails.
        plt.close(fig)

    # Pixels -> centimetres, then add 100% extra width.
    px_per_cm = dpi / 2.54
    return en_width_px / px_per_cm * 2, cn_width_px / px_per_cm * 2
|
||||
@@ -0,0 +1,214 @@
|
||||
"""
|
||||
基本公共函数。
|
||||
"""
|
||||
import base64
|
||||
import os
|
||||
import re
|
||||
from typing import Union
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
|
||||
from paste.db import basemodel
|
||||
|
||||
|
||||
def fetch_image(img_url: str) -> tuple[requests.Response, str]:
    """
    Request an external image.

    The response is opened in streaming mode; the body has not been read
    when this function returns.

    :param img_url: image URL
    :return: tuple ``(response, content_type)``; the content type falls back
        to ``image/jpeg`` when the header is missing
    :raises ValueError: the URL lacks a scheme or a host
    :raises requests.exceptions.RequestException: the request failed
    """
    parts = urlparse(img_url)
    if not (parts.scheme and parts.netloc):
        raise ValueError("Invalid URL")

    # Browser-like User-Agent.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/91.0.4472.124 Safari/537.36'
    }

    response = requests.get(img_url, headers=browser_headers, stream=True, timeout=10)
    response.raise_for_status()

    return response, response.headers.get('Content-Type', 'image/jpeg')
|
||||
|
||||
|
||||
def save_image_to_dir(image_data: bytes, image_type: str, output_dir: str) -> str:
    """
    Write image bytes to a newly named file below ``output_dir``.

    The file name is a new id from ``BaseModel.newId()`` plus the given
    extension. The directory is resolved against the current working
    directory and created when missing.

    :param image_data: raw image bytes
    :param image_type: file extension such as ``'jpg'`` or ``'png'``
    :param output_dir: target directory, e.g. ``'static/upload/article/images'``
    :return: ``output_dir/filename`` with forward slashes and a leading ``/``
    """
    filename = f"{basemodel.BaseModel.newId()}.{image_type}"
    full_path = os.path.abspath(os.path.join(os.curdir, output_dir, filename))

    os.makedirs(os.path.dirname(full_path), exist_ok=True)
    with open(full_path, 'wb') as target:
        target.write(image_data)

    # Web-style path: forward slashes only, always rooted at "/".
    rel_path = os.path.join(output_dir, filename).replace('\\', '/')
    return rel_path if rel_path.startswith('/') else '/' + rel_path
|
||||
|
||||
|
||||
def download_and_save_image(url: str, output_dir: str) -> Union[str, None]:
    """
    Download an external image and store it in ``output_dir``.

    The file extension is derived from the Content-Type subtype,
    case-insensitively and without a structured-syntax suffix
    (``image/svg+xml`` -> ``svg``). Unknown subtypes are stored as ``jpg``.
    This is a best-effort helper: any failure yields None.

    :param url: full URL of the external image
    :param output_dir: output directory
    :return: stored path on success, None on failure
    """
    allowed_extensions = {'jpg', 'jpeg', 'png', 'gif', 'webp', 'svg', 'bmp'}

    res_img = None
    try:
        res_img, res_content_type = fetch_image(url)

        # "image/SVG+xml; charset=utf-8" -> "svg"
        mime_type = res_content_type.split(';')[0].strip().lower()
        image_type = mime_type.split('/')[1] if '/' in mime_type else 'jpg'
        image_type = image_type.split('+')[0]
        if image_type not in allowed_extensions:
            image_type = 'jpg'

        # Collect the streamed body.
        image_data = b''.join(res_img.iter_content(1024))

        return save_image_to_dir(image_data, image_type, output_dir)
    except Exception:
        # Deliberate best-effort: callers only need to know it failed.
        return None
    finally:
        # A streamed response holds its connection until it is closed.
        if res_img is not None:
            res_img.close()
|
||||
|
||||
|
||||
def decode_base64_image(header: str, data: str, output_dir: str) -> str:
    """
    Decode Base64 image data and store it in ``output_dir``.

    The file extension is derived from the MIME subtype in ``header``,
    case-insensitively and without a structured-syntax suffix
    (``data:image/svg+xml;base64`` -> ``svg``). Unknown or missing subtypes
    are stored as ``jpg``.

    :param header: data-URI header, e.g. ``data:image/png;base64``
    :param data: Base64 encoded image data
    :param output_dir: output directory
    :return: stored path
    """
    allowed_extensions = {'jpg', 'jpeg', 'png', 'gif', 'webp', 'svg', 'bmp'}

    # "data:image/PNG;base64" -> "png"; a header without "/" has no subtype.
    mime_type = header.split(';')[0].strip().lower()
    image_type = mime_type.split('/')[1].split('+')[0] if '/' in mime_type else ''
    if image_type not in allowed_extensions:
        image_type = 'jpg'

    image_data = base64.b64decode(data)
    return save_image_to_dir(image_data, image_type, output_dir)
|
||||
|
||||
|
||||
def extract_image_paths(html_content: str) -> list[dict]:
    """
    Collect the ``src`` of every ``<img>`` tag in an HTML fragment and classify it.

    :param str html_content: HTML content
    :return: one dictionary per image
    :rtype: list[dict]

    Structure of each entry::

        {
            'original': 'https://external.com/img.jpg',  # src exactly as written
            'src': '/static/upload/article/images/abc.jpg',  # normalised local path, else None
            'type': 'external',  # 'local', 'domain', 'external' or 'base64'
            'url': 'https://external.com/img.jpg'  # full URL, external only
        }

    Classification:

    - ``base64``: src starts with ``data:image``; ``src`` and ``url`` are None.
    - ``domain``: absolute or protocol-relative URL whose host is one of the
      site's own hosts (compared case-insensitively, port and user info
      ignored); ``src`` is the path plus query and fragment.
    - ``external``: any other absolute or protocol-relative URL; ``url`` is
      the original value, with ``https:`` prepended when it had no scheme.
    - ``local``: everything else; ``src`` is the value with a leading ``/``.
    """
    # Hosts that count as "this site".
    allowed_domains = {
        'haiten.cn', 'www.haiten.cn', 'usasu.cn', 'www.usasu.cn', 'pathx.cn', 'www.pathx.cn',
        '127.0.0.1', '100.64.0.18', 'localhost'
    }

    # src may be the first attribute, single or double quotes must pair up,
    # and the tag may span several lines.
    img_pattern = re.compile(
        r'<img[^>]*?\s+src\s*=\s*(["\'])([^"\']+?)\1[^>]*?>?',
        re.IGNORECASE | re.DOTALL
    )

    images = []

    for match in img_pattern.finditer(html_content):
        original_src = match.group(2)  # group 2 is the src value
        image_info = {
            'original': original_src,
            'src': None,
            'type': None,
            'url': None
        }

        lowered = original_src.lower()

        if lowered.startswith('data:image'):
            image_info['type'] = 'base64'

        elif lowered.startswith(('http://', 'https://', '//')):
            parsed_url = urlparse(original_src)
            # .hostname is lower-cased and excludes port and user info.
            domain = parsed_url.hostname or ''

            if domain in allowed_domains:
                # Own host: keep only path, query and fragment.
                new_src = parsed_url.path
                if parsed_url.query:
                    new_src += f"?{parsed_url.query}"
                if parsed_url.fragment:
                    new_src += f"#{parsed_url.fragment}"
                if not new_src.startswith('/'):
                    new_src = '/' + new_src
                image_info['src'] = new_src
                image_info['type'] = 'domain'
            else:
                image_info['type'] = 'external'
                # Protocol-relative URLs need a scheme to be downloadable.
                if parsed_url.scheme:
                    image_info['url'] = original_src
                else:
                    image_info['url'] = 'https:' + original_src

        else:
            # Site-relative path: make sure it starts with "/".
            if not original_src.startswith('/'):
                original_src = '/' + original_src
            image_info['src'] = original_src
            image_info['type'] = 'local'

        images.append(image_info)

    return images
|
||||
@@ -0,0 +1,218 @@
|
||||
import datetime
|
||||
import gzip
|
||||
import io
|
||||
import re
|
||||
from typing import List
|
||||
from urllib.parse import quote
|
||||
|
||||
|
||||
def str_q_count(ustring):
    """
    Count Chinese characters and full-width characters in a text.

    A character counts when it lies in U+4E00..U+9FFF or when its code point
    is in 65281..65374 (the full-width forms of ``!`` to ``~``).

    :param ustring: text to scan
    :return: number of such characters
    """
    return sum(
        1
        for uchar in ustring
        if '\u4e00' <= uchar <= '\u9fff' or 65281 <= ord(uchar) <= 65374
    )
|
||||
|
||||
|
||||
def str_q2b(ustring):
    """
    Convert full-width characters to their half-width forms.

    :param ustring: text to convert
    :return: converted text
    """
    def narrow(uchar):
        code = ord(uchar)
        if code == 12288:
            # The full-width space maps directly to the ASCII space.
            return chr(32)
        if 65281 <= code <= 65374:
            # Other full-width forms sit at a fixed offset from ASCII.
            return chr(code - 65248)
        return chr(code)

    return "".join(narrow(uchar) for uchar in ustring)
|
||||
|
||||
|
||||
def str_b2q(ustring):
    """
    Convert half-width (ASCII) characters to their full-width forms.

    :param ustring: text to convert
    :return: converted text
    """
    widened = []
    for uchar in ustring:
        code = ord(uchar)
        if code == 32:
            # The ASCII space maps directly to the full-width space.
            code = 12288
        elif 33 <= code <= 126:
            # Other printable ASCII sits at a fixed offset from the full-width forms.
            code += 65248
        widened.append(chr(code))
    return "".join(widened)
|
||||
|
||||
|
||||
def str_gzip(data: str):
    """
    Compress a text with gzip.

    :param data: text to compress; it is encoded as UTF-8 first
    :return: gzip-compressed bytes
    """
    payload = data.encode('utf-8')
    sink = io.BytesIO()
    with gzip.GzipFile(fileobj=sink, mode='w') as packer:
        packer.write(payload)
    # The gzip trailer is written when the `with` block closes the stream,
    # so the buffer is read only afterwards.
    return sink.getvalue()
|
||||
|
||||
|
||||
def is_contains_chinese(text, length: int = None):
    """
    Tell whether a text contains Chinese characters (U+4E00..U+9FFF).

    :param text: text to check
    :param length: optional minimum number of Chinese characters required
    :return: False when there is no Chinese character at all; otherwise True,
        or, when ``length`` is given, whether at least ``length`` were found
    """
    found = sum(1 for char in text if '\u4e00' <= char <= '\u9fff')

    if found == 0:
        return False
    if length is None:
        # Default: a single Chinese character is enough.
        return True
    return found >= length
|
||||
|
||||
|
||||
def is_valid_id_number(id_str):
    """
    Check whether a string has the format of a Chinese resident identity card number.

    Both layouts are accepted:

    - 15 digits: 6-digit area code, ``YYMMDD`` birth date, 3-digit sequence
      (no check digit);
    - 18 characters: 6-digit area code, ``YYYYMMDD`` birth date (19xx/20xx),
      3-digit sequence and a check character that is verified with the
      weighted mod-11 algorithm.

    The whole string must match; surrounding whitespace or a trailing newline
    makes it invalid.

    :param id_str: string to check
    :return: True when the format (and, for 18 characters, the check
        character) is valid, otherwise False
    """
    # 15-digit layout: no check digit to verify.
    pattern_15 = r'[1-9]\d{5}\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])\d{3}'
    if re.fullmatch(pattern_15, id_str):
        return True

    # 18-character layout.
    pattern_18 = r'[1-9]\d{5}(19|20)\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])\d{3}[\dXx]'
    if not re.fullmatch(pattern_18, id_str):
        return False

    # Weights for the first 17 digits.
    weight = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2]
    # Check character for each remainder 0..10.
    validate = ['1', '0', 'X', '9', '8', '7', '6', '5', '4', '3', '2']

    # zip() stops after the 17 weights, so the check character is excluded.
    sum_val = sum(int(digit) * factor for digit, factor in zip(id_str, weight))

    return validate[sum_val % 11] == id_str[17].upper()
|
||||
|
||||
|
||||
def is_valid_phone_number(phone_str):
    """
    Check whether a string is a mobile number of mainland China.

    The number must be 11 digits, start with ``1`` and use one of the
    prefixes encoded in the pattern below.

    :param phone_str: string to check
    :return: True for a valid mobile number, otherwise False
    """
    # Number segments of mainland China mobile numbers (as of 2023).
    mobile_re = re.compile(r'^1(3[0-9]|4[5-9]|5[0-35-9]|6[2567]|7[0-8]|8[0-9]|9[0-35-9])\d{8}$')
    return mobile_re.fullmatch(phone_str) is not None
|
||||
|
||||
|
||||
def is_valid_postcode(postcode):
    """
    Check whether a value is a valid Chinese postal code.

    A valid code has exactly six digits and does not start with 0.

    :param postcode: postal code as a string or a number
    :return: True when valid, otherwise False
    """
    # Numbers are accepted too, so work on the text form.
    as_text = str(postcode)
    return re.fullmatch(r'^[1-9]\d{5}$', as_text) is not None
|
||||
|
||||
|
||||
def encode_path_to_url(local_path: str) -> str:
    """
    Convert a local file path into a URL-encoded relative path.

    Example: ``C:\\data\\报告.pdf`` becomes ``data/%E6%8A%A5%E5%91%8A.pdf``.

    Steps:
        1. Backslashes become forward slashes.
        2. A leading Windows drive (``C:/``) is removed.
        3. Empty segments (leading or doubled slashes) are dropped.
        4. Every segment is percent-encoded on its own, so the ``/``
           separators stay intact.

    :param local_path: local path in Windows or POSIX notation
    :return: URL-encoded relative path
    """
    unified = local_path.replace('\\', '/')
    without_drive = re.sub(r'^[A-Za-z]:/', '', unified)

    # Skipping empty segments also removes any leading "/".
    return '/'.join(
        quote(segment, safe='.-_')
        for segment in without_drive.split('/')
        if segment
    )
|
||||
|
||||
|
||||
def to_datetime(dt_str: str, fmt_list: List[str]):
    """
    Parse a string into a ``datetime`` using the first format that fits.

    :param dt_str: text to parse
    :param fmt_list: candidate ``strptime`` formats; put the most likely first
    :return: the parsed ``datetime``, or None when no format fits
    """
    for candidate in fmt_list:
        try:
            return datetime.datetime.strptime(dt_str, candidate)
        except Exception:
            # This format does not fit; try the next one.
            continue
    return None
|
||||
@@ -0,0 +1,154 @@
|
||||
from typing import Union, List, Optional, Dict, Any
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from paste.util import ufont
|
||||
|
||||
|
||||
def cm_to_excel_units(cm):
    """
    Convert centimetres to Excel column-width units.

    The conversion uses 2.54 cm per inch and 7 width units per inch.

    :param cm: width in centimetres
    :return: width in Excel units
    """
    inches = cm / 2.54
    return inches * 7
|
||||
|
||||
|
||||
def auto_width_cm(series: pd.Series, font_name='Microsoft YaHei', font_size=11, min_cm=1.5, max_cm=20):
    """
    Suggest a column width for a data column, treating Chinese and other characters separately.

    :param series: pandas Series holding the column data; its name is the header
    :param font_name: font family name
    :param font_size: font size
    :param min_cm: lower bound of the result in centimetres
    :param max_cm: upper bound of the result in centimetres
    :return: suggested column width in centimetres
    """
    # Average character widths for the chosen font.
    en_width_cm, cn_width_cm = ufont.get_font_metrics(font_name, font_size)

    def calculate_text_width(text):
        """Return the estimated width of ``text`` in centimetres."""
        chars = str(text)
        cn_count = sum(1 for char in chars if '\u4e00' <= char <= '\u9fff')
        en_count = len(chars) - cn_count
        return (cn_count * cn_width_cm) + (en_count * en_width_cm)

    # The wider of header and widest cell decides, plus a 20% margin.
    title_width = calculate_text_width(series.name)
    content_width = series.astype(str).apply(calculate_text_width).max()
    total_width = max(title_width, content_width) * 1.2

    return max(min(total_width, max_cm), min_cm)
|
||||
|
||||
|
||||
def auto_column_width(df, worksheet):
    """
    Set every column width of a worksheet from the column's content.

    The first font returned by ``ufont.get_fonts()`` is used for measuring.

    :param df: pandas DataFrame that was written to the sheet
    :param worksheet: worksheet whose ``set_column`` is called per column
    """
    fonts = ufont.get_fonts()
    for position, label in enumerate(df.columns):
        suggested_cm = auto_width_cm(df[label], font_name=fonts[0], font_size=11)
        worksheet.set_column(position, position, cm_to_excel_units(suggested_cm))
|
||||
|
||||
|
||||
def apply_header_style(df, worksheet, workbook, **kwargs):
    """
    Rewrite the header row (row 0) with a header format.

    :param df: source DataFrame; its column labels are the header texts
    :param worksheet: xlsxwriter worksheet object
    :param workbook: xlsxwriter workbook object
    :param kwargs: format properties that extend or override the defaults
    :return: None
    """
    options = {
        'font_size': 12,
        'bg_color': '#F2F2F2',
        'border': 1,
        'bold': True,
    }
    # Caller-supplied properties win over the defaults.
    options.update(kwargs)
    header_format = workbook.add_format(options)

    for position, title in enumerate(df.columns.values):
        worksheet.write(0, position, title, header_format)
|
||||
|
||||
|
||||
def apply_data_style(df, worksheet, workbook, **kwargs):
    """
    Rewrite every data cell (rows 1..len(df)) with a cell format.

    :param df: source DataFrame
    :param worksheet: xlsxwriter worksheet object
    :param workbook: xlsxwriter workbook object
    :param kwargs: format properties that extend or override the defaults
    :return: None
    """
    options = {
        'font_size': 12,
        'border': 1,
    }
    # Caller-supplied properties win over the defaults.
    options.update(kwargs)
    cell_format = workbook.add_format(options)

    column_count = len(df.columns)
    for record in range(len(df)):
        for col in range(column_count):
            # Sheet row 0 is the header, so data row i goes to sheet row i + 1.
            worksheet.write(record + 1, col, df.iloc[record, col], cell_format)
|
||||
|
||||
|
||||
def insert_text_to_column(
        worksheet,
        workbook,
        column: int,
        start_row: int,
        texts: Union[str, List[str]],
        text_format: Optional[Dict[str, Any]] = None
) -> None:
    """
    Write one or more texts downwards into a single column.

    NOTE(review): a column letter is converted with ``'A' -> 1`` and the
    result, like ``start_row``, is passed unchanged to ``worksheet.write``.
    If ``worksheet`` is an xlsxwriter sheet (zero-based indices) that lands
    one column to the right of the letter - confirm against callers before
    changing it. Only single letters are supported.

    :param worksheet: worksheet object providing ``write(row, col, value, format)``
    :param workbook: workbook object providing ``add_format``
    :param column: column index, or a single column letter
    :param start_row: row index of the first text
    :param texts: a text or a list of texts, one per row
    :param text_format: format properties; None uses the defaults only
    :return: None
    """
    # Defaults first, then caller-supplied properties.
    fmt = workbook.add_format({**{'font_size': 12}, **(text_format or {})})

    # Column letter -> number ('A' -> 1); numbers pass through untouched.
    col_idx = ord(column.upper()) - ord('A') + 1 if isinstance(column, str) else column

    # A single text is handled as a one-element list.
    rows = [texts] if isinstance(texts, str) else texts

    for offset, text in enumerate(rows):
        worksheet.write(start_row + offset, col_idx, text, fmt)
|
||||
Reference in New Issue
Block a user