# d3i-szct/paste/util/ustr.py

import datetime
import gzip
import io
import re
from typing import List
from urllib.parse import quote


def str_q_count(ustring):
    """
    Count the Chinese and full-width characters in a text.

    A character is counted when it lies between U+4E00 and U+9FFF
    (inclusive) or when its code point is in 65281..65374, the full-width
    counterparts of the printable ASCII characters.

    :param ustring: text to scan
    :return: number of matching characters
    """

    def _is_wide(ch):
        # Resolve the code point first so every character is passed
        # through ord(), whichever range ends up matching.
        code_point = ord(ch)
        return '\u4e00' <= ch <= '\u9fff' or 65281 <= code_point <= 65374

    return sum(1 for ch in ustring if _is_wide(ch))


def str_q2b(ustring):
    """
    Convert full-width characters to their half-width equivalents.

    The ideographic space (code point 12288) becomes an ASCII space; code
    points 65281..65374 are shifted down by 65248. Every other character
    is passed through unchanged.

    :param ustring: text to convert
    :return: the converted text
    """

    def _narrow(ch):
        code_point = ord(ch)
        if code_point == 12288:
            # The full-width space does not follow the fixed offset.
            return chr(32)
        if 65281 <= code_point <= 65374:
            # All other full-width forms sit at a constant offset.
            return chr(code_point - 65248)
        return chr(code_point)

    return "".join(_narrow(ch) for ch in ustring)


def str_b2q(ustring):
    """
    Convert half-width characters to their full-width equivalents.

    An ASCII space (code point 32) becomes the ideographic space (12288);
    code points 33..126 are shifted up by 65248. Every other character is
    passed through unchanged.

    :param ustring: text to convert
    :return: the converted text
    """
    widened = []
    for ch in ustring:
        code_point = ord(ch)
        if code_point == 32:
            # The space maps to a dedicated code point, not the offset.
            code_point = 12288
        elif 32 < code_point <= 126:
            # Remaining printable ASCII sits at a constant offset.
            code_point += 65248
        widened.append(chr(code_point))
    return "".join(widened)


def str_gzip(data: str):
    """
    Gzip-compress a text and return the compressed bytes.

    The text is encoded as UTF-8 and written through a ``gzip.GzipFile``
    backed by an in-memory buffer.

    :param data: text to compress
    :return: the gzip-compressed bytes
    """
    payload = data.encode('utf-8')
    sink = io.BytesIO()
    gz_stream = gzip.GzipFile(fileobj=sink, mode='w')
    try:
        gz_stream.write(payload)
    finally:
        # Closing flushes the gzip trailer into the buffer.
        gz_stream.close()
    return sink.getvalue()


def is_contains_chinese(text, length: int = None):
    """
    Check whether a string contains Chinese characters.

    Characters between U+4E00 and U+9FFF (inclusive) are treated as
    Chinese.

    :param text: string to check
    :param length: optional minimum number of Chinese characters required
    :return: False when no Chinese character is present; otherwise True,
        or, when ``length`` is given, whether the count reaches ``length``
    """
    chinese_count = sum(1 for ch in text if '\u4e00' <= ch <= '\u9fff')

    # No Chinese characters at all is always a negative result, whatever
    # minimum was requested.
    if chinese_count == 0:
        return False

    # Without a minimum, a single Chinese character is enough.
    if length is None:
        return True

    return chinese_count >= length


def is_valid_id_number(id_str):
    """
    Check whether a string is a well-formed Chinese resident ID number.

    Both the 15-digit and the 18-character formats are accepted:

    * 15 digits: 6-digit area code (no leading zero), 2-digit year,
      month, day and a 3-digit sequence number. This format carries no
      check character, so only the layout is validated.
    * 18 characters: 6-digit area code (no leading zero), 4-digit year
      starting with 19 or 20, month, day, 3-digit sequence number and a
      check character (digit or ``X``/``x``) that is verified against the
      weighted sum of the first 17 digits modulo 11.

    The whole string must match; surrounding whitespace or a trailing
    newline makes the value invalid.

    :param id_str: string to check
    :return: True if the string is a valid ID number, otherwise False
    """
    # Month 01-12 followed by day 01-31 (no per-month day-count check).
    month_day = r'(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])'

    # 15-digit form: no check character, so a layout match is sufficient.
    if re.fullmatch(r'[1-9]\d{5}\d{2}' + month_day + r'\d{3}', id_str):
        return True

    # 18-character form: fullmatch (rather than match + '$') so that a
    # trailing newline cannot slip through.
    pattern_18 = r'[1-9]\d{5}(19|20)\d{2}' + month_day + r'\d{3}[\dXx]'
    if not re.fullmatch(pattern_18, id_str):
        return False

    # Weight applied to each of the first 17 digits.
    weights = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
    # Expected check character, indexed by (weighted sum % 11).
    check_chars = '10X98765432'

    # zip() stops after 17 pairs, so the check character is not summed.
    weighted_sum = sum(int(digit) * w for digit, w in zip(id_str, weights))
    return check_chars[weighted_sum % 11] == id_str[17].upper()


def is_valid_phone_number(phone_str):
    """
    Check whether a string is a valid mainland-China mobile phone number.

    The string must be exactly 11 digits: a leading ``1``, a two-digit
    segment from the allow-list encoded in the pattern, then 8 more
    digits.

    :param phone_str: string to check
    :return: True if the string is a valid mobile number, otherwise False
    """
    # Segment allow-list; the original author noted it reflects the 2023
    # mainland-China number plan.
    mobile_pattern = r'^1(3[0-9]|4[5-9]|5[0-35-9]|6[2567]|7[0-8]|8[0-9]|9[0-35-9])\d{8}$'
    matched = re.fullmatch(mobile_pattern, phone_str)
    return matched is not None


def is_valid_postcode(postcode):
    """
    Check whether a value is a valid Chinese postal code.

    The value is converted with ``str()`` and must then be exactly six
    digits, the first of which is not ``0``.

    :param postcode: postal code to check, as a string or a number
    :return: True if the postal code is valid, otherwise False
    """
    # Six digits, first digit 1-9.
    postcode_pattern = r'^[1-9]\d{5}$'
    candidate = str(postcode)
    return re.fullmatch(postcode_pattern, candidate) is not None


def encode_path_to_url(local_path: str) -> str:
    """
    Convert a local file path into a URL-encoded relative path.

    Processing steps:

    1. Backslashes are replaced with ``/``.
    2. A leading Windows drive prefix such as ``C:/`` is removed.
    3. Leading ``/`` characters are stripped and empty segments dropped.
    4. Each remaining segment is percent-encoded and the segments are
       re-joined with ``/``.

    :param local_path: local path, e.g. ``"C:\\\\data\\\\报告.pdf"`` or
        ``"/var/www/文件.txt"``
    :return: URL-encoded relative path, e.g.
        ``"data/%E6%8A%A5%E5%91%8A.pdf"``
    """
    posix_path = local_path.replace('\\', '/')
    without_drive = re.sub(r'^[A-Za-z]:/', '', posix_path)
    segments = without_drive.lstrip('/').split('/')

    # Encode segment by segment so the '/' separators survive; '.', '-'
    # and '_' are left unescaped. Empty segments (from repeated or
    # trailing slashes) are skipped.
    return '/'.join(quote(segment, safe='.-_') for segment in segments if segment)


def to_datetime(dt_str: str, fmt_list: List[str]):
    """
    Parse a string into a ``datetime.datetime`` using candidate formats.

    The formats are tried in order and the first successful parse is
    returned immediately, so the most likely format should come first.

    :param dt_str: string to parse
    :param fmt_list: ``strptime`` format strings to try, in priority order
    :return: the parsed ``datetime.datetime``, or ``None`` when no format
        matches (including when ``dt_str`` or a format is not a string)
    """
    for fmt in fmt_list:
        try:
            return datetime.datetime.strptime(dt_str, fmt)
        except (ValueError, TypeError):
            # ValueError: the text does not match this format (or the
            # format itself is malformed); TypeError: a non-string
            # argument. Either way, fall through to the next candidate.
            continue

    return None