初始化项目

This commit is contained in:
zwf
2026-06-02 17:46:38 +08:00
commit 646a4d02c0
240 changed files with 33662 additions and 0 deletions
+3
View File
@@ -0,0 +1,3 @@
"""
市12345对接模块。
"""
Binary file not shown.
Binary file not shown.
+65
View File
@@ -0,0 +1,65 @@
"""
市12345对接 API 基础功能。
"""
from tornado.httpclient import AsyncHTTPClient
import dock
from paste.core import config
# Root URL of the docking API.
ApiUrl = "http://2.46.12.176:8091/sz12345"

# Proxy server settings, read from the 'dock.govc.proxy' configuration entry.
ProxyConfig = config.get_config('dock.govc.proxy')

if ProxyConfig:
    if ProxyConfig.get('proxy_host') and ProxyConfig.get('proxy_port'):
        # Switch to the underlying curl-based client so that the proxy server takes effect.
        AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
async def new_api_request(api_url: str, request_body: dict, method: str = 'POST',
                          timeout: float = dock.DEFAULT_TIMEOUT, use_form: bool = True, headers: dict = None):
    """
    Build an API request object.

    :param api_url: API path; a URI starting with a slash, not a complete URL
    :param request_body: request body, i.e. all request parameters
    :param method: HTTP method used to submit the request
    :param timeout: timeout duration
    :param use_form: whether to submit the body as a form
    :param headers: extra headers; highest priority, they override the defaults built here
    :return: the request object produced by ``dock.new_http_request``
    """
    # Imported inside the function — presumably to avoid a circular import
    # between govc_api and govc_security; confirm before moving it.
    from dock.govc import govc_security

    # Session cookies for the target site.
    _, cookie_header = await govc_security.get_cookies()

    # Only the user-agent string of the random browser profile is needed.
    user_agent, _browser_ver, _os_name = dock.get_random_user_agent()
    extra_headers = {
        'Cookie': cookie_header,
        'EPTOKEN': dock.get_cookie_value(cookie_header, 'EPTOKEN'),
        'Host': '2.46.12.176:8091',
        'Origin': 'http://2.46.12.176:8091',
        'Referer': 'http://2.46.12.176:8091/sz12345/bmfw/bmfwlogin/login',
        'User-Agent': user_agent,
    }
    if headers is not None:
        # Caller-supplied headers win over the defaults above.
        extra_headers.update(headers)

    return dock.new_http_request(
        url=f"{ApiUrl}{api_url}",
        body=request_body,
        method=method,
        timeout=timeout,
        use_form=use_form,
        extra_headers=extra_headers,
        # ProxyConfig may be missing (the module-level check treats it as optional);
        # unpacking None would raise TypeError, so fall back to no proxy arguments.
        **(ProxyConfig or {})
    )
+94
View File
@@ -0,0 +1,94 @@
"""
数据抓取模块。
"""
import asyncio
from typing import Optional, Union
from sqlalchemy import select, desc
import dock
from dock.govc import govc_scrape_dept_feedback, govc_scrape_return_visit, govc_scrape_finish_info, govc_scrape_order
from models.govc_task import GovcTask
from paste.core.logging import echo_log
from paste.web import requests
async def fetch_govc_task(fetch_size: int = 60, task_id: Optional[Union[str, int, list]] = None):
    """
    Scrape the to-do (task) list and then the detail data of the selected tasks.

    The task list is fetched and persisted first. Afterwards the stored tasks are
    read back (newest id first) and, for each of them, the department feedback,
    return-visit result and finish information are requested concurrently.

    :param fetch_size: page size of the list request; also the number of newest
        tasks whose details are fetched when ``task_id`` is not given
    :param task_id: optional task id, or a list of task ids, restricting the
        detail scraping to those tasks
    """
    echo_log("开始抓取待办数据...")
    task_request = await govc_scrape_order.get_task_request(
        fetch_size=fetch_size
    )
    request_queue = asyncio.Queue()
    await request_queue.put(task_request)
    await requests.async_concurrency(
        request_queue, retry=dock.MAX_RETRY_COUNT,
        after_request=govc_scrape_order.after_task_request
    )
    echo_log("待办数据抓取完成...")

    # Read the stored tasks back so the detail requests target the latest data.
    query = select(
        GovcTask.id, GovcTask.pvi_guid, GovcTask.c_guid
    ).order_by(
        desc(GovcTask.id)
    )
    if task_id:
        if isinstance(task_id, list):
            query = query.where(GovcTask.id.in_(task_id))
            echo_log(f"开始抓取待办列表:{task_id} 的详细数据...")
        else:
            query = query.where(GovcTask.id == task_id)
            echo_log(f"开始抓取待办:{task_id} 的详细数据...")
    else:
        echo_log(f"开始抓取前 {fetch_size} 条待办的详细数据...")
        query = query.limit(fetch_size)
    task_df = await GovcTask.query_as_df(query)

    # One queue per detail type, paired with the builder that fills it.
    feedback_queue = asyncio.Queue()
    return_visit_queue = asyncio.Queue()
    finish_info_queue = asyncio.Queue()
    request_plan = (
        (govc_scrape_dept_feedback.get_feedback_request, feedback_queue),
        (govc_scrape_return_visit.get_return_visit_request, return_visit_queue),
        (govc_scrape_finish_info.get_finish_info_request, finish_info_queue),
    )

    echo_log("正在准备请求队列...")
    for _, row in task_df.iterrows():
        row_task_id = row.get(GovcTask.id.key)
        pvi_guid = row.get(GovcTask.pvi_guid.key)
        c_guid = row.get(GovcTask.c_guid.key)
        for build_request, queue in request_plan:
            detail_request = await build_request(pvi_guid, c_guid)
            # The response handlers read the task id back from the request object.
            setattr(detail_request, 'task_id', row_task_id)
            await queue.put(detail_request)

    echo_log("抓取待办详细数据...")
    tasks = [
        requests.async_concurrency(
            feedback_queue, con_count=dock.CONCURRENCY_COUNT, retry=dock.MAX_RETRY_COUNT,
            after_request=govc_scrape_dept_feedback.after_feedback_request
        ),
        # Fixed: the handler used to be looked up on `govc_scrape_result_info`, a
        # module that is never imported here (NameError). The queue is filled by
        # govc_scrape_return_visit, so its handler is the matching one.
        requests.async_concurrency(
            return_visit_queue, con_count=dock.CONCURRENCY_COUNT, retry=dock.MAX_RETRY_COUNT,
            after_request=govc_scrape_return_visit.after_return_visit_request
        ),
        requests.async_concurrency(
            finish_info_queue, con_count=dock.CONCURRENCY_COUNT, retry=dock.MAX_RETRY_COUNT,
            after_request=govc_scrape_finish_info.after_finish_info_request
        ),
    ]
    await asyncio.gather(*tasks)
if __name__ == "__main__":
    from paste.core import aio_pool

    # Script entry point: scrape the list and the details of 10 tasks.
    run_coroutine = aio_pool.get_aio_runner()
    run_coroutine(fetch_govc_task(10))
+59
View File
@@ -0,0 +1,59 @@
import asyncio
import json
import pandas as pd
from tornado.httpclient import HTTPResponse, HTTPRequest
from dock.govc import govc_api
import models
from models.govc_task_contact import GovcTaskContact
from paste.util import udict
from paste.core.logging import echo_log
async def get_contact_request(pviguid: str, cguid: str):
    """
    Build the request for the contact information of a single work order.

    :param pviguid: pviguid returned by the work-order list request
    :param cguid: cguid returned by the work-order list request
    :return: the request object built by ``govc_api.new_api_request``
    """
    endpoint = '/rest/sztaskworkordercommonrest/getContactInformation'
    form_fields = dict(
        ProcessVersionInstanceGuid=pviguid,
        caseguid=cguid,
        yearflag='undefined',
    )
    # The Referer is the endpoint's own full URL.
    referer_header = {'Referer': f'{govc_api.ApiUrl}{endpoint}'}
    return await govc_api.new_api_request(endpoint, form_fields, headers=referer_header)
async def after_contact_request(response: HTTPResponse, retry_queue: asyncio.Queue[HTTPRequest]):
    """
    Handle the response of a contact-information request.

    Decodes the JSON body, reads ``params.linklist``, renames the source fields to
    the ``GovcTaskContact`` column names, tags every row with the task id carried
    on the request object and saves the rows through ``GovcTaskContact.save_batch``.

    :param response: response object; its request must carry the task id attribute
    :param retry_queue: retry queue; only inspected to log pending retries
    """
    payload = json.loads(response.body.decode())
    contact_list = udict.get_by_path(payload, 'params.linklist')
    if contact_list:
        # FieldMapping is keyed by table column; invert it so the source field
        # names can be renamed to the table's column names.
        source_to_column = {source: column for column, source in GovcTaskContact.FieldMapping.items()}
        frame = pd.DataFrame(contact_list).rename(columns=source_to_column)
        # Turn empty markers into None so they are stored as NULL.
        frame.replace(models.EmptyInDF + models.EmptyDatetimeInDF, None, inplace=True)
        frame[GovcTaskContact.task_id.key] = getattr(response.request, GovcTaskContact.task_id.key)
        created, updated = await GovcTaskContact.save_batch(frame)
        echo_log(f"成功创建联系信息:{created}条,更新:{updated}条.")
    else:
        echo_log('未获取到联系信息')
    # asyncio.Queue defines neither __len__ nor __bool__, so a bare truth test is
    # always true; check emptiness explicitly to avoid logging "0 waiting".
    if retry_queue is not None and not retry_queue.empty():
        echo_log(f"联系信息重试队列中有:{retry_queue.qsize()} 个请求在等待.")
+59
View File
@@ -0,0 +1,59 @@
import asyncio
import json
import pandas as pd
from tornado.httpclient import HTTPResponse, HTTPRequest
from dock.govc import govc_api
import models
from models.govc_task_delay import GovcTaskDelay
from paste.util import udict
from paste.core.logging import echo_log
async def get_delay_request(pviguid: str, cguid: str):
    """
    Build the request for the delay information of a single work order.

    :param pviguid: pviguid returned by the work-order list request
    :param cguid: cguid returned by the work-order list request
    :return: the request object built by ``govc_api.new_api_request``
    """
    endpoint = '/rest/sztaskhandlerest/getDelayInfo'
    form_fields = dict(
        ProcessVersionInstanceGuid=pviguid,
        caseguid=cguid,
        yearflag='undefined',
    )
    # The Referer is the endpoint's own full URL.
    referer_header = {'Referer': f'{govc_api.ApiUrl}{endpoint}'}
    return await govc_api.new_api_request(endpoint, form_fields, headers=referer_header)
async def after_delay_request(response: HTTPResponse, retry_queue: asyncio.Queue[HTTPRequest]):
    """
    Handle the response of a delay-information request.

    Decodes the JSON body, reads ``params.table``, renames the source fields to the
    ``GovcTaskDelay`` column names, tags every row with the task id carried on the
    request object and saves the rows through ``GovcTaskDelay.save_batch``.

    :param response: response object; its request must carry the task id attribute
    :param retry_queue: retry queue; only inspected to log pending retries
    """
    payload = json.loads(response.body.decode())
    delay_list = udict.get_by_path(payload, 'params.table')
    if delay_list:
        # FieldMapping is keyed by table column; invert it so the source field
        # names can be renamed to the table's column names.
        source_to_column = {source: column for column, source in GovcTaskDelay.FieldMapping.items()}
        frame = pd.DataFrame(delay_list).rename(columns=source_to_column)
        # Turn empty markers into None so they are stored as NULL.
        frame.replace(models.EmptyInDF + models.EmptyDatetimeInDF, None, inplace=True)
        frame[GovcTaskDelay.task_id.key] = getattr(response.request, GovcTaskDelay.task_id.key)
        created, updated = await GovcTaskDelay.save_batch(frame)
        echo_log(f"成功创建延迟信息:{created}条,更新:{updated}条.")
    else:
        echo_log('未获取到延迟信息')
    # asyncio.Queue defines neither __len__ nor __bool__, so a bare truth test is
    # always true; check emptiness explicitly to avoid logging "0 waiting".
    if retry_queue is not None and not retry_queue.empty():
        echo_log(f"延迟信息重试队列中有:{retry_queue.qsize()} 个请求在等待.")
+66
View File
@@ -0,0 +1,66 @@
import asyncio
import json
import pandas as pd
from tornado.httpclient import HTTPResponse, HTTPRequest
from dock.govc import govc_api
import models
from models.govc_task_department_feedback import GovcTaskDeptFeedback
from paste.util import udict
from paste.core.logging import echo_log
async def get_feedback_request(pviguid: str, cguid: str):
    """
    Build the request for the department handling feedback of a single work order.

    :param pviguid: pviguid returned by the work-order list request
    :param cguid: cguid returned by the work-order list request
    :return: the request object built by ``govc_api.new_api_request``
    """
    endpoint = '/rest/sztaskworkordercommonrest/getDeptfeedback'
    form_fields = dict(
        ProcessVersionInstanceGuid=pviguid,
        caseguid=cguid,
        yearflag='undefined',
    )
    # The Referer is the endpoint's own full URL.
    referer_header = {'Referer': f'{govc_api.ApiUrl}{endpoint}'}
    return await govc_api.new_api_request(endpoint, form_fields, headers=referer_header)
async def after_feedback_request(response: HTTPResponse, retry_queue: asyncio.Queue[HTTPRequest]):
    """
    Handle the response of a department-feedback request.

    Decodes the JSON body, reads ``params.feedbackresult``, renames the source
    fields to the ``GovcTaskDeptFeedback`` column names, serialises nested
    list/dict values to JSON text, tags every row with the task id carried on the
    request object and saves the rows through ``GovcTaskDeptFeedback.save_batch``.

    :param response: response object; its request must carry the task id attribute
    :param retry_queue: retry queue; only inspected to log pending retries
    """
    payload = json.loads(response.body.decode())
    feedback_list = udict.get_by_path(payload, 'params.feedbackresult')
    if feedback_list:
        # FieldMapping is keyed by table column; invert it so the source field
        # names can be renamed to the table's column names.
        source_to_column = {source: column for column, source in GovcTaskDeptFeedback.FieldMapping.items()}
        frame = pd.DataFrame(feedback_list).rename(columns=source_to_column)

        def to_json_text(value):
            """Serialise nested lists/dicts to JSON text; leave other values untouched."""
            if isinstance(value, (list, dict)):
                return json.dumps(value, ensure_ascii=False)
            return value

        for column in (GovcTaskDeptFeedback.zxhf_info.key, GovcTaskDeptFeedback.back_info.key):
            # The column only exists when the response carried the field; skip it
            # otherwise instead of failing the whole batch with a KeyError.
            if column in frame.columns:
                frame[column] = frame[column].apply(to_json_text)

        # Turn empty markers into None so they are stored as NULL.
        frame.replace(models.EmptyInDF + models.EmptyDatetimeInDF, None, inplace=True)
        frame[GovcTaskDeptFeedback.task_id.key] = getattr(response.request, GovcTaskDeptFeedback.task_id.key)
        created, updated = await GovcTaskDeptFeedback.save_batch(frame)
        echo_log(f"成功创建部门处置信息:{created}条,更新:{updated}条.")
    else:
        echo_log('未获取到部门处置信息')
    # asyncio.Queue defines neither __len__ nor __bool__, so a bare truth test is
    # always true; check emptiness explicitly to avoid logging "0 waiting".
    if retry_queue is not None and not retry_queue.empty():
        echo_log(f"部门处置信息重试队列中有:{retry_queue.qsize()} 个请求在等待.")
+67
View File
@@ -0,0 +1,67 @@
import asyncio
import json
import pandas as pd
from tornado.httpclient import HTTPResponse, HTTPRequest
from sqlalchemy import select
from dock.govc import govc_api
import models
from models.govc_task_detail import GovcTaskDetail
from models.govc_task_attachment import GovcTaskAttachment
from paste.util import udict
from paste.core.logging import echo_log
async def get_detail_request(cguid: str):
    """
    Build the GET request for the detail information of a single work order.

    :param cguid: cguid returned by the work-order list request
    :return: the request object built by ``govc_api.new_api_request``
    """
    endpoint = '/rest/sztaskworkordercommonrest/getDetail'
    query_fields = dict(caseguid=cguid, secret=1)
    # The Referer is the endpoint's own full URL.
    referer_header = {'Referer': f'{govc_api.ApiUrl}{endpoint}'}
    return await govc_api.new_api_request(endpoint, query_fields, headers=referer_header, method='GET')
async def after_detail_request(response: HTTPResponse, retry_queue: asyncio.Queue[HTTPRequest]):
    """
    Handle the response of a work-order detail request.

    Decodes the JSON body and stores ``params`` as a single ``GovcTaskDetail`` row
    (source fields renamed to column names, tagged with the task id carried on the
    request object). When ``params.files`` is present, the files are stored as
    ``GovcTaskAttachment`` rows linked to the task and to the detail record.

    :param response: response object; its request must carry the task id attribute
    :param retry_queue: retry queue; only inspected to log pending retries
    """
    payload = json.loads(response.body.decode())
    detail_info = udict.get_by_path(payload, 'params')
    if detail_info:
        # FieldMapping is keyed by table column; invert it so the source field
        # names can be renamed to the table's column names.
        source_to_column = {source: column for column, source in GovcTaskDetail.FieldMapping.items()}
        frame = pd.DataFrame([detail_info]).rename(columns=source_to_column)
        # Turn empty markers into None so they are stored as NULL.
        frame.replace(models.EmptyInDF + models.EmptyDatetimeInDF, None, inplace=True)
        task_id = getattr(response.request, GovcTaskDetail.task_id.key)
        frame[GovcTaskDetail.task_id.key] = task_id
        created, updated = await GovcTaskDetail.save_batch(frame)
        echo_log(f"成功创建详情信息:{created}条,更新:{updated}条.")

        files = udict.get_by_path(detail_info, 'files')
        if files:
            attachments = pd.DataFrame(files)
            attachments[GovcTaskAttachment.task_id.key] = task_id
            # Look up the detail record saved above to link the attachments to it.
            detail_query = select(GovcTaskDetail.id).where(GovcTaskDetail.task_id == task_id)
            attachments[GovcTaskAttachment.detail_id.key] = await GovcTaskDetail.query_first(detail_query)
            created, updated = await GovcTaskAttachment.save_batch(attachments)
            echo_log(f"成功创建附件信息:{created}条,更新:{updated}条.")
    else:
        echo_log('未获取到详情信息')
    # asyncio.Queue defines neither __len__ nor __bool__, so a bare truth test is
    # always true; check emptiness explicitly to avoid logging "0 waiting".
    if retry_queue is not None and not retry_queue.empty():
        echo_log(f"详情信息重试队列中有:{retry_queue.qsize()} 个请求在等待.")
+59
View File
@@ -0,0 +1,59 @@
import asyncio
import json
import pandas as pd
from tornado.httpclient import HTTPResponse, HTTPRequest
from dock.govc import govc_api
import models
from models.govc_task_finish import GovcTaskFinish
from paste.util import udict
from paste.core.logging import echo_log
async def get_finish_info_request(pviguid: str, cguid: str):
    """
    Build the request for the finish (closure) information of a single work order.

    :param pviguid: pviguid returned by the work-order list request
    :param cguid: cguid returned by the work-order list request
    :return: the request object built by ``govc_api.new_api_request``
    """
    endpoint = '/rest/sztaskworkordercommonrest/getFinishInfo'
    form_fields = dict(
        ProcessVersionInstanceGuid=pviguid,
        caseguid=cguid,
        yearflag='undefined',
    )
    # The Referer is the endpoint's own full URL.
    referer_header = {'Referer': f'{govc_api.ApiUrl}{endpoint}'}
    return await govc_api.new_api_request(endpoint, form_fields, headers=referer_header)
async def after_finish_info_request(response: HTTPResponse, retry_queue: asyncio.Queue[HTTPRequest]):
    """
    Handle the response of a finish-information request.

    Decodes the JSON body, reads ``params.finishinfo``, renames the source fields
    to the ``GovcTaskFinish`` column names, tags every row with the task id carried
    on the request object and saves the rows through ``GovcTaskFinish.save_batch``.

    :param response: response object; its request must carry the task id attribute
    :param retry_queue: retry queue; only inspected to log pending retries
    """
    payload = json.loads(response.body.decode())
    finish_list = udict.get_by_path(payload, 'params.finishinfo')
    if finish_list:
        # FieldMapping is keyed by table column; invert it so the source field
        # names can be renamed to the table's column names.
        source_to_column = {source: column for column, source in GovcTaskFinish.FieldMapping.items()}
        frame = pd.DataFrame(finish_list).rename(columns=source_to_column)
        # Turn empty markers into None so they are stored as NULL.
        frame.replace(models.EmptyInDF + models.EmptyDatetimeInDF, None, inplace=True)
        frame[GovcTaskFinish.task_id.key] = getattr(response.request, GovcTaskFinish.task_id.key)
        created, updated = await GovcTaskFinish.save_batch(frame)
        echo_log(f"成功创建办结信息:{created}条,更新:{updated}条.")
    else:
        echo_log('未获取到办结信息')
    # asyncio.Queue defines neither __len__ nor __bool__, so a bare truth test is
    # always true; check emptiness explicitly to avoid logging "0 waiting".
    if retry_queue is not None and not retry_queue.empty():
        echo_log(f"办结信息重试队列中有:{retry_queue.qsize()} 个请求在等待.")
+238
View File
@@ -0,0 +1,238 @@
import asyncio
import json
import pandas as pd
from tornado.httpclient import HTTPResponse, HTTPRequest
import dock
from dock.govc import govc_api, govc_security
from models.govc_task import GovcTask
import models
from paste.core.logging import echo_log
from paste.util import udict
from paste.web import requests
def _textbox_control(control_id: str, bind: str) -> dict:
    """Return the descriptor of an empty textbox search control."""
    return {
        "id": control_id,
        "bind": bind,
        "type": "textbox",
        "action": "",
        "value": "",
        "text": ""
    }


def _combobox_control(control_id: str, bind: str, action: str) -> dict:
    """Return the descriptor of an unselected combobox search control."""
    return {
        "id": control_id,
        "bind": bind,
        "type": "combobox",
        "action": action,
        "textField": "text",
        "valueField": "id",
        "pinyinField": "tag",
        "columns": [],
        "value": "",
        "text": ""
    }


def _datepicker_control(control_id: str) -> dict:
    """Return the descriptor of an empty datepicker control bound to its own id."""
    return {
        "id": control_id,
        "bind": control_id,
        "type": "datepicker",
        "action": "",
        "format": "yyyy/MM/dd HH:mm:ss",
        "value": "",
        "text": ""
    }


async def get_task_request(fetch_size: int = 100):
    """
    Build the request that fetches the task list.

    The request is a POST that submits form data to the task-list endpoint in
    order to obtain one page of tasks.

    :param fetch_size: number of tasks to fetch (used as the page size)
    :return: the request object built by ``govc_api.new_api_request``
    """
    api_url = "/rest/bmfw/business/taskinfo/query/sztaskquerylistaction/getDataGridData?moduleGuid=94835dc2-a76f-489b-ae76-ea8ed699a113"
    headers = {
        'Referer': f'{govc_api.ApiUrl}/bmfw/business/taskinfo/query/taskquerylist?moduleGuid=94835dc2-a76f-489b-ae76-ea8ed699a113',
    }
    # Design note carried over from the original: the common request object and the
    # custom one are meant to be merged, and the field that mainly needs merging
    # should be cmdParams.
    # Only the login id is needed here; the cookie header is handled by new_api_request.
    epoint_user_loginid, _ = await govc_security.get_cookies()

    timestamp_format = "yyyy-MM-dd HH:mm:ss"
    datagrid = {
        "id": "datagrid",
        "type": "datagrid",
        "action": "getDataGridData",
        "idField": "rowguid",
        "pageIndex": 0,
        "sortField": "",
        "sortOrder": "desc",
        "columns": [
            {"fieldName": "serialnum"},
            {"fieldName": "rqsttitle"},
            {"fieldName": "rqstcontent"},
            {"fieldName": "createdate", "format": timestamp_format},
            {"fieldName": "handleouname"},
            {"fieldName": "finishtime_bf", "format": timestamp_format},
            {"fieldName": "backtime_bf", "format": timestamp_format},
            {"fieldName": "tstatus", "code": "任务单状态"}
        ],
        "pageSize": fetch_size,
        "url": "getDataGridData",
        "data": [],
        "isSecondRequest": True
    }
    # Control order and key order are kept exactly as in the captured page request.
    controls = [
        _textbox_control("cserial", "cnsTinfo.serialnum"),
        _textbox_control("rqsttitle", "cnsTinfo.rqsttitle"),
        _textbox_control("mini-12", "cnsTinfo.handleouname"),
        _combobox_control("rqsttype", "cnsTinfo.rqsttype", "getRqsttypeModel"),
        _combobox_control("search_tstatus", "cnsTinfo.tstatus", "getTstatusModel"),
        _combobox_control("rqschannel", "cnsTinfo.rqschannel", "getRqschannelModel"),
        _textbox_control("rqstcontent", "cnsTinfo.rqstcontent"),
        _datepicker_control("startDate"),
        _datepicker_control("endDate"),
        {
            "id": "dataexport",
            "type": "dataexport",
            "action": "getExportBigDataModel",
            "mapClass": "com.epoint.basic.faces.export.DataExport",
            "exportAction": "zwztexportaction.export"
        },
        datagrid,
        {
            "id": "_common_hidden_viewdata",
            "type": "hidden",
            "value": json.dumps({'epoint_user_loginid': epoint_user_loginid}, separators=(',', ':'))
        }
    ]
    request_body = {
        "commonDto": json.dumps(controls, separators=(',', ':')),
        "cmdParams": json.dumps({
            'pageUrl': api_url
        }, separators=(',', ':')),
        'pageIndex': 0,
        'pageSize': fetch_size,
        'sortField': '',
        'sortOrder': 'desc',
        'isSecondRequest': 'true'
    }
    return await govc_api.new_api_request(api_url, request_body, headers=headers)
async def after_task_request(response: HTTPResponse, retry_queue: asyncio.Queue[HTTPRequest]):
    """
    Handle the response of the task-list request.

    Decodes the JSON body, reads ``controls.0.data``, renames the source fields to
    the ``GovcTask`` column names, blanks non-integer timestamp values and saves
    the rows through ``GovcTask.save_batch``.

    :param response: response object
    :param retry_queue: retry queue; only inspected to log pending retries
    """
    payload = json.loads(response.body.decode())
    list_data: list[dict] = udict.get_by_path(payload, 'controls.0.data')
    if list_data:
        # FieldMapping is keyed by table column; invert it so the source field
        # names can be renamed to the table's column names.
        source_to_column = {source: column for column, source in GovcTask.FieldMapping.items()}
        frame = pd.DataFrame(list_data).rename(columns=source_to_column)

        def int_or_none(value):
            """Keep integer timestamps, replace anything else with None."""
            return value if isinstance(value, int) else None

        # NOTE(review): if pandas infers a float column (e.g. integers mixed with
        # nulls) every value fails the int check and becomes None — confirm the
        # API never sends nulls for these fields.
        for column in (GovcTask.finish_time.key, GovcTask.sign_time.key, GovcTask.sign_time_bf.key):
            # Skip a timestamp field the response did not carry instead of
            # failing the whole batch with a KeyError.
            if column in frame.columns:
                frame[column] = frame[column].apply(int_or_none)

        # Turn empty markers into None so they are stored as NULL.
        frame.replace(models.EmptyInDF + models.EmptyDatetimeInDF, None, inplace=True)
        created, updated = await GovcTask.save_batch(frame)
        echo_log(f"成功创建企业待办:{created}条,更新:{updated}条.")
    else:
        echo_log('未获取到企业待办数据')
    # asyncio.Queue defines neither __len__ nor __bool__, so a bare truth test is
    # always true; check emptiness explicitly to avoid logging "0 waiting".
    if retry_queue is not None and not retry_queue.empty():
        echo_log(f"企业待办重试队列中有:{retry_queue.qsize()} 个请求在等待.")
if __name__ == "__main__":
    from paste.core import aio_pool

    async def scrape():
        """Fetch the task list once and persist it (script entry point)."""
        list_request = await get_task_request()
        pending = asyncio.Queue()
        await pending.put(list_request)
        await requests.async_concurrency(
            pending,
            retry=dock.MAX_RETRY_COUNT,
            after_request=after_task_request,
        )

    run_coroutine = aio_pool.get_aio_runner()
    run_coroutine(scrape())
+59
View File
@@ -0,0 +1,59 @@
import asyncio
import json
import pandas as pd
from tornado.httpclient import HTTPResponse, HTTPRequest
from dock.govc import govc_api
import models
from models.govc_task_status import GovcTaskStatus
from paste.util import udict
from paste.core.logging import echo_log
async def get_fetch_status_request(pviguid: str, cguid: str):
    """
    Build the request for the status information of a single work order.

    :param pviguid: pviguid returned by the work-order list request
    :param cguid: cguid returned by the work-order list request
    :return: the request object built by ``govc_api.new_api_request``
    """
    endpoint = '/rest/sztaskworkordercommonrest/getCinfoLink'
    form_fields = dict(
        ProcessVersionInstanceGuid=pviguid,
        caseguid=cguid,
        yearflag='undefined',
    )
    # The Referer is the endpoint's own full URL.
    referer_header = {'Referer': f'{govc_api.ApiUrl}{endpoint}'}
    return await govc_api.new_api_request(endpoint, form_fields, headers=referer_header)
async def after_fetch_status_request(response: HTTPResponse, retry_queue: asyncio.Queue[HTTPRequest]):
    """
    Handle the response of a work-order status request.

    Decodes the JSON body, reads ``params.statuslink`` and stores it as a single
    ``GovcTaskStatus`` row (source fields renamed to column names, tagged with the
    task id carried on the request object) through ``GovcTaskStatus.save_batch``.

    :param response: response object; its request must carry the task id attribute
    :param retry_queue: retry queue; only inspected to log pending retries
    """
    payload = json.loads(response.body.decode())
    status_dict = udict.get_by_path(payload, 'params.statuslink')
    if status_dict:
        # FieldMapping is keyed by table column; invert it so the source field
        # names can be renamed to the table's column names.
        source_to_column = {source: column for column, source in GovcTaskStatus.FieldMapping.items()}
        frame = pd.DataFrame([status_dict]).rename(columns=source_to_column)
        # Turn empty markers into None so they are stored as NULL.
        frame.replace(models.EmptyInDF + models.EmptyDatetimeInDF, None, inplace=True)
        frame[GovcTaskStatus.task_id.key] = getattr(response.request, GovcTaskStatus.task_id.key)
        created, updated = await GovcTaskStatus.save_batch(frame)
        echo_log(f"成功创建工单状态信息:{created}条,更新:{updated}条.")
    else:
        echo_log('未获取到工单状态信息')
    # asyncio.Queue defines neither __len__ nor __bool__, so a bare truth test is
    # always true; check emptiness explicitly to avoid logging "0 waiting".
    if retry_queue is not None and not retry_queue.empty():
        echo_log(f"工单状态信息重试队列中有:{retry_queue.qsize()} 个请求在等待.")
+58
View File
@@ -0,0 +1,58 @@
import asyncio
import json
import pandas as pd
from tornado.httpclient import HTTPResponse, HTTPRequest
from dock.govc import govc_api
import models
from models.govc_task_process import GovcTaskProcess
from paste.util import udict
from paste.core.logging import echo_log
async def get_process_request(pviguid: str, cguid: str):
    """
    Build the GET request for the handling-process (tracing) information of a
    single work order.

    :param pviguid: pviguid returned by the work-order list request
    :param cguid: cguid returned by the work-order list request
    :return: the request object built by ``govc_api.new_api_request``
    """
    endpoint = '/rest/sztaskworkordercommonrest/getTracing'
    query_fields = dict(caseguid=cguid, pviguid=pviguid)
    # The Referer is the endpoint's own full URL.
    referer_header = {'Referer': f'{govc_api.ApiUrl}{endpoint}'}
    return await govc_api.new_api_request(endpoint, query_fields, headers=referer_header, method='GET')
async def after_process_request(response: HTTPResponse, retry_queue: asyncio.Queue[HTTPRequest]):
    """
    Handle the response of a handling-process request.

    Decodes the JSON body, reads ``params.processedList``, renames the source
    fields to the ``GovcTaskProcess`` column names, tags every row with the task id
    carried on the request object and saves the rows through
    ``GovcTaskProcess.save_batch``.

    :param response: response object; its request must carry the task id attribute
    :param retry_queue: retry queue; only inspected to log pending retries
    """
    payload = json.loads(response.body.decode())
    process_list = udict.get_by_path(payload, 'params.processedList')
    if process_list:
        # FieldMapping is keyed by table column; invert it so the source field
        # names can be renamed to the table's column names.
        source_to_column = {source: column for column, source in GovcTaskProcess.FieldMapping.items()}
        frame = pd.DataFrame(process_list).rename(columns=source_to_column)
        # Turn empty markers into None so they are stored as NULL.
        frame.replace(models.EmptyInDF + models.EmptyDatetimeInDF, None, inplace=True)
        frame[GovcTaskProcess.task_id.key] = getattr(response.request, GovcTaskProcess.task_id.key)
        created, updated = await GovcTaskProcess.save_batch(frame)
        echo_log(f"成功创建办理过程信息:{created}条,更新:{updated}条.")
    else:
        echo_log('未获取到办理过程信息')
    # asyncio.Queue defines neither __len__ nor __bool__, so a bare truth test is
    # always true; check emptiness explicitly to avoid logging "0 waiting".
    if retry_queue is not None and not retry_queue.empty():
        echo_log(f"办理过程信息重试队列中有:{retry_queue.qsize()} 个请求在等待.")
+56
View File
@@ -0,0 +1,56 @@
import asyncio
import json
import pandas as pd
from tornado.httpclient import HTTPResponse, HTTPRequest
from dock.govc import govc_api
import models
from models.govc_task_requester import GovcTaskRequester
from paste.util import udict
from paste.core.logging import echo_log
async def get_requster_request(cguid: str):
    """
    Build the GET request for the requester (appellant) information of a single
    work order.

    :param cguid: cguid returned by the work-order list request
    :return: the request object built by ``govc_api.new_api_request``
    """
    endpoint = '/rest/sztaskworkordercommonrest/getInformation'
    query_fields = dict(caseguid=cguid, secret=0)
    # The Referer is the endpoint's own full URL.
    referer_header = {'Referer': f'{govc_api.ApiUrl}{endpoint}'}
    return await govc_api.new_api_request(endpoint, query_fields, headers=referer_header, method='GET')
async def after_requester_request(response: HTTPResponse, retry_queue: asyncio.Queue[HTTPRequest]):
    """
    Handle the response of a requester-information request.

    Decodes the JSON body and stores ``params`` as a single ``GovcTaskRequester``
    row (source fields renamed to column names, tagged with the task id carried on
    the request object) through ``GovcTaskRequester.save_batch``.

    :param response: response object; its request must carry the task id attribute
    :param retry_queue: retry queue; only inspected to log pending retries
    """
    payload = json.loads(response.body.decode())
    requester_info = udict.get_by_path(payload, 'params')
    if requester_info:
        # FieldMapping is keyed by table column; invert it so the source field
        # names can be renamed to the table's column names.
        source_to_column = {source: column for column, source in GovcTaskRequester.FieldMapping.items()}
        frame = pd.DataFrame([requester_info]).rename(columns=source_to_column)
        # Turn empty markers into None so they are stored as NULL.
        frame.replace(models.EmptyInDF + models.EmptyDatetimeInDF, None, inplace=True)
        frame[GovcTaskRequester.task_id.key] = getattr(response.request, GovcTaskRequester.task_id.key)
        created, updated = await GovcTaskRequester.save_batch(frame)
        echo_log(f"成功创建诉求人信息:{created}条,更新:{updated}条.")
    else:
        echo_log('未获取到诉求人信息')
    # asyncio.Queue defines neither __len__ nor __bool__, so a bare truth test is
    # always true; check emptiness explicitly to avoid logging "0 waiting".
    if retry_queue is not None and not retry_queue.empty():
        echo_log(f"诉求人信息重试队列中有:{retry_queue.qsize()} 个请求在等待.")
+59
View File
@@ -0,0 +1,59 @@
import asyncio
import json
import pandas as pd
from tornado.httpclient import HTTPResponse, HTTPRequest
from dock.govc import govc_api
import models
from models.govc_task_return_visit import GovcTaskReturnVisit
from paste.util import udict
from paste.core.logging import echo_log
async def get_return_visit_request(pviguid: str, cguid: str):
    """
    Build the request for the return-visit result information of a single work order.

    :param pviguid: pviguid returned by the work-order list request
    :param cguid: cguid returned by the work-order list request
    :return: the request object built by ``govc_api.new_api_request``
    """
    endpoint = '/rest/sztaskworkordercommonrest/getResultInfo'
    form_fields = dict(
        ProcessVersionInstanceGuid=pviguid,
        caseguid=cguid,
        yearflag='undefined',
    )
    # The Referer is the endpoint's own full URL.
    referer_header = {'Referer': f'{govc_api.ApiUrl}{endpoint}'}
    return await govc_api.new_api_request(endpoint, form_fields, headers=referer_header)
async def after_return_visit_request(response: HTTPResponse, retry_queue: asyncio.Queue[HTTPRequest]):
    """
    Handle the response of a return-visit result request.

    Decodes the JSON body, reads ``params.resultinfo``, renames the source fields
    to the ``GovcTaskReturnVisit`` column names, tags every row with the task id
    carried on the request object and saves the rows through
    ``GovcTaskReturnVisit.save_batch``.

    :param response: response object; its request must carry the task id attribute
    :param retry_queue: retry queue; only inspected to log pending retries
    """
    payload = json.loads(response.body.decode())
    result_list = udict.get_by_path(payload, 'params.resultinfo')
    if result_list:
        # FieldMapping is keyed by table column; invert it so the source field
        # names can be renamed to the table's column names.
        source_to_column = {source: column for column, source in GovcTaskReturnVisit.FieldMapping.items()}
        frame = pd.DataFrame(result_list).rename(columns=source_to_column)
        # Turn empty markers into None so they are stored as NULL.
        frame.replace(models.EmptyInDF + models.EmptyDatetimeInDF, None, inplace=True)
        frame[GovcTaskReturnVisit.task_id.key] = getattr(response.request, GovcTaskReturnVisit.task_id.key)
        created, updated = await GovcTaskReturnVisit.save_batch(frame)
        echo_log(f"成功创建回访结果信息:{created}条,更新:{updated}条.")
    else:
        echo_log('未获取到回访结果信息')
    # asyncio.Queue defines neither __len__ nor __bool__, so a bare truth test is
    # always true; check emptiness explicitly to avoid logging "0 waiting".
    if retry_queue is not None and not retry_queue.empty():
        echo_log(f"回访结果信息重试队列中有:{retry_queue.qsize()} 个请求在等待.")
+56
View File
@@ -0,0 +1,56 @@
import asyncio
import json
import pandas as pd
from tornado.httpclient import HTTPResponse, HTTPRequest
from dock.govc import govc_api
import models
from models.govc_task_history import GovcTaskHistory
from paste.util import udict
from paste.core.logging import echo_log
async def get_history_order_request(cguid: str):
    """
    Build the GET request for the historical work orders related to a single
    work order.

    :param cguid: cguid returned by the work-order list request
    :return: the request object built by ``govc_api.new_api_request``
    """
    endpoint = '/rest/sztaskworkordercommonrest/getHistoryWorkOrder'
    query_fields = dict(caseguid=cguid)
    # The Referer is the endpoint's own full URL.
    referer_header = {'Referer': f'{govc_api.ApiUrl}{endpoint}'}
    return await govc_api.new_api_request(endpoint, query_fields, headers=referer_header, method='GET')
async def after_history_order_request(response: HTTPResponse, retry_queue: asyncio.Queue[HTTPRequest]):
    """
    Handle the response of a historical work-order request.

    Decodes the JSON body, reads ``params.list``, renames the source fields to the
    ``GovcTaskHistory`` column names, tags every row with the task id carried on
    the request object and saves the rows through ``GovcTaskHistory.save_batch``.

    :param response: response object; its request must carry the task id attribute
    :param retry_queue: retry queue; only inspected to log pending retries
    """
    payload = json.loads(response.body.decode())
    history_order_list = udict.get_by_path(payload, 'params.list')
    if history_order_list:
        # FieldMapping is keyed by table column; invert it so the source field
        # names can be renamed to the table's column names.
        source_to_column = {source: column for column, source in GovcTaskHistory.FieldMapping.items()}
        frame = pd.DataFrame(history_order_list).rename(columns=source_to_column)
        # Turn empty markers into None so they are stored as NULL.
        frame.replace(models.EmptyInDF + models.EmptyDatetimeInDF, None, inplace=True)
        frame[GovcTaskHistory.task_id.key] = getattr(response.request, GovcTaskHistory.task_id.key)
        created, updated = await GovcTaskHistory.save_batch(frame)
        echo_log(f"成功创建历史工单信息:{created}条,更新:{updated}条.")
    else:
        echo_log('未获取到历史工单信息')
    # asyncio.Queue defines neither __len__ nor __bool__, so a bare truth test is
    # always true; check emptiness explicitly to avoid logging "0 waiting".
    if retry_queue is not None and not retry_queue.empty():
        echo_log(f"历史工单信息重试队列中有:{retry_queue.qsize()} 个请求在等待.")
+382
View File
@@ -0,0 +1,382 @@
"""
安全模块。
"""
import asyncio
import base64
import io
import json
import re
from typing import Optional
import ddddocr
from PIL import Image, ImageFilter, ImageEnhance
from gmssl import sm2
from tornado.httpclient import HTTPResponse, HTTPRequest
import dock
from dock.govc import govc_api
from models.token import TokenModel
from paste.core import config
from paste.core.logging import echo_log
from paste.util import udict
from paste.web import requests
async def fetch_captcha():
    """
    Request a fresh login captcha from the login page's refresh endpoint.

    :return: the ``data.src`` value of the first control in the response (the
        caller treats it as a data-URI style base64 image), or None when the
        response handler never stored a value
    """
    verify_code_img: Optional[str] = None

    def after_request(response: HTTPResponse, retry_queue: asyncio.Queue[HTTPRequest]):
        """Store the captcha image source found in the response."""
        nonlocal verify_code_img
        response_data = json.loads(response.body.decode())
        controls = udict.get_by_path(response_data, 'controls')
        verify_code_img = udict.get_by_path(controls[0], 'data.src')

    # Request path of the page-refresh action that returns the captcha.
    refresh_url = '/rest/bmfw/bmfwlogin/loginaction/page_Refresh?isCommondto=true'
    # Request parameters.
    common_dto_str = json.dumps([
        {
            "id": "verifyCode",
            "type": "verifycode",
            "action": "loginaction.pageLoad",
            "mapClass": "com.epoint.basic.faces.verifycode.VerifyCode",
            "width": "35%",
            "height": "43px",
            "charLength": 4,
            "ignorecase": True,
            "value": "random"
        },
        {
            "id": "_common_hidden_viewdata",
            "type": "hidden",
            "value": "{\"pageUrl\":\"E86A24CDBC744150F0A28F52940E2E9342802C4870563C30D121F50510AD3184\"}"
        }
    ], separators=(',', ':'))
    cmd_params_str = json.dumps({
        "pageUrl": "http://2.46.12.176:8091/sz12345/bmfw/bmfwlogin/login"
    }, separators=(',', ':'))
    body_data = {
        "commonDto": common_dto_str,
        "cmdParams": cmd_params_str
    }
    verify_code_request = dock.new_http_request(
        f"{govc_api.ApiUrl}{refresh_url}", body_data, use_form=True,
        # The proxy configuration is optional; unpacking None would raise TypeError.
        **(govc_api.ProxyConfig or {})
    )
    request_queue = asyncio.Queue()
    await request_queue.put(verify_code_request)
    await requests.async_concurrency(
        request_queue, con_count=1, retry=1,
        after_request=after_request
    )
    return verify_code_img
def enhance_captcha(img_bytes):
    """
    Pre-process a captcha image with PIL: median-filter denoising followed by a
    contrast boost.

    :param img_bytes: encoded image bytes
    :return: the processed image, encoded as PNG bytes
    """
    source = Image.open(io.BytesIO(img_bytes))
    # Denoise.
    denoised = source.filter(ImageFilter.MedianFilter())
    # Raise the contrast to 1.5x.
    sharpened = ImageEnhance.Contrast(denoised).enhance(1.5)
    # Serialise back to bytes.
    output = io.BytesIO()
    sharpened.save(output, format='PNG')
    return output.getvalue()
async def read_verify_code():
    """
    Fetch captchas and OCR them until a well-formed code is recognised.

    A code is accepted when it consists of exactly four ASCII letters or digits;
    otherwise a new captcha is fetched and recognition is retried.

    :return: the recognised verification code
    :raises RuntimeError: when no captcha image could be fetched
    """
    code_pattern = re.compile(r'[A-Za-z0-9]{4}')
    # Build the OCR engine once instead of once per attempt.
    ocr = ddddocr.DdddOcr(show_ad=False)
    verify_code = ""
    while not code_pattern.fullmatch(verify_code):
        base64_str = await fetch_captcha()
        if not base64_str:
            # Previously this surfaced as an opaque AttributeError on None.
            raise RuntimeError("获取验证码图片失败")
        # Drop the part before the comma, then restore any missing base64 padding.
        img_data = base64_str.split(',')[1]
        img_data += '=' * (-len(img_data) % 4)
        img_bytes = base64.b64decode(img_data)
        # Pre-process the image with PIL before recognition.
        img_bytes = enhance_captcha(img_bytes)
        verify_code = ocr.classification(img_bytes)
        echo_log(verify_code)
    return verify_code
async def fetch_public_key():
    """
    Fetch the SM2 public key published by the login action.

    :return: the ``custom.sm2PubKey`` value of the response, or None when the
        response handler never stored a value
    """
    sm2_pub_key: Optional[str] = None

    def after_request(response: HTTPResponse, retry_queue: asyncio.Queue[HTTPRequest]):
        """Store the public key found in the response."""
        nonlocal sm2_pub_key
        response_data = json.loads(response.body.decode())
        sm2_pub_key = udict.get_by_path(response_data, 'custom.sm2PubKey')

    pub_key_url = '/rest/loginaction/autoLoad?isCommondto=true'
    body_data = {
        "commonDto": json.dumps([])  # empty array: no extra data
    }
    public_key_request = dock.new_http_request(
        f"{govc_api.ApiUrl}{pub_key_url}", body_data,
        # The proxy configuration is optional; unpacking None would raise TypeError.
        **(govc_api.ProxyConfig or {})
    )
    request_queue = asyncio.Queue()
    await request_queue.put(public_key_request)
    await requests.async_concurrency(
        request_queue, con_count=1, retry=dock.MAX_RETRY_COUNT,
        after_request=after_request
    )
    return sm2_pub_key
def js_escape(s: str) -> str:
    """
    Emulate JavaScript's legacy global ``escape()`` function.

    Rules (ECMAScript Annex B):
      * ``A-Z a-z 0-9 @ * _ + - . /`` are copied through unchanged;
      * any other code unit <= 0xFF becomes ``%XX`` (upper-case hex);
      * any other code unit becomes ``%uXXXX``.

    JavaScript works on UTF-16 code units, so a character outside the Basic
    Multilingual Plane is emitted as its surrogate pair (two ``%uXXXX`` groups).

    :param s: text to escape
    :return: the escaped text
    """
    # Characters that escape() never encodes.
    unescaped = frozenset(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789"
        "@*_+-./"
    )
    result = []
    for ch in s:
        if ch in unescaped:
            result.append(ch)
            continue
        code = ord(ch)
        if code <= 0xFF:
            result.append(f"%{code:02X}")
        elif code <= 0xFFFF:
            result.append(f"%u{code:04X}")
        else:
            # Split an astral code point into its UTF-16 surrogate pair.
            offset = code - 0x10000
            high = 0xD800 + (offset >> 10)
            low = 0xDC00 + (offset & 0x3FF)
            result.append(f"%u{high:04X}%u{low:04X}")
    return ''.join(result)
def unicode_escape_to_utf8(escaped: str) -> str:
    """
    Rewrite every ``%uXXXX`` group as the percent-encoded UTF-8 bytes of that character.

    Each ``XXXX`` is treated as a UTF-16 code unit. Consecutive groups are decoded
    together so that a surrogate pair becomes one 4-byte UTF-8 sequence. A lone
    surrogate is passed through rather than raising. Text outside ``%uXXXX`` groups
    is returned unchanged.

    :param escaped: text that may contain ``%uXXXX`` groups
    :return: text with those groups replaced by upper-case ``%XX`` UTF-8 bytes
    """
    import re

    def repl(m):
        # "%u4E2D%u6587" -> "4E2D6587" -> UTF-16-BE bytes -> str -> UTF-8 bytes.
        hex_units = m.group(0).replace('%u', '')
        text = bytes.fromhex(hex_units).decode('utf-16-be', 'surrogatepass')
        utf8_bytes = text.encode('utf-8', 'surrogatepass')
        return ''.join(f'%{b:02X}' for b in utf8_bytes)

    return re.sub(r'(?:%u[0-9A-Fa-f]{4})+', repl, escaped)
def sm2_encrypt(plain_text: str, public_key_hex: str) -> str:
    """
    Encrypt ``plain_text`` with an SM2 public key after the same pre-processing as the front end.

    Pipeline: ``js_escape`` -> ``unicode_escape_to_utf8`` -> UTF-8 bytes -> Base64
    -> SM2 encryption of the Base64 text's bytes.

    :param plain_text: text to encrypt
    :param public_key_hex: SM2 public key as a hex string
    :return: hex of the ciphertext, prefixed with ``'04'``
    """
    encoded = unicode_escape_to_utf8(js_escape(plain_text))

    # Per the original author's note, the front end runs
    # Utf8.parse -> Base64.stringify -> Utf8.parse before encrypting.
    # b64encode already yields ASCII bytes, which are the UTF-8 bytes of the Base64 text.
    payload = base64.b64encode(encoded.encode('utf-8'))

    # NOTE(review): the original comment labels this step "C1C3C2"; the actual
    # ciphertext layout depends on CryptSM2's default mode — confirm against the library.
    cipher = sm2.CryptSM2(public_key=public_key_hex, private_key="")
    ciphertext = cipher.encrypt(payload)
    return '04' + ciphertext.hex()
async def build_login_common_dto(
        username: str,
        password: str
) -> tuple[str, str]:
    """
    Build the ``commonDto`` and ``cmdParams`` form values for the login request.

    :param username: account name (plain text)
    :param password: account password (plain text)
    :return: ``(commonDto, cmdParams)``, both compact JSON strings
    :raises Exception: when no SM2 public key could be fetched
    """
    pub_key = await fetch_public_key()
    if not pub_key:
        raise Exception("获取 SM2 公钥失败")

    # Both credentials are SM2-encrypted with the freshly fetched key.
    enc_user = sm2_encrypt(username, pub_key)
    enc_pass = sm2_encrypt(password, pub_key)

    # Captcha recognition is not wired in here; the fixed placeholder string
    # below is sent in the verification-code slot.
    hidden_view_data = {"id": "_common_hidden_viewdata", "type": "hidden", "value": ""}

    # cmdParams layout: [encrypted username, encrypted password, "0", False, verify-code slot]
    params = [enc_user, enc_pass, "0", False, "#undefined #verifyCode"]

    # Serialise both payloads as compact JSON (no spaces after separators).
    compact = (',', ':')
    return (
        json.dumps([hidden_view_data], separators=compact),
        json.dumps(params, separators=compact),
    )
async def login():
    """
    Log in to the municipal 12345 platform and hand the session cookies to ``first_login``.

    Steps:
        1. Build the ``commonDto`` / ``cmdParams`` payload from the configured account
           (``dock.govc.account.username`` / ``dock.govc.account.password``).
        2. POST the payload as a form to the login endpoint.
        3. In the response callback, collect the response cookies into a
           ``k=v; k=v`` string and pass it to ``first_login``.

    :return: None
    :raises Exception: propagated from ``build_login_common_dto`` when the SM2 public
                       key cannot be obtained
    """

    async def after_request(response: HTTPResponse, retry_queue: asyncio.Queue[HTTPRequest]):
        # Turn the response cookies into a Cookie header value and continue the flow.
        jar = dock.get_cookies(response)
        cookies = "; ".join(f"{name}={value}" for name, value in jar.items())
        await first_login(cookies)

    login_url = f"{govc_api.ApiUrl}/rest/bmfw/bmfwlogin/loginaction/login?isCommondto=true"

    # Extra headers; only the user agent of the random browser profile is used.
    user_agent, _browser_ver, _os_name = dock.get_random_user_agent()
    extra_headers = {
        'Host': '2.46.12.176:8091',
        'Referer': 'http://2.46.12.176:8091/sz12345/bmfw/bmfwlogin/login',
        'User-Agent': user_agent,
        'X-Requested-With': 'XMLHttpRequest',
    }

    # Encrypted credentials payload.
    common_dto, cmd_params = await build_login_common_dto(
        config.get_config("dock.govc.account.username"),
        config.get_config("dock.govc.account.password"),
    )

    request = dock.new_http_request(
        url=login_url,
        body={"commonDto": common_dto, "cmdParams": cmd_params},
        method='POST',
        timeout=dock.DEFAULT_TIMEOUT,
        use_form=True,
        extra_headers=extra_headers,
        **govc_api.ProxyConfig
    )

    pending = asyncio.Queue()
    await pending.put(request)
    await requests.async_concurrency(
        pending,
        con_count=1,
        retry=dock.MAX_RETRY_COUNT,
        after_request=after_request,
    )
async def first_login(cookies: str):
    """
    Call the ``getIsFirstLogin`` endpoint with the login cookies and store the GOVC token.

    The response callback reads ``epoint_user_loginid`` from the first entry of
    ``controls``, merges any cookies set by this response with ``cookies``, and
    saves both as a compact JSON token through ``TokenModel.refresh``.

    :param cookies: ``Cookie`` header value obtained from the login response
    :return: None
    """
    grace_url = f"{govc_api.ApiUrl}/rest/szbmfw/szdesktop/szdeptindexaction/getIsFirstLogin?isCommondto=true"

    # Extra headers; only the user agent of the random browser profile is used.
    user_agent, _browser_ver, _os_name = dock.get_random_user_agent()
    extra_headers = {
        'Cookie': cookies,
        'Host': '2.46.12.176:8091',
        'Referer': 'http://2.46.12.176:8091/sz12345/bmfw/bmfwlogin/login',
        'User-Agent': user_agent,
        'X-Requested-With': 'XMLHttpRequest',
    }

    async def after_request(response: HTTPResponse, retry_queue: asyncio.Queue[HTTPRequest]):
        # Read epoint_user_loginid: controls[0]["value"] is itself a JSON document.
        # An empty "controls" list or a non-JSON value raises here.
        response_data = json.loads(response.body.decode())
        controls: list[dict] = response_data.get('controls', [])
        epoint_user_loginid = json.loads(controls[0].get('value', '')).get('epoint_user_loginid', '')

        # Merge cookies set by this response with the ones from the login step.
        # Only non-empty parts are joined, so a response without cookies no longer
        # produces a cookie string that starts with a stray "; ".
        cookies_data = dock.get_cookies(response)
        new_cookies = "; ".join(f"{k}={v}" for k, v in cookies_data.items())
        full_cookies = "; ".join(part for part in (new_cookies, cookies) if part)

        # Persist the login id and cookies together as one compact JSON token.
        token = json.dumps(
            {
                'epoint_user_loginid': epoint_user_loginid,
                'cookies': full_cookies,
            },
            separators=(',', ':')
        )
        await TokenModel.refresh(platform='GOVC', token=token)
        echo_log("成功刷新市12345登录令牌.")

    grace_request = dock.new_http_request(
        grace_url, {}, 'GET',
        extra_headers=extra_headers,
        **govc_api.ProxyConfig
    )
    request_queue = asyncio.Queue()
    await request_queue.put(grace_request)
    await requests.async_concurrency(
        request_queue, con_count=1, retry=dock.MAX_RETRY_COUNT,
        after_request=after_request
    )
async def get_cookies(platform: str = 'GOVC'):
    """
    Load the stored login token for a platform and return its parts.

    :param platform: platform to look up; defaults to ``GOVC`` (municipal 12345)
    :return: ``(epoint_user_loginid, cookies)``; each falls back to ``''`` when the
             key is missing from the stored token
    """
    record = await TokenModel.find_by_platform(platform)
    # The record's ``token`` attribute holds a JSON document.
    payload = json.loads(record.token)
    login_id = payload.get('epoint_user_loginid', '')
    cookie_header = payload.get('cookies', '')
    return login_id, cookie_header
if __name__ == "__main__":
    # Manual entry point: run one login through the project's async runner.
    from paste.core import aio_pool

    aio_pool.get_aio_runner()(login())