初始化项目
This commit is contained in:
@@ -0,0 +1,106 @@
|
||||
"""
|
||||
数据抓取模块。
|
||||
"""
|
||||
import asyncio
|
||||
from typing import Optional, Union
|
||||
|
||||
from sqlalchemy import select, desc
|
||||
|
||||
import dock
|
||||
from dock.govs import govs_scrape_order_master, govs_scrape_order_detail, govs_scrape_order_process
|
||||
from models.govs_order_master import GovsOrderMaster
|
||||
from paste.core.logging import echo_log
|
||||
from paste.web import requests
|
||||
|
||||
|
||||
async def fetch_govs_task(
    dept_page_tag: int = 1,
    num_per_page: int = 60,
    task_id: Optional[Union[str, int, list]] = None,
):
    """
    Scrape the pending work-order list, then the detail and process data of selected orders.

    The function runs in two phases:

    1. Build the master (list) request via ``govs_scrape_order_master`` and run it
       through ``requests.async_concurrency``.
    2. Query ``GovsOrderMaster`` rows (newest ``id`` first), build one detail request
       and one process request per row, and run both queues concurrently.

    :param dept_page_tag: 0 means all orders, 1 means orders awaiting sign-off,
        2 means orders awaiting assignment. It is forwarded to the master request
        and also drives the local row filter: 1 selects ``govs_sign == 0``, any
        other non-zero value selects ``govs_sign == 1``, and 0 applies no filter.
    :param num_per_page: forwarded to the master request; also the maximum number
        of rows read for detail scraping when ``task_id`` is not given.
    :param task_id: optional ``GovsOrderMaster.id`` to restrict detail scraping to.
        A list of ids is matched with ``IN``; any other truthy value is matched
        with ``==``. The ``dept_page_tag`` filter above still applies.
    """
    echo_log("开始抓取待办数据...")
    master_request = await govs_scrape_order_master.get_task_request(
        dept_page_tag=dept_page_tag, num_per_page=num_per_page
    )
    master_queue = asyncio.Queue()
    await master_queue.put(master_request)
    await requests.async_concurrency(
        master_queue, retry=dock.MAX_RETRY_COUNT,
        after_request=govs_scrape_order_master.after_task_request,
    )
    echo_log("待办数据抓取完成...")

    # Read the stored orders back, newest first, so details are scraped for the latest data.
    query = select(
        GovsOrderMaster.id, GovsOrderMaster.order_id, GovsOrderMaster.order_no,
        GovsOrderMaster.tenant_id, GovsOrderMaster.master_id, GovsOrderMaster.area_code,
    ).order_by(
        desc(GovsOrderMaster.id)
    )

    # Narrow to one sign state: tag 1 -> govs_sign == 0, any tag other than 0 or 1
    # -> govs_sign == 1. Tag 0 leaves the query unfiltered.
    if dept_page_tag == 1:
        query = query.where(GovsOrderMaster.govs_sign == 0)
    elif dept_page_tag != 0:
        query = query.where(GovsOrderMaster.govs_sign == 1)

    if task_id:
        if isinstance(task_id, list):
            query = query.where(GovsOrderMaster.id.in_(task_id))
            echo_log(f"开始抓取待办列表:{task_id} 的详细数据...")
        else:
            query = query.where(GovsOrderMaster.id == task_id)
            echo_log(f"开始抓取待办:{task_id} 的详细数据...")
    else:
        # No explicit id(s): cap the number of rows at num_per_page.
        echo_log(f"开始抓取前 {num_per_page} 条待办的详细数据...")
        query = query.limit(num_per_page)
    task_df = await GovsOrderMaster.query_as_df(query)

    # One queue per request kind; both are drained concurrently below.
    detail_queue = asyncio.Queue()
    process_queue = asyncio.Queue()

    # Column keys do not change between rows, so resolve them once.
    order_id_key = GovsOrderMaster.order_id.key
    order_no_key = GovsOrderMaster.order_no.key
    tenant_id_key = GovsOrderMaster.tenant_id.key
    master_id_key = GovsOrderMaster.master_id.key
    area_code_key = GovsOrderMaster.area_code.key

    echo_log("正在准备请求队列...")
    for _, row in task_df.iterrows():
        order_id = row.get(order_id_key)
        order_no = row.get(order_no_key)
        tenant_id = int(row.get(tenant_id_key))
        master_id = int(row.get(master_id_key))
        area_code = row.get(area_code_key)

        detail_request = await govs_scrape_order_detail.get_task_request(order_id, master_id, tenant_id)
        _attach_order_ids(detail_request, order_id, order_no, master_id, tenant_id)
        await detail_queue.put(detail_request)

        # NOTE(review): the literal id below is passed through unchanged; its
        # meaning is not visible in this file — confirm against get_task_request.
        process_request = await govs_scrape_order_process.get_task_request(
            order_id, order_no, master_id, tenant_id, '1700467981117980074', area_code
        )
        _attach_order_ids(process_request, order_id, order_no, master_id, tenant_id)
        await process_queue.put(process_request)

    echo_log("抓取待办详细数据...")
    await asyncio.gather(
        requests.async_concurrency(
            detail_queue, con_count=dock.CONCURRENCY_COUNT, retry=dock.MAX_RETRY_COUNT,
            after_request=govs_scrape_order_detail.after_task_request,
        ),
        requests.async_concurrency(
            process_queue, con_count=dock.CONCURRENCY_COUNT, retry=dock.MAX_RETRY_COUNT,
            after_request=govs_scrape_order_process.after_task_request,
        ),
    )


def _attach_order_ids(request, order_id, order_no, master_id, tenant_id):
    """Set the order's identifiers as attributes on ``request``."""
    request.order_id = order_id
    request.order_no = order_no
    request.master_id = master_id
    request.tenant_id = tenant_id
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry: run one scrape with dept_page_tag=1 and num_per_page=50
    # using the runner supplied by paste.core.aio_pool.
    from paste.core import aio_pool

    run_coroutine = aio_pool.get_aio_runner()
    scrape_job = fetch_govs_task(dept_page_tag=1, num_per_page=50)
    run_coroutine(scrape_job)
|
||||
Reference in New Issue
Block a user