初始化项目
This commit is contained in:
@@ -0,0 +1,94 @@
|
||||
"""
|
||||
数据抓取模块。
|
||||
"""
|
||||
import asyncio
|
||||
from typing import Optional, Union
|
||||
|
||||
from sqlalchemy import select, desc
|
||||
|
||||
import dock
|
||||
from dock.govc import govc_scrape_dept_feedback, govc_scrape_return_visit, govc_scrape_finish_info, govc_scrape_order
|
||||
from models.govc_task import GovcTask
|
||||
from paste.core.logging import echo_log
|
||||
from paste.web import requests
|
||||
|
||||
|
||||
async def fetch_govc_task(fetch_size: int = 60, task_id: Optional[Union[str, int, list]] = None):
    """
    Scrape the to-do task list, then the detail data of the selected tasks.

    The task-list request is executed first. Tasks are then read back through
    ``GovcTask`` (highest id first) and, for every task row, one department
    feedback request, one return-visit request and one finish-info request
    are queued; the three queues are then processed concurrently.

    :param fetch_size: passed to the task-list request; also the number of
        tasks (highest id first) whose details are scraped when ``task_id``
        is not given
    :param task_id: optional task id, or list of task ids, restricting the
        detail scrape to those tasks
    """
    echo_log("开始抓取待办数据...")
    task_request = await govc_scrape_order.get_task_request(
        fetch_size=fetch_size
    )
    request_queue = asyncio.Queue()
    await request_queue.put(task_request)
    await requests.async_concurrency(
        request_queue,
        retry=dock.MAX_RETRY_COUNT,
        after_request=govc_scrape_order.after_task_request,
    )
    echo_log("待办数据抓取完成...")

    # Read the task data back so that details are scraped for the latest data.
    query = select(
        GovcTask.id, GovcTask.pvi_guid, GovcTask.c_guid
    ).order_by(
        desc(GovcTask.id)
    )
    if task_id:
        if isinstance(task_id, list):
            query = query.where(GovcTask.id.in_(task_id))
            echo_log(f"开始抓取待办列表:{task_id} 的详细数据...")
        else:
            query = query.where(GovcTask.id == task_id)
            echo_log(f"开始抓取待办:{task_id} 的详细数据...")
    else:
        echo_log(f"开始抓取前 {fetch_size} 条待办的详细数据...")
        query = query.limit(fetch_size)
    task_df = await GovcTask.query_as_df(query)

    # Build one request queue per kind of detail data.
    feedback_queue = asyncio.Queue()
    result_info_queue = asyncio.Queue()
    finish_info_queue = asyncio.Queue()
    # Each request builder is paired with the queue its requests go into.
    request_builders = (
        (govc_scrape_dept_feedback.get_feedback_request, feedback_queue),
        (govc_scrape_return_visit.get_return_visit_request, result_info_queue),
        (govc_scrape_finish_info.get_finish_info_request, finish_info_queue),
    )

    # Fill the queues with request objects.
    echo_log("正在准备请求队列...")
    for _, row in task_df.iterrows():
        row_task_id = row.get(GovcTask.id.key)
        pvi_guid = row.get(GovcTask.pvi_guid.key)
        c_guid = row.get(GovcTask.c_guid.key)
        for build_request, target_queue in request_builders:
            detail_request = await build_request(pvi_guid, c_guid)
            # Tag the request with its task id; presumably read by the
            # after_* callbacks -- confirm against the dock.govc modules.
            setattr(detail_request, 'task_id', row_task_id)
            await target_queue.put(detail_request)

    echo_log("抓取待办详细数据...")
    tasks = [
        requests.async_concurrency(
            feedback_queue,
            con_count=dock.CONCURRENCY_COUNT,
            retry=dock.MAX_RETRY_COUNT,
            after_request=govc_scrape_dept_feedback.after_feedback_request,
        ),
        requests.async_concurrency(
            result_info_queue,
            con_count=dock.CONCURRENCY_COUNT,
            retry=dock.MAX_RETRY_COUNT,
            # The original referenced ``govc_scrape_result_info``, which is
            # never imported (NameError). The requests in this queue come from
            # ``govc_scrape_return_visit``, so its callback is used here.
            # NOTE(review): callback name follows the get_*/after_* naming of
            # the sibling modules -- confirm it exists in govc_scrape_return_visit.
            after_request=govc_scrape_return_visit.after_return_visit_request,
        ),
        requests.async_concurrency(
            finish_info_queue,
            con_count=dock.CONCURRENCY_COUNT,
            retry=dock.MAX_RETRY_COUNT,
            after_request=govc_scrape_finish_info.after_finish_info_request,
        ),
    ]
    await asyncio.gather(*tasks)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual entry point: run one scrape with fetch_size=10 through the
    # project's async runner.
    from paste.core import aio_pool

    run_async = aio_pool.get_aio_runner()
    scrape_job = fetch_govc_task(10)
    run_async(scrape_job)
|
||||
Reference in New Issue
Block a user