crawler_task_management/main.py

227 lines
9.2 KiB
Python
Raw Normal View History

2025-11-26 17:40:11 +08:00
import os
import yaml
import uuid
import asyncio
import uvicorn
from pathlib import Path
from typing import Dict, Any, Optional, List
2025-11-28 17:38:20 +08:00
from fastapi import FastAPI, HTTPException, Depends, Header
2025-11-28 11:14:38 +08:00
from public_function.auth import verify_tk_token
2025-11-27 16:18:12 +08:00
from public_function.public_func import read_config, create_logger
2025-12-03 14:39:17 +08:00
from model.model import GoodsInfo, DataReceive, AccountStatus, AccountUpdate, TokenItem, CrawlerItem, ResetTask, AccountObtain
2025-11-26 17:40:11 +08:00
app = FastAPI()
2025-11-28 11:14:38 +08:00
app.middleware("http")(verify_tk_token)
2025-11-27 16:18:12 +08:00
logger = create_logger(file_name="crawler_main")
2025-11-26 17:40:11 +08:00
def get_config():
"""获取配置文件"""
config_path = os.path.join(Path(__file__).resolve().parent, 'public_function/config.yaml')
try:
# 这里假设read_config函数存在
from public_function.public_func import read_config
return read_config(config_path)
except ImportError:
2025-11-27 16:00:41 +08:00
print(f"未找到read_config函数使用默认配置")
2025-11-26 17:40:11 +08:00
return {'host': 'localhost', 'port': 3306, 'user': 'root', 'password': 'password', 'db': 'test_db', 'max_overflow': 10}
def get_account_manager():
"""获取账号管理器实例"""
config = get_config()
try:
from account_management.deal_account import DealAccount
return DealAccount(config)
except ImportError:
2025-11-27 16:00:41 +08:00
print(f"未找到DealAccount类返回模拟实例")
2025-11-26 17:40:11 +08:00
return None
2025-11-28 15:38:58 +08:00
def get_task_manager():
2025-11-28 15:08:58 +08:00
"""任务获取管理器实例"""
2025-11-26 17:40:11 +08:00
config = get_config()
try:
from task_management.all_task_management import AllTask
return AllTask(config)
except ImportError:
2025-11-27 16:00:41 +08:00
print(f"未找到AllTask类返回模拟实例")
return None
def get_reset_manager():
"""设备重置管理器"""
config = get_config()
try:
from task_management.all_task_management import AllTask
return AllTask(config)
except ImportError:
print("未找到 类,返回模拟实例")
2025-11-26 17:40:11 +08:00
return None
# 账号处理相关
2025-12-03 14:39:17 +08:00
@app.post("/obtain_account", summary="获取可用账号")
async def obtain_account(account_data: AccountObtain, account_manager: Any = Depends(get_account_manager)):
2025-11-26 17:40:11 +08:00
"""
获取指定应用的可用账号
- **app_name**: 应用名称
2025-11-28 17:13:08 +08:00
pad_code
2025-11-26 17:40:11 +08:00
"""
try:
2025-12-03 14:39:17 +08:00
param = account_data.model_dump()
result = await account_manager.obtain_account_info(param["app_name"], param["region"])
2025-11-26 17:40:11 +08:00
if result:
2025-11-28 17:13:08 +08:00
# 将账号和设备进行记录
2025-12-03 10:20:07 +08:00
set_param = {"status": 2}
print(result)
params = (result[0]["account_id"], result[0]["app_name"])
where_conditions = "account_id = %s and app_name = %s "
await account_manager.update_account_info(set_param=set_param, where_conditions=where_conditions, params=params)
2025-11-26 17:40:11 +08:00
return {"code": 200, "message": "获取账号成功", "data": result[0]}
else:
raise HTTPException(status_code=404, detail="没有可用的账号")
except Exception as e:
print(f"获取账号失败: {e}")
raise HTTPException(status_code=404, detail="{}".format(e))
2025-12-03 10:20:07 +08:00
@app.post("/account_status", summary="获取账号状态")
2025-12-03 10:28:14 +08:00
async def account_status(account_data: AccountStatus, account_manager: Any = Depends(get_account_manager)):
2025-11-26 17:40:11 +08:00
"""
新增爬虫账号
- **account_id**: 账号ID
- **password**: 密码
- **app_name**: 应用名称
"""
try:
2025-12-03 10:20:07 +08:00
result = await account_manager.query_account_info(account_data.model_dump())
2025-11-27 10:44:09 +08:00
if result:
2025-12-03 10:20:07 +08:00
return {"code": 200, "message": "获取账号状态", "data": result}
raise HTTPException(status_code=404, detail="获取账号状态")
2025-11-26 17:40:11 +08:00
except Exception as e:
2025-12-03 10:20:07 +08:00
print(f"获取账号状态: {e}")
raise HTTPException(status_code=500, detail="获取账号状态,失败原因:{}".format(e))
2025-11-26 17:40:11 +08:00
2025-11-28 18:34:59 +08:00
@app.post("/update_account", summary="账号状态修改")
2025-11-27 10:44:09 +08:00
async def update_account(account_data: AccountUpdate, account_manager: Any = Depends(get_account_manager)):
2025-11-26 18:06:19 +08:00
"""
删除爬虫账号
- **account_id**: 账号ID
- **password**: 密码
- **app_name**: 应用名称
"""
try:
2025-11-27 11:39:28 +08:00
data = account_data.model_dump()
2025-11-27 10:44:09 +08:00
set_param = {"status": data["status"]}
params = (data["account_id"], data["app_name"])
where_conditions = "account_id = %s and app_name = %s "
result = await account_manager.update_account_info(set_param=set_param, where_conditions=where_conditions, params=params)
if result:
2025-11-28 10:33:31 +08:00
return {"code": 200, "message": "账号状态修改成功", "data": result}
2025-12-03 10:28:14 +08:00
else:
return {"code": 401, "message": "数据库中状态与传入状态一样"}
2025-11-26 18:06:19 +08:00
except Exception as e:
print(f"新增账号失败: {e}")
2025-11-28 10:33:31 +08:00
raise HTTPException(status_code=500, detail="账号失败,请重试,失败原因:{}".format(e))
2025-11-26 18:06:19 +08:00
2025-11-28 18:34:59 +08:00
@app.post("/get_crawler_task", summary="手机获取任务")
2025-11-28 18:32:41 +08:00
async def get_crawler_task(task_data: CrawlerItem, task_manager: Any = Depends(get_task_manager)):
2025-11-28 15:08:58 +08:00
"""
获取指定应用的可用账号
- **app_name**: 应用名称
- **country**: 应用名称
"""
2025-11-28 15:38:58 +08:00
try:
params = task_data.model_dump()
result = task_manager.get_task_item(params)
if result:
2025-11-28 18:32:41 +08:00
data = {"task_id": result["task_id"], "status": 4}
await task_manager.update_task_record(data)
2025-11-28 15:38:58 +08:00
return {"code": 200, "message": "任务获取成功", "data": result}
2025-11-29 16:36:34 +08:00
else:
return {"code": 200, "message": "暂时没有任务", "data": None}
2025-11-28 15:38:58 +08:00
except Exception as e:
print(f"获取任务失败,失败原因: {e}")
raise HTTPException(status_code=500, detail="获取任务失败;失败原因{}".format(e))
2025-11-28 15:08:58 +08:00
2025-11-28 18:34:59 +08:00
@app.post("/receive_data", summary="接收抓取数据")
2025-11-26 18:56:26 +08:00
async def receive_data(task_data: DataReceive, task_manager: Any = Depends(get_task_manager)):
2025-11-26 17:40:11 +08:00
"""数据接收接口"""
2025-11-26 18:56:26 +08:00
try:
2025-11-28 17:13:08 +08:00
# 需要对账号设备进行记录,便于统计每个账号采集数据数量
2025-11-27 11:39:28 +08:00
params = task_data.model_dump()
2025-11-26 18:56:26 +08:00
result = await task_manager.deal_receive_data(params)
if result:
return {"code": 200, "message": "数据保存成功", "data": result}
raise HTTPException(status_code=404, detail="抓取商品数据失败,请重新尝试")
except Exception as e:
2025-11-28 14:50:01 +08:00
print(f"商品数据处理失败,失败原因: {e}")
2025-11-26 18:56:26 +08:00
raise HTTPException(status_code=500, detail="获取数据失败;失败原因{}".format(e))
2025-11-26 17:40:11 +08:00
2025-11-27 14:57:57 +08:00
@app.post("/get_goods_info", summary="客户端获取商品数据")
2025-11-28 17:38:20 +08:00
async def get_goods_info(task_data: GoodsInfo, task_manager: Any = Depends(get_task_manager), token: Optional[str] = Header(None)):
2025-11-27 14:57:57 +08:00
"""客户端获取商品数据接口"""
2025-11-28 17:13:08 +08:00
task_id = uuid.uuid4().hex
2025-11-26 17:40:11 +08:00
try:
2025-11-27 11:39:28 +08:00
params = task_data.model_dump()
2025-11-28 17:13:08 +08:00
params['task_id'] = task_id
params["app_name"] = "Shopee"
2025-11-28 17:14:54 +08:00
params['region'] = params["host"]
2025-11-28 17:38:20 +08:00
params["token"] = token
2025-12-03 10:20:07 +08:00
print(params)
2025-11-28 17:13:08 +08:00
await task_manager.insert_task_record(params)
print(f"开始处理任务: {task_id}")
result = await task_manager.deal_shopee_task(params)
2025-11-26 17:40:11 +08:00
if result:
2025-11-29 16:14:23 +08:00
print(f"任务处理成功: {task_id}")
2025-11-28 17:13:08 +08:00
data = {"task_id": params["task_id"], "status": 2}
await task_manager.update_task_record(data)
2025-11-27 14:57:57 +08:00
return {"code": 200, "message": "数据获取成功", "data": result}
2025-11-28 17:13:08 +08:00
print(f"任务处理失败: {task_id}")
raise HTTPException(status_code=503, detail="抓取商品数据失败,请重新尝试")
2025-11-26 17:40:11 +08:00
except Exception as e:
2025-11-28 17:13:08 +08:00
print(f"任务异常 - ID: {task_id}, 错误: {str(e)}")
raise HTTPException(status_code=503, detail="抓取商品数据失败,请重新尝试")
2025-11-26 17:40:11 +08:00
2025-11-29 17:42:10 +08:00
@app.post("/add_token", summary="添加token")
2025-11-29 10:18:38 +08:00
async def add_token(task_data: TokenItem, task_manager: Any = Depends(get_task_manager)):
2025-11-27 16:00:41 +08:00
"""设备重置接口"""
try:
params = task_data.model_dump()
2025-11-29 10:18:38 +08:00
result = task_manager.add_token(params.get("token"))
print(result)
# result = True
2025-11-27 16:00:41 +08:00
if result:
2025-11-29 10:18:38 +08:00
print(f"新增token:{params.get("token")} 成功")
2025-11-27 16:00:41 +08:00
return {"code": 200, "message": "<UNK>", "data": result}
2025-11-29 10:18:38 +08:00
raise HTTPException(status_code=404, detail=f"新增token:{params.get("token")} 失败")
2025-11-27 16:00:41 +08:00
except Exception as e:
2025-11-29 10:18:38 +08:00
print(f"新增token:{params.get("token")} 失败,失败原因: {e}")
raise HTTPException(status_code=500, detail=f"新增token失败失败原因{e}")
2025-11-27 10:20:20 +08:00
2025-11-29 17:42:10 +08:00
@app.post("/reset_task", summary="重置任务")
async def reset_task(task_data: ResetTask, task_manager: Any = Depends(get_task_manager)):
# 任务上报失败,需要对任务进行重置
try:
params = task_data.model_dump()
result = await task_manager.deal_reset_task(params)
if result:
print(f"任务:{params.get("task_id")} 重启成功")
return {"code": 200, "message": f"任务:{params.get("task_id")} 重启成功", }
else:
return {"code": 200, "message": f"没有查询到相关任务:{params.get("task_id")}"}, 404
except Exception as e:
print(f"新增token:{params.get("token")} 失败,失败原因: {e}")
raise HTTPException(status_code=404, detail=f"任务:{params.get("task_id")} 重启失败")
2025-11-26 17:40:11 +08:00
if __name__ == '__main__':
2025-11-28 14:50:01 +08:00
uvicorn.run(app, host="0.0.0.0", port=8000)