2025-11-26 17:40:11 +08:00
|
|
|
|
import os
|
|
|
|
|
|
import yaml
|
|
|
|
|
|
import uuid
|
|
|
|
|
|
import asyncio
|
|
|
|
|
|
import uvicorn
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
from typing import Dict, Any, Optional, List
|
2025-11-28 17:38:20 +08:00
|
|
|
|
from fastapi import FastAPI, HTTPException, Depends, Header
|
2025-11-28 11:14:38 +08:00
|
|
|
|
from public_function.auth import verify_tk_token
|
2025-11-27 16:18:12 +08:00
|
|
|
|
from public_function.public_func import read_config, create_logger
|
2025-11-29 17:42:10 +08:00
|
|
|
|
from model.model import GoodsInfo, DataReceive, AccountCreate, AccountUpdate, TokenItem, CrawlerItem, ResetTask
|
2025-11-26 17:40:11 +08:00
|
|
|
|
|
|
|
|
|
|
app = FastAPI()
|
2025-11-28 11:14:38 +08:00
|
|
|
|
app.middleware("http")(verify_tk_token)
|
2025-11-27 16:18:12 +08:00
|
|
|
|
logger = create_logger(file_name="crawler_main")
|
2025-11-26 17:40:11 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_config():
|
|
|
|
|
|
"""获取配置文件"""
|
|
|
|
|
|
config_path = os.path.join(Path(__file__).resolve().parent, 'public_function/config.yaml')
|
|
|
|
|
|
try:
|
|
|
|
|
|
# 这里假设read_config函数存在
|
|
|
|
|
|
from public_function.public_func import read_config
|
|
|
|
|
|
return read_config(config_path)
|
|
|
|
|
|
except ImportError:
|
2025-11-27 16:00:41 +08:00
|
|
|
|
print(f"未找到read_config函数,使用默认配置")
|
2025-11-26 17:40:11 +08:00
|
|
|
|
return {'host': 'localhost', 'port': 3306, 'user': 'root', 'password': 'password', 'db': 'test_db', 'max_overflow': 10}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_account_manager():
|
|
|
|
|
|
"""获取账号管理器实例"""
|
|
|
|
|
|
config = get_config()
|
|
|
|
|
|
try:
|
|
|
|
|
|
from account_management.deal_account import DealAccount
|
|
|
|
|
|
return DealAccount(config)
|
|
|
|
|
|
except ImportError:
|
2025-11-27 16:00:41 +08:00
|
|
|
|
print(f"未找到DealAccount类,返回模拟实例")
|
2025-11-26 17:40:11 +08:00
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-11-28 15:38:58 +08:00
|
|
|
|
def get_task_manager():
|
2025-11-28 15:08:58 +08:00
|
|
|
|
"""任务获取管理器实例"""
|
2025-11-26 17:40:11 +08:00
|
|
|
|
config = get_config()
|
|
|
|
|
|
try:
|
|
|
|
|
|
from task_management.all_task_management import AllTask
|
|
|
|
|
|
return AllTask(config)
|
|
|
|
|
|
except ImportError:
|
2025-11-27 16:00:41 +08:00
|
|
|
|
print(f"未找到AllTask类,返回模拟实例")
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_reset_manager():
|
|
|
|
|
|
"""设备重置管理器"""
|
|
|
|
|
|
config = get_config()
|
|
|
|
|
|
try:
|
|
|
|
|
|
from task_management.all_task_management import AllTask
|
|
|
|
|
|
return AllTask(config)
|
|
|
|
|
|
except ImportError:
|
|
|
|
|
|
print("未找到 类,返回模拟实例")
|
2025-11-26 17:40:11 +08:00
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 账号处理相关
|
|
|
|
|
|
@app.get("/obtain_account", summary="获取可用账号")
|
2025-11-28 17:13:08 +08:00
|
|
|
|
async def obtain_account(app_name: str, country: str, pad_code: str, account_manager: Any = Depends(get_account_manager)):
|
2025-11-26 17:40:11 +08:00
|
|
|
|
"""
|
|
|
|
|
|
获取指定应用的可用账号
|
|
|
|
|
|
- **app_name**: 应用名称
|
2025-11-28 17:13:08 +08:00
|
|
|
|
pad_code
|
2025-11-26 17:40:11 +08:00
|
|
|
|
"""
|
|
|
|
|
|
if not app_name or not app_name.strip():
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="应用名称不能为空")
|
|
|
|
|
|
if not country or not country.strip():
|
|
|
|
|
|
raise HTTPException(status_code=400, detail="国家不能为空会")
|
|
|
|
|
|
try:
|
|
|
|
|
|
result = await account_manager.obtain_account_info(app_name, country)
|
|
|
|
|
|
if result:
|
2025-11-28 17:13:08 +08:00
|
|
|
|
# 将账号和设备进行记录
|
2025-11-26 17:40:11 +08:00
|
|
|
|
return {"code": 200, "message": "获取账号成功", "data": result[0]}
|
|
|
|
|
|
else:
|
|
|
|
|
|
raise HTTPException(status_code=404, detail="没有可用的账号")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"获取账号失败: {e}")
|
|
|
|
|
|
raise HTTPException(status_code=404, detail="{}".format(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/add_account", summary="新增账号")
|
|
|
|
|
|
async def add_account(account_data: AccountCreate, account_manager: Any = Depends(get_account_manager)):
|
|
|
|
|
|
"""
|
|
|
|
|
|
新增爬虫账号
|
|
|
|
|
|
- **account_id**: 账号ID
|
|
|
|
|
|
- **password**: 密码
|
|
|
|
|
|
- **app_name**: 应用名称
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
2025-11-27 11:39:28 +08:00
|
|
|
|
result = await account_manager.add_account([account_data.model_dump()])
|
2025-11-27 10:44:09 +08:00
|
|
|
|
if result:
|
|
|
|
|
|
return {"code": 200, "message": "新增账号成功", "data": result}
|
|
|
|
|
|
raise HTTPException(status_code=404, detail="新增账号失败")
|
2025-11-26 17:40:11 +08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"新增账号失败: {e}")
|
|
|
|
|
|
raise HTTPException(status_code=500, detail="新增账号失败,失败原因:{}".format(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-11-28 18:34:59 +08:00
|
|
|
|
@app.post("/update_account", summary="账号状态修改")
|
2025-11-27 10:44:09 +08:00
|
|
|
|
async def update_account(account_data: AccountUpdate, account_manager: Any = Depends(get_account_manager)):
|
2025-11-26 18:06:19 +08:00
|
|
|
|
"""
|
|
|
|
|
|
删除爬虫账号
|
|
|
|
|
|
- **account_id**: 账号ID
|
|
|
|
|
|
- **password**: 密码
|
|
|
|
|
|
- **app_name**: 应用名称
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
2025-11-27 11:39:28 +08:00
|
|
|
|
data = account_data.model_dump()
|
2025-11-27 10:44:09 +08:00
|
|
|
|
set_param = {"status": data["status"]}
|
|
|
|
|
|
params = (data["account_id"], data["app_name"])
|
|
|
|
|
|
where_conditions = "account_id = %s and app_name = %s "
|
|
|
|
|
|
result = await account_manager.update_account_info(set_param=set_param, where_conditions=where_conditions, params=params)
|
|
|
|
|
|
if result:
|
2025-11-28 10:33:31 +08:00
|
|
|
|
return {"code": 200, "message": "账号状态修改成功", "data": result}
|
|
|
|
|
|
raise HTTPException(status_code=404, detail="账号状态修改失败")
|
2025-11-26 18:06:19 +08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"新增账号失败: {e}")
|
2025-11-28 10:33:31 +08:00
|
|
|
|
raise HTTPException(status_code=500, detail="账号失败,请重试,失败原因:{}".format(e))
|
2025-11-26 18:06:19 +08:00
|
|
|
|
|
|
|
|
|
|
|
2025-11-28 18:34:59 +08:00
|
|
|
|
@app.post("/get_crawler_task", summary="手机获取任务")
|
2025-11-28 18:32:41 +08:00
|
|
|
|
async def get_crawler_task(task_data: CrawlerItem, task_manager: Any = Depends(get_task_manager)):
|
2025-11-28 15:08:58 +08:00
|
|
|
|
"""
|
|
|
|
|
|
获取指定应用的可用账号
|
|
|
|
|
|
- **app_name**: 应用名称
|
|
|
|
|
|
- **country**: 应用名称
|
|
|
|
|
|
"""
|
2025-11-28 15:38:58 +08:00
|
|
|
|
try:
|
|
|
|
|
|
params = task_data.model_dump()
|
|
|
|
|
|
result = task_manager.get_task_item(params)
|
|
|
|
|
|
if result:
|
2025-11-28 18:32:41 +08:00
|
|
|
|
data = {"task_id": result["task_id"], "status": 4}
|
|
|
|
|
|
await task_manager.update_task_record(data)
|
2025-11-28 15:38:58 +08:00
|
|
|
|
return {"code": 200, "message": "任务获取成功", "data": result}
|
2025-11-29 16:36:34 +08:00
|
|
|
|
else:
|
|
|
|
|
|
return {"code": 200, "message": "暂时没有任务", "data": None}
|
2025-11-28 15:38:58 +08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"获取任务失败,失败原因: {e}")
|
|
|
|
|
|
raise HTTPException(status_code=500, detail="获取任务失败;失败原因{}".format(e))
|
2025-11-28 15:08:58 +08:00
|
|
|
|
|
|
|
|
|
|
|
2025-11-28 18:34:59 +08:00
|
|
|
|
@app.post("/receive_data", summary="接收抓取数据")
|
2025-11-26 18:56:26 +08:00
|
|
|
|
async def receive_data(task_data: DataReceive, task_manager: Any = Depends(get_task_manager)):
|
2025-11-26 17:40:11 +08:00
|
|
|
|
"""数据接收接口"""
|
2025-11-26 18:56:26 +08:00
|
|
|
|
try:
|
2025-11-28 17:13:08 +08:00
|
|
|
|
# 需要对账号设备进行记录,便于统计每个账号采集数据数量
|
2025-11-27 11:39:28 +08:00
|
|
|
|
params = task_data.model_dump()
|
2025-11-26 18:56:26 +08:00
|
|
|
|
result = await task_manager.deal_receive_data(params)
|
|
|
|
|
|
if result:
|
|
|
|
|
|
return {"code": 200, "message": "数据保存成功", "data": result}
|
|
|
|
|
|
raise HTTPException(status_code=404, detail="抓取商品数据失败,请重新尝试")
|
|
|
|
|
|
except Exception as e:
|
2025-11-28 14:50:01 +08:00
|
|
|
|
print(f"商品数据处理失败,失败原因: {e}")
|
2025-11-26 18:56:26 +08:00
|
|
|
|
raise HTTPException(status_code=500, detail="获取数据失败;失败原因{}".format(e))
|
2025-11-26 17:40:11 +08:00
|
|
|
|
|
|
|
|
|
|
|
2025-11-27 14:57:57 +08:00
|
|
|
|
@app.post("/get_goods_info", summary="客户端获取商品数据")
|
2025-11-28 17:38:20 +08:00
|
|
|
|
async def get_goods_info(task_data: GoodsInfo, task_manager: Any = Depends(get_task_manager), token: Optional[str] = Header(None)):
|
2025-11-27 14:57:57 +08:00
|
|
|
|
"""客户端获取商品数据接口"""
|
2025-11-28 17:13:08 +08:00
|
|
|
|
task_id = uuid.uuid4().hex
|
2025-11-26 17:40:11 +08:00
|
|
|
|
try:
|
2025-11-27 11:39:28 +08:00
|
|
|
|
params = task_data.model_dump()
|
2025-11-28 17:13:08 +08:00
|
|
|
|
params['task_id'] = task_id
|
|
|
|
|
|
params["app_name"] = "Shopee"
|
2025-11-28 17:14:54 +08:00
|
|
|
|
params['region'] = params["host"]
|
2025-11-28 17:38:20 +08:00
|
|
|
|
params["token"] = token
|
2025-11-28 17:13:08 +08:00
|
|
|
|
await task_manager.insert_task_record(params)
|
|
|
|
|
|
print(f"开始处理任务: {task_id}")
|
|
|
|
|
|
result = await task_manager.deal_shopee_task(params)
|
2025-11-26 17:40:11 +08:00
|
|
|
|
if result:
|
2025-11-29 16:14:23 +08:00
|
|
|
|
print(f"任务处理成功: {task_id}")
|
2025-11-28 17:13:08 +08:00
|
|
|
|
data = {"task_id": params["task_id"], "status": 2}
|
|
|
|
|
|
await task_manager.update_task_record(data)
|
2025-11-27 14:57:57 +08:00
|
|
|
|
return {"code": 200, "message": "数据获取成功", "data": result}
|
2025-11-28 17:13:08 +08:00
|
|
|
|
print(f"任务处理失败: {task_id}")
|
|
|
|
|
|
raise HTTPException(status_code=503, detail="抓取商品数据失败,请重新尝试")
|
2025-11-26 17:40:11 +08:00
|
|
|
|
except Exception as e:
|
2025-11-28 17:13:08 +08:00
|
|
|
|
print(f"任务异常 - ID: {task_id}, 错误: {str(e)}")
|
|
|
|
|
|
raise HTTPException(status_code=503, detail="抓取商品数据失败,请重新尝试")
|
2025-11-26 17:40:11 +08:00
|
|
|
|
|
|
|
|
|
|
|
2025-11-29 17:42:10 +08:00
|
|
|
|
@app.post("/add_token", summary="添加token")
|
2025-11-29 10:18:38 +08:00
|
|
|
|
async def add_token(task_data: TokenItem, task_manager: Any = Depends(get_task_manager)):
|
2025-11-27 16:00:41 +08:00
|
|
|
|
"""设备重置接口"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
params = task_data.model_dump()
|
2025-11-29 10:18:38 +08:00
|
|
|
|
result = task_manager.add_token(params.get("token"))
|
|
|
|
|
|
print(result)
|
|
|
|
|
|
# result = True
|
2025-11-27 16:00:41 +08:00
|
|
|
|
if result:
|
2025-11-29 10:18:38 +08:00
|
|
|
|
print(f"新增token:{params.get("token")} 成功")
|
2025-11-27 16:00:41 +08:00
|
|
|
|
return {"code": 200, "message": "<UNK>", "data": result}
|
2025-11-29 10:18:38 +08:00
|
|
|
|
raise HTTPException(status_code=404, detail=f"新增token:{params.get("token")} 失败")
|
2025-11-27 16:00:41 +08:00
|
|
|
|
except Exception as e:
|
2025-11-29 10:18:38 +08:00
|
|
|
|
print(f"新增token:{params.get("token")} 失败,失败原因: {e}")
|
|
|
|
|
|
raise HTTPException(status_code=500, detail=f"新增token失败;失败原因{e}")
|
2025-11-27 10:20:20 +08:00
|
|
|
|
|
|
|
|
|
|
|
2025-11-29 17:42:10 +08:00
|
|
|
|
@app.post("/reset_task", summary="重置任务")
|
|
|
|
|
|
async def reset_task(task_data: ResetTask, task_manager: Any = Depends(get_task_manager)):
|
|
|
|
|
|
# 任务上报失败,需要对任务进行重置
|
|
|
|
|
|
try:
|
|
|
|
|
|
params = task_data.model_dump()
|
|
|
|
|
|
result = await task_manager.deal_reset_task(params)
|
|
|
|
|
|
if result:
|
|
|
|
|
|
print(f"任务:{params.get("task_id")} 重启成功")
|
|
|
|
|
|
return {"code": 200, "message": f"任务:{params.get("task_id")} 重启成功", }
|
|
|
|
|
|
else:
|
|
|
|
|
|
return {"code": 200, "message": f"没有查询到相关任务:{params.get("task_id")}"}, 404
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"新增token:{params.get("token")} 失败,失败原因: {e}")
|
|
|
|
|
|
raise HTTPException(status_code=404, detail=f"任务:{params.get("task_id")} 重启失败")
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-11-26 17:40:11 +08:00
|
|
|
|
if __name__ == '__main__':
|
2025-11-28 14:50:01 +08:00
|
|
|
|
uvicorn.run(app, host="0.0.0.0", port=8000)
|