代码提交
This commit is contained in:
commit
ebaabae188
|
|
@ -0,0 +1 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
from typing import Optional, Dict, Any, List
|
||||
|
||||
from public_function.asyn_mysql import AsyncMySQL
|
||||
|
||||
|
||||
class DealAccount:
|
||||
def __init__(self, config_data: Dict[str, Any]):
|
||||
self.config_data = config_data
|
||||
self.db_pool: Optional[AsyncMySQL] = AsyncMySQL(self.config_data["advert_policy"])
|
||||
|
||||
async def add_account(self, params: List[Dict[str, Any]]):
|
||||
"""新增账户"""
|
||||
await self.db_pool.initialize()
|
||||
result = await self.db_pool.insert_many(table="crawler_account_record_info", data=params)
|
||||
if result:
|
||||
return True
|
||||
return False
|
||||
|
||||
async def delete_account(self, params: List[str]):
|
||||
"""删除账户"""
|
||||
if len(params) == 1:
|
||||
condition = "account_id={}".format(params[0])
|
||||
else:
|
||||
condition = f"account_id in ({','.join(params)})"
|
||||
await self.db_pool.initialize()
|
||||
result = await self.db_pool.delete(table="crawler_account_record_info", where_conditions=condition)
|
||||
if result:
|
||||
return True
|
||||
return False
|
||||
|
||||
async def query_account_info(self, params: Dict[str, Any]):
|
||||
"""查询具体账户信息"""
|
||||
sql_str = f"""select * from crawler_account_record_info where account_id='{params['account_id']}'"""
|
||||
await self.db_pool.initialize()
|
||||
result = await self.db_pool.fetch_all(sql_str, )
|
||||
if result:
|
||||
return result
|
||||
return []
|
||||
|
||||
async def obtain_account_info(self, app_name, country, number=1):
|
||||
"""获取指定个数账户信息"""
|
||||
sql_str = f"""select account_id,password from crawler_account_record_info
|
||||
where status=1 and app_name='{app_name}' and country='{country}' limit {number}"""
|
||||
await self.db_pool.initialize()
|
||||
result = await self.db_pool.fetch_all(sql_str, )
|
||||
if result:
|
||||
return result
|
||||
return []
|
||||
|
||||
async def update_account_info(self, set_param: Dict[str, Any], params):
|
||||
"""更新账户信息"""
|
||||
# params = {'name': '张三', 'age': 25}
|
||||
where_conditions = "account_id = %s"
|
||||
await self.db_pool.initialize()
|
||||
affected_rows = await self.db_pool.update(table='crawler_account_record_info', set_columns=set_param,
|
||||
where_conditions=where_conditions, params=params)
|
||||
if affected_rows:
|
||||
return True
|
||||
return False
|
||||
|
|
@ -0,0 +1 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
|
@ -0,0 +1 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
|
@ -0,0 +1,112 @@
|
|||
import os
|
||||
import yaml
|
||||
import uuid
|
||||
import asyncio
|
||||
import uvicorn
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional, List
|
||||
from fastapi import FastAPI, HTTPException, Depends
|
||||
|
||||
from public_function.public_func import read_config
|
||||
from model.model import AccountCreate, AccountUpdate, CrawlerTask
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
def get_config():
|
||||
"""获取配置文件"""
|
||||
config_path = os.path.join(Path(__file__).resolve().parent, 'public_function/config.yaml')
|
||||
try:
|
||||
# 这里假设read_config函数存在
|
||||
from public_function.public_func import read_config
|
||||
return read_config(config_path)
|
||||
except ImportError:
|
||||
logger.warning("未找到read_config函数,使用默认配置")
|
||||
return {'host': 'localhost', 'port': 3306, 'user': 'root', 'password': 'password', 'db': 'test_db', 'max_overflow': 10}
|
||||
|
||||
|
||||
def get_account_manager():
|
||||
"""获取账号管理器实例"""
|
||||
config = get_config()
|
||||
try:
|
||||
from account_management.deal_account import DealAccount
|
||||
return DealAccount(config)
|
||||
except ImportError:
|
||||
logger.warning("未找到DealAccount类,返回模拟实例")
|
||||
return None
|
||||
|
||||
|
||||
def get_task_manager():
|
||||
"""获任务管理器实例"""
|
||||
config = get_config()
|
||||
try:
|
||||
from task_management.all_task_management import AllTask
|
||||
return AllTask(config)
|
||||
except ImportError:
|
||||
logger.warning("未找到AllTask类,返回模拟实例")
|
||||
return None
|
||||
|
||||
|
||||
# 账号处理相关
|
||||
@app.get("/obtain_account", summary="获取可用账号")
|
||||
async def obtain_account(app_name: str, country: str, account_manager: Any = Depends(get_account_manager)):
|
||||
"""
|
||||
获取指定应用的可用账号
|
||||
- **app_name**: 应用名称
|
||||
"""
|
||||
if not app_name or not app_name.strip():
|
||||
raise HTTPException(status_code=400, detail="应用名称不能为空")
|
||||
if not country or not country.strip():
|
||||
raise HTTPException(status_code=400, detail="国家不能为空会")
|
||||
try:
|
||||
result = await account_manager.obtain_account_info(app_name, country)
|
||||
if result:
|
||||
return {"code": 200, "message": "获取账号成功", "data": result[0]}
|
||||
else:
|
||||
raise HTTPException(status_code=404, detail="没有可用的账号")
|
||||
except Exception as e:
|
||||
print(f"获取账号失败: {e}")
|
||||
raise HTTPException(status_code=404, detail="{}".format(e))
|
||||
|
||||
|
||||
@app.post("/add_account", summary="新增账号")
|
||||
async def add_account(account_data: AccountCreate, account_manager: Any = Depends(get_account_manager)):
|
||||
"""
|
||||
新增爬虫账号
|
||||
- **account_id**: 账号ID
|
||||
- **password**: 密码
|
||||
- **app_name**: 应用名称
|
||||
"""
|
||||
try:
|
||||
print(account_data.dict())
|
||||
# 这里应该调用实际的添加账号方法
|
||||
result = await account_manager.add_account([account_data.dict()])
|
||||
return {"code": 200, "message": "新增账号成功", "data": result}
|
||||
except Exception as e:
|
||||
print(f"新增账号失败: {e}")
|
||||
raise HTTPException(status_code=500, detail="新增账号失败,失败原因:{}".format(e))
|
||||
|
||||
|
||||
@app.post("/receive_data")
|
||||
async def receive_data(params: Dict[str, Any]):
|
||||
"""数据接收接口"""
|
||||
# 接收道德数据表写入redis后,存入mysql(可以根据业务需求确认是否需要永久保存)
|
||||
|
||||
|
||||
@app.get("/crawler_task")
|
||||
async def crawler_task(task_data: CrawlerTask, task_manager: Any = Depends(get_task_manager)):
|
||||
"""爬虫任务接口"""
|
||||
try:
|
||||
params = task_data.dict()
|
||||
params['task_id'] = uuid.uuid4().hex
|
||||
result = await task_manager.task_distribution(params)
|
||||
if result:
|
||||
return {"code": 200, "message": "<UNK>", "data": result}
|
||||
raise HTTPException(status_code=404, detail="抓取商品数据失败,请重新尝试")
|
||||
except Exception as e:
|
||||
print(f"<UNK>: {e}")
|
||||
raise HTTPException(status_code=500, detail="获取数据失败;失败原因{}".format(e))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
uvicorn.run(app)
|
||||
|
|
@ -0,0 +1 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# 定义数据模型
|
||||
class AccountCreate(BaseModel):
|
||||
account_id: str = Field(..., min_length=1, max_length=128, description="账号ID")
|
||||
password: str = Field(..., min_length=1, max_length=128, description="密码")
|
||||
country: str = Field(..., min_length=1, max_length=128, description="账号所在国家")
|
||||
app_name: str = Field(..., min_length=1, max_length=128, description="应用名称")
|
||||
|
||||
|
||||
class AccountUpdate(BaseModel):
|
||||
account_id: str = Field(..., description="账号ID")
|
||||
status: int = Field(..., ge=1, le=2, description="状态:1-空闲,2-使用中")
|
||||
|
||||
|
||||
class CrawlerTask(BaseModel):
|
||||
country: str = Field(..., min_length=1, max_length=128, description="账号所在国家")
|
||||
app_name: str = Field(..., min_length=1, max_length=128, description="应用名称")
|
||||
goods_id: str = Field(..., min_length=1, max_length=128, description="账号所在国家")
|
||||
store_id: str = Field(..., min_length=1, max_length=128, description="应用名称")
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
# param = {'endTime': 1762591513782, 'padCode': 'ACP5B44DWZTR7DP6', 'padStatus': None, 'taskContent': '', 'taskId': 3422505,
|
||||
# 'taskResult': 'Success', 'taskStatus': 3}
|
||||
# task_info = {'mirrorName': 'bkp-ACP251016US1U4WP-1762437337310', 'countryCode': 'US',
|
||||
# 'deviceId': 'ACP251016US1U4WP-2025-11-06 07:48:47-0gsctk80', 'retentionScript': 'reback.js', 'retentionFuture': True,
|
||||
# 'pad_code': ['ACP5B44DWZTR7DP6'], 'task_id': 'acp5b44dwztr7dp6_d773b1a222d84a089ec38d8dacbd37f2', 'taskId': 3422505,
|
||||
# 'task_type': 'restore'}
|
||||
# print(param.get('taskId', 0))
|
||||
# print(task_info.get('taskId', 0))
|
||||
# print(param.get("taskResult", ""))
|
||||
# if param.get('taskId', 0) == task_info.get("taskId", 0) and param.get("taskResult", "") == "Success":
|
||||
# print(task_info)
|
||||
# # 需要判断该任务是备份韩式还原
|
||||
# task_type = task_info.get("task_type", "")
|
||||
# if task_type == "restore":
|
||||
# print(f"{get_local_time()};{param['padCode']};实例还原成功")
|
||||
|
||||
|
|
@ -0,0 +1,142 @@
|
|||
import asyncio
|
||||
import aiomysql
|
||||
from typing import List, Tuple, Dict, Any
|
||||
|
||||
|
||||
class AsyncMySQL:
|
||||
def __init__(self, config_data: Dict):
|
||||
|
||||
self.config = {
|
||||
'host': config_data['host'],
|
||||
'port': config_data['port'],
|
||||
'user': config_data['user'],
|
||||
'password': config_data['password'],
|
||||
'db': config_data['db'],
|
||||
'autocommit': True,
|
||||
'minsize': 1,
|
||||
'maxsize': config_data['max_overflow'],
|
||||
}
|
||||
self.pool = None
|
||||
|
||||
async def initialize(self):
|
||||
"""初始化连接池"""
|
||||
self.pool = await aiomysql.create_pool(**self.config)
|
||||
return self
|
||||
|
||||
async def close(self):
|
||||
"""关闭连接池"""
|
||||
if self.pool:
|
||||
self.pool.close()
|
||||
await self.pool.wait_closed()
|
||||
|
||||
async def execute(self, query: str, params=None):
|
||||
"""执行单条SQL语句"""
|
||||
async with self.pool.acquire() as conn:
|
||||
async with conn.cursor() as cursor:
|
||||
await cursor.execute(query, params)
|
||||
return cursor.rowcount
|
||||
|
||||
async def executemany(self, query: str, params_list: List[Tuple]):
|
||||
"""批量执行SQL语句"""
|
||||
async with self.pool.acquire() as conn:
|
||||
async with conn.cursor() as cursor:
|
||||
await cursor.executemany(query, params_list)
|
||||
return cursor.rowcount
|
||||
|
||||
async def insert_many_tuple(self, table: str, columns: List[str], data: List[Tuple]):
|
||||
"""批量插入数据到指定表"""
|
||||
placeholders = ', '.join(['%s'] * len(columns))
|
||||
columns_str = ', '.join(columns)
|
||||
query = f"INSERT INTO {table} ({columns_str}) VALUES ({placeholders})"
|
||||
return await self.executemany(query, data)
|
||||
|
||||
async def insert_many(self, table: str, data: List[Dict[str, Any]]):
|
||||
columns = list(data[0].keys())
|
||||
# 从字典数据中提取值,转换为元组列表
|
||||
params_list = [tuple(record.get(col) for col in columns) for record in data]
|
||||
placeholders = ', '.join(['%s'] * len(columns))
|
||||
columns_str = ', '.join(columns)
|
||||
query = f"INSERT INTO {table} ({columns_str}) VALUES ({placeholders})"
|
||||
return await self.executemany(query, params_list)
|
||||
|
||||
async def fetch_all(self, query: str, params=None) -> List[Dict[str, Any]]:
|
||||
"""查询多条记录"""
|
||||
async with self.pool.acquire() as conn:
|
||||
async with conn.cursor(aiomysql.DictCursor) as cursor:
|
||||
await cursor.execute(query, params)
|
||||
return await cursor.fetchall()
|
||||
|
||||
async def delete(self, table: str, where_conditions: str = None, params: Tuple = None) -> int:
|
||||
|
||||
if where_conditions:
|
||||
query = f"DELETE FROM {table} WHERE {where_conditions}"
|
||||
else:
|
||||
query = f"DELETE FROM {table}"
|
||||
|
||||
return await self.execute(query, params)
|
||||
|
||||
async def delete_many(self, table: str, conditions_list: List[Tuple[str, Tuple]]) -> int:
|
||||
|
||||
total_affected = 0
|
||||
for where_conditions, params in conditions_list:
|
||||
affected_rows = await self.delete(table, where_conditions, params)
|
||||
total_affected += affected_rows
|
||||
return total_affected
|
||||
|
||||
async def update(self, table: str, set_columns: Dict[str, Any], where_conditions: str = None,
|
||||
params: Tuple = None) -> int:
|
||||
"""
|
||||
更新单条记录
|
||||
:param table: 表名
|
||||
:param set_columns: 要更新的字段和值字典
|
||||
:param where_conditions: WHERE条件
|
||||
:param params: 参数列表
|
||||
:return: 受影响的行数
|
||||
"""
|
||||
set_clause = ', '.join([f"{k} = %s" for k in set_columns])
|
||||
query = f"UPDATE {table} SET {set_clause}"
|
||||
|
||||
if where_conditions:
|
||||
query += f" WHERE {where_conditions}"
|
||||
|
||||
# 构造参数列表
|
||||
update_params = list(set_columns.values())
|
||||
if where_conditions:
|
||||
if params:
|
||||
update_params.extend(params)
|
||||
else:
|
||||
raise ValueError("当使用WHERE条件时,必须提供参数")
|
||||
|
||||
return await self.execute(query, update_params)
|
||||
|
||||
async def update_many(self, table: str, set_columns: Dict[str, Any], conditions_list: List[Tuple[str, Tuple]]) -> int:
|
||||
"""
|
||||
批量更新多条记录
|
||||
:param table: 表名
|
||||
:param set_columns: 要更新的字段和值字典
|
||||
:param conditions_list: 条件列表,每个元素是(where_conditions, params)
|
||||
:return: 受影响的总行数
|
||||
"""
|
||||
set_clause = ', '.join([f"{k} = %s" for k in set_columns])
|
||||
total_affected = 0
|
||||
|
||||
for where_conditions, params in conditions_list:
|
||||
query = f"UPDATE {table} SET {set_clause} WHERE {where_conditions}"
|
||||
update_params = list(set_columns.values())
|
||||
if params:
|
||||
update_params.extend(params)
|
||||
|
||||
affected_rows = await self.execute(query, update_params)
|
||||
total_affected += affected_rows
|
||||
|
||||
return total_affected
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
from public_function.public_func import read_config
|
||||
|
||||
config = read_config(r"C:\workfile\crawler_task_management\public_function\config.yaml")
|
||||
obj = AsyncMySQL(config['advert_policy'])
|
||||
sql_str = f"""select account_id,password from crawler_account_record_info
|
||||
where status=1 and app_name='xiapi' limit 1"""
|
||||
obj.fetch_all(sql_str)
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
advert_policy:
|
||||
host: pc-2ze85s1hw783u87wlo.mysql.polardb.rds.aliyuncs.com
|
||||
port: 3306
|
||||
user: policyuser
|
||||
password: IvM@ck#z9$Eqy3KGBK74hk
|
||||
db: advert_policy
|
||||
table: save_create_device_info
|
||||
# 连接池配置
|
||||
pool_size: 10
|
||||
max_overflow: 20
|
||||
pool_recycle: 3600
|
||||
pool_timeout: 30
|
||||
|
||||
access_key_id: LTAI5tA92Av7DQmSQY2MTJPe
|
||||
access_key_secret: KI5s3C78HcPX9MDjUwPwVDytFzxEjY
|
||||
|
||||
redis_config:
|
||||
host: 182.92.181.218
|
||||
port: 6372
|
||||
password: qaz@wsx
|
||||
db: 0
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def read_config(path):
|
||||
if Path(path).exists():
|
||||
with open(path, encoding="utf-8") as f:
|
||||
config = yaml.safe_load(f)
|
||||
return config
|
||||
else:
|
||||
raise FileNotFoundError
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
import os
|
||||
import json
|
||||
import redis
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
|
||||
class RedisTaskManager:
|
||||
def __init__(self, config_data: Dict[str, Any]):
|
||||
self.config_data = config_data
|
||||
"""初始化Redis连接"""
|
||||
self.redis_client = redis.Redis(
|
||||
host=self.config_data['redis_config']['host'],
|
||||
port=self.config_data['redis_config']['port'],
|
||||
password=self.config_data['redis_config']['password'],
|
||||
db=self.config_data['redis_config']['db'],
|
||||
decode_responses=True
|
||||
)
|
||||
self.expire_hours = 24 # 过期时间24小时
|
||||
|
||||
def write_data(self, key: str, data: Any, expire_time: Optional[int] = None):
|
||||
"""
|
||||
写入数据到Redis,设置过期时间
|
||||
Args:
|
||||
key: Redis键名
|
||||
data: 要存储的数据
|
||||
expire_time: 自定义过期时间(秒),默认24小时
|
||||
Returns:
|
||||
bool: 写入是否成功
|
||||
"""
|
||||
try:
|
||||
# 如果数据是字典或列表,先序列化为JSON
|
||||
if isinstance(data, (dict, list)):
|
||||
data_str = json.dumps(data, ensure_ascii=False)
|
||||
else:
|
||||
data_str = str(data)
|
||||
# 设置过期时间,优先使用自定义时间
|
||||
expire_seconds = expire_time if expire_time is not None else self.expire_hours * 3600
|
||||
result = self.redis_client.setex(key, expire_seconds, data_str)
|
||||
|
||||
if result:
|
||||
print(f"数据写入成功 - 键: {key}, 过期时间: {expire_seconds}秒")
|
||||
return True
|
||||
else:
|
||||
print(f"数据写入失败 - 键: {key}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
print(f"写入Redis数据时发生错误: {e}")
|
||||
return False
|
||||
|
||||
def read_data(self, key: str):
|
||||
"""
|
||||
从Redis读取数据
|
||||
Args:
|
||||
key: Redis键名
|
||||
Returns:
|
||||
Optional[Any]: 读取到的数据,如果键不存在返回None
|
||||
"""
|
||||
try:
|
||||
data_str = self.redis_client.get(key)
|
||||
if data_str is None:
|
||||
print(f"键不存在或已过期 - 键: {key}")
|
||||
return None
|
||||
# 尝试解析JSON数据
|
||||
try:
|
||||
return json.loads(data_str)
|
||||
except json.JSONDecodeError:
|
||||
# 如果不是JSON格式,返回原始字符串
|
||||
return data_str
|
||||
except Exception as e:
|
||||
print(f"读取Redis数据时发生错误: {e}")
|
||||
return None
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
CREATE TABLE crawler_device_info_record
|
||||
(
|
||||
id INT(11) NOT NULL AUTO_INCREMENT COMMENT '表自增ID',
|
||||
pad_code VARCHAR(50) NOT NULL COMMENT '设备唯一标识',
|
||||
name VARCHAR(100) NOT NULL DEFAULT '' COMMENT '设备名称',
|
||||
status int NOT NULL DEFAULT 1 COMMENT '任务状态:1、空闲 ;2、使用中',
|
||||
create_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
|
||||
update_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE KEY uk_pad_code (pad_code),
|
||||
KEY idx_status (status)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='爬虫设备信息表';
|
||||
|
||||
create table crawler_task_record_info
|
||||
(
|
||||
id INT(11) NOT NULL AUTO_INCREMENT COMMENT '表自增ID',
|
||||
task_id varchar(50) NOT NULL COMMENT '任务ID',
|
||||
app_name VARCHAR(50) NOT NULL COMMENT 'app名称',
|
||||
country VARCHAR(50) NOT NULL COMMENT '国家',
|
||||
status int NOT NULL DEFAULT 1 COMMENT '任务状态:1、开始执行;2、执行成功;3、执行失败',
|
||||
create_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
|
||||
update_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE KEY uk_task_id (task_id),
|
||||
KEY idx_status (status)
|
||||
)ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='爬虫任务执行记录表';
|
||||
|
||||
|
||||
|
||||
CREATE TABLE crawler_account_record_info
|
||||
(
|
||||
id INT(11) NOT NULL AUTO_INCREMENT COMMENT '表自增ID',
|
||||
account_id VARCHAR(50) NOT NULL COMMENT '账号名称',
|
||||
password VARCHAR(50) NOT NULL COMMENT '账号密码',
|
||||
country VARCHAR(50) NOT NULL COMMENT '国家',
|
||||
status INT NOT NULL DEFAULT 1 COMMENT '任务状态:1、空闲;2、使用中',
|
||||
app_name VARCHAR(50) NOT NULL COMMENT 'app名称',
|
||||
create_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
|
||||
update_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE KEY uk_account_id (account_id,app_name),
|
||||
KEY idx_status (status),
|
||||
KEY idx_country (country)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='爬虫账号记录表';
|
||||
|
||||
|
||||
CREATE TABLE goods_information_record
|
||||
(
|
||||
id INT(11) NOT NULL AUTO_INCREMENT COMMENT '表自增ID',
|
||||
goods_id VARCHAR(50) NOT NULL COMMENT '商品ID',
|
||||
store_id VARCHAR(50) NOT NULL COMMENT '店铺ID',
|
||||
country VARCHAR(50) NOT NULL COMMENT '国家',
|
||||
app_name VARCHAR(50) NOT NULL COMMENT 'app名称',
|
||||
goods_info text NOT NULL COMMENT '商品具体价格详情等信息',
|
||||
create_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
|
||||
update_time DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE KEY uk_goods_info (goods_id,store_id,country,app_name),
|
||||
KEY idx_status (status),
|
||||
KEY idx_country (country)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='爬虫账号记录表';
|
||||
|
|
@ -0,0 +1 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import time
|
||||
from typing import Dict, Any
|
||||
|
||||
from public_function.asyn_mysql import AsyncMySQL
|
||||
from public_function.redis_task_manager import RedisTaskManager
|
||||
|
||||
|
||||
class AllTask:
|
||||
def __init__(self, config_data: Dict[str, Any]):
|
||||
self.config_data = config_data
|
||||
self.redis_conn = RedisTaskManager(self.config_data["redis_config"])
|
||||
self.db_pool: Optional[AsyncMySQL] = AsyncMySQL(self.config_data["advert_policy"])
|
||||
|
||||
async def deal_shopee_task(self, param):
|
||||
# 查询redis数据库,redis 数据库存在该数据直接返回
|
||||
key = f"{param['app_name']}:{param['store_id']}:{param['goods_id']}"
|
||||
result = self.redis_conn.read_data(key)
|
||||
if result:
|
||||
return result
|
||||
# 调用对应爬虫任务
|
||||
# 任务结束后开始等待
|
||||
endtime = time.time() + 55
|
||||
while time.time() < endtime:
|
||||
await time.sleep(1)
|
||||
result = self.redis_conn.read_data(key)
|
||||
if result:
|
||||
return result
|
||||
return []
|
||||
|
||||
async def task_distribution(self, data: Dict[str, Any]):
|
||||
# 需要对任务进行记录
|
||||
try:
|
||||
await self.db_pool.initialize()
|
||||
# 将任务记录到mysql
|
||||
param = {"app_name": data["app_name"], "task_id": data["task_id"], "country": data["country"]}
|
||||
await self.db_pool.insert_many(table="crawler_task_record_info", data=param)
|
||||
except Exception as e:
|
||||
print("将任务记录到数据库失败,失败原因为:{}".format(e))
|
||||
if param["app_name"] == "Shopee":
|
||||
result = await self.deal_shopee_task(param)
|
||||
if result:
|
||||
return result
|
||||
return None
|
||||
Loading…
Reference in New Issue