from clickhouse_connect.driver import AsyncClient

from app.log import logger


class ClickHouseInitializer:
    """Create the ClickHouse tables and the analytics view used for job data.

    Every object is created inside the ``job_data`` database with
    ``IF NOT EXISTS``, so running the initializer repeatedly is harmless.

    NOTE(review): nothing in this class creates the ``job_data`` database
    itself; it is presumably provisioned elsewhere -- confirm.

    NOTE(review): the SQL column comments label some columns as
    de-duplication fields, but the raw JSON tables use the plain
    ``MergeTree`` engine, which does not collapse duplicate rows on its own.
    De-duplication is presumably enforced by the writer -- confirm.
    """

    def __init__(self, client: AsyncClient):
        """Store the async ClickHouse client used to run every statement."""
        self.client = client

    async def _execute_ddl(
        self, ddl_sql: str, success_message: str, failure_prefix: str
    ) -> None:
        """Run one DDL statement and log the outcome.

        Args:
            ddl_sql: The statement passed to ``self.client.command``.
            success_message: Logged at info level once the command returns.
            failure_prefix: Text logged at error level before ``": <error>"``.

        Raises:
            Exception: Whatever the client raised, re-raised unchanged after
                it has been logged.
        """
        try:
            await self.client.command(ddl_sql)
            logger.info(success_message)
        except Exception as e:
            logger.error(f"{failure_prefix}: {e}")
            raise

    async def create_boss_job_json_table(self) -> None:
        """Create ``job_data.boss_job``, the raw-JSON table for BOSS jobs."""
        create_table_sql = """
        CREATE TABLE IF NOT EXISTS job_data.boss_job (
            id UInt64 DEFAULT 0,
            json_data String DEFAULT '',  -- 原始JSON数据
            job_id String DEFAULT '',     -- BOSS平台去重字段:jobBaseInfoVO.jobId
            created_at DateTime DEFAULT now(),
            updated_at DateTime DEFAULT now()
        ) ENGINE = MergeTree()
        ORDER BY created_at
        SETTINGS index_granularity = 8192;
        """
        await self._execute_ddl(
            create_table_sql,
            "BOSS职位JSON数据表 boss_job 创建成功",
            "创建BOSS职位JSON数据表失败",
        )

    async def create_boss_company_json_table(self) -> None:
        """Create ``job_data.boss_company``, the raw-JSON table for BOSS companies."""
        create_table_sql = """
        CREATE TABLE IF NOT EXISTS job_data.boss_company (
            id UInt64 DEFAULT 0,
            json_data String DEFAULT '',     -- 原始JSON数据
            company_name String DEFAULT '',  -- 公司名称去重字段
            created_at DateTime DEFAULT now(),
            updated_at DateTime DEFAULT now()
        ) ENGINE = MergeTree()
        ORDER BY created_at
        SETTINGS index_granularity = 8192;
        """
        await self._execute_ddl(
            create_table_sql,
            "BOSS公司JSON数据表 boss_company 创建成功",
            "创建BOSS公司JSON数据表失败",
        )

    async def create_qcwy_job_json_table(self) -> None:
        """Create ``job_data.qcwy_job``, the raw-JSON table for QCWY (51job) jobs."""
        create_table_sql = """
        CREATE TABLE IF NOT EXISTS job_data.qcwy_job (
            id UInt64 DEFAULT 0,
            json_data String DEFAULT '',         -- 原始JSON数据
            job_id String DEFAULT '',            -- QCWY平台去重字段:jobId
            update_date_time String DEFAULT '',  -- QCWY平台去重字段:updateDateTime
            created_at DateTime DEFAULT now(),
            updated_at DateTime DEFAULT now()
        ) ENGINE = MergeTree()
        ORDER BY created_at
        SETTINGS index_granularity = 8192;
        """
        await self._execute_ddl(
            create_table_sql,
            "前程无忧职位JSON数据表 qcwy_job 创建成功",
            "创建前程无忧职位JSON数据表失败",
        )

    async def create_qcwy_company_json_table(self) -> None:
        """Create ``job_data.qcwy_company``, the raw-JSON table for QCWY companies."""
        create_table_sql = """
        CREATE TABLE IF NOT EXISTS job_data.qcwy_company (
            id UInt64 DEFAULT 0,
            json_data String DEFAULT '',     -- 原始JSON数据
            company_name String DEFAULT '',  -- 公司名称去重字段
            created_at DateTime DEFAULT now(),
            updated_at DateTime DEFAULT now()
        ) ENGINE = MergeTree()
        ORDER BY created_at
        SETTINGS index_granularity = 8192;
        """
        await self._execute_ddl(
            create_table_sql,
            "前程无忧公司JSON数据表 qcwy_company 创建成功",
            "创建前程无忧公司JSON数据表失败",
        )

    async def create_zhilian_job_json_table(self) -> None:
        """Create ``job_data.zhilian_job``, the raw-JSON table for Zhilian jobs."""
        create_table_sql = """
        CREATE TABLE IF NOT EXISTS job_data.zhilian_job (
            id UInt64 DEFAULT 0,
            json_data String DEFAULT '',           -- 原始JSON数据
            number String DEFAULT '',              -- 智联平台去重字段:number
            first_publish_time String DEFAULT '',  -- 智联平台去重字段:firstPublishTime
            created_at DateTime DEFAULT now(),
            updated_at DateTime DEFAULT now()
        ) ENGINE = MergeTree()
        ORDER BY created_at
        SETTINGS index_granularity = 8192;
        """
        await self._execute_ddl(
            create_table_sql,
            "智联招聘职位JSON数据表 zhilian_job 创建成功",
            "创建智联招聘职位JSON数据表失败",
        )

    async def create_zhilian_company_json_table(self) -> None:
        """Create ``job_data.zhilian_company``, the raw-JSON table for Zhilian companies."""
        create_table_sql = """
        CREATE TABLE IF NOT EXISTS job_data.zhilian_company (
            id UInt64 DEFAULT 0,
            json_data String DEFAULT '',     -- 原始JSON数据
            company_name String DEFAULT '',  -- 公司名称去重字段
            created_at DateTime DEFAULT now(),
            updated_at DateTime DEFAULT now()
        ) ENGINE = MergeTree()
        ORDER BY created_at
        SETTINGS index_granularity = 8192;
        """
        await self._execute_ddl(
            create_table_sql,
            "智联招聘公司JSON数据表 zhilian_company 创建成功",
            "创建智联招聘公司JSON数据表失败",
        )

    async def create_pending_company_table(self) -> None:
        """Create ``job_data.pending_company``, the pending-company table.

        Unlike the raw-JSON tables this one uses ``ReplacingMergeTree(version)``
        ordered by ``(source, company_id)``.
        """
        create_table_sql = """
        CREATE TABLE IF NOT EXISTS job_data.pending_company (
            source String,
            company_id String,
            company_name String DEFAULT '',
            status String DEFAULT 'pending',
            error_msg String DEFAULT '',
            created_at DateTime DEFAULT now(),
            updated_at DateTime DEFAULT now(),
            version UInt64 DEFAULT 1
        ) ENGINE = ReplacingMergeTree(version)
        ORDER BY (source, company_id)
        SETTINGS index_granularity = 8192;
        """
        await self._execute_ddl(
            create_table_sql,
            "待处理公司表 pending_company 创建成功",
            "创建待处理公司表失败",
        )

    async def create_job_analytics_view(self) -> None:
        """Create ``job_data.job_analytics``, a unified view over the job tables.

        The view is a ``UNION ALL`` of ``boss_job``, ``qcwy_job`` and
        ``zhilian_job``; each branch pulls its fields out of ``json_data``
        with ``JSONExtractString``. ``salary_min`` and ``salary_max`` are
        hard-coded to ``0.0`` in every branch.
        """
        create_view_sql = """
        CREATE VIEW IF NOT EXISTS job_data.job_analytics AS
        SELECT
            'boss' as source,
            job_id,
            JSONExtractString(json_data, 'jobName') as position_name,
            JSONExtractString(json_data, 'brandName') as company_name,
            JSONExtractString(json_data, 'salaryDesc') as salary_text,
            0.0 as salary_min,
            0.0 as salary_max,
            JSONExtractString(json_data, 'cityName') as city,
            JSONExtractString(json_data, 'experienceName') as experience_required,
            JSONExtractString(json_data, 'degreeName') as education,
            created_at
        FROM job_data.boss_job

        UNION ALL

        SELECT
            'qcwy' as source,
            job_id,
            JSONExtractString(json_data, 'jobName') as position_name,
            JSONExtractString(json_data, 'companyName') as company_name,
            JSONExtractString(json_data, 'provideSalaryString') as salary_text,
            0.0,
            0.0,
            JSONExtractString(json_data, 'workCity') as city,
            JSONExtractString(json_data, 'workYear') as experience_required,
            JSONExtractString(json_data, 'degree') as education,
            created_at
        FROM job_data.qcwy_job

        UNION ALL

        SELECT
            'zhilian' as source,
            number as job_id,
            JSONExtractString(json_data, 'jobName') as position_name,
            JSONExtractString(json_data, 'companyName') as company_name,
            JSONExtractString(json_data, 'salary60') as salary_text,
            0.0,
            0.0,
            JSONExtractString(json_data, 'workCity') as city,
            JSONExtractString(json_data, 'workingExp') as experience_required,
            JSONExtractString(json_data, 'education') as education,
            created_at
        FROM job_data.zhilian_job
        """
        await self._execute_ddl(
            create_view_sql,
            "招聘数据分析视图 job_analytics 创建成功",
            "创建招聘数据分析视图失败",
        )

    async def initialize_all_tables(self) -> None:
        """Create every table and then the analytics view, in order.

        The steps run sequentially and stop at the first failure; the error
        is logged here as well and then re-raised to the caller.

        Raises:
            Exception: The first error raised by any creation step.
        """
        logger.info("开始初始化 ClickHouse 数据库表...")

        # The view selects from the three *_job tables, so it is created last.
        creation_steps = (
            # BOSS raw-JSON tables
            self.create_boss_job_json_table,
            self.create_boss_company_json_table,
            # QCWY raw-JSON tables
            self.create_qcwy_job_json_table,
            self.create_qcwy_company_json_table,
            # Zhilian raw-JSON tables
            self.create_zhilian_job_json_table,
            self.create_zhilian_company_json_table,
            # Pending-company table
            self.create_pending_company_table,
            # Unified analytics view
            self.create_job_analytics_view,
        )

        try:
            for create_step in creation_steps:
                await create_step()

            logger.info("ClickHouse 数据库表初始化完成")
        except Exception as e:
            logger.error(f"ClickHouse 数据库初始化失败: {e}")
            raise