JobData/jobs_spider/boss/enumerate_combos.py

69 lines
2.3 KiB
Python

import os
import json
def _load():
    """Load the city and position catalogues stored next to this module.

    Reads ``city.json`` and ``work.json`` from the directory that contains
    this file.  Cities are taken from ``zpData.cityList`` of ``city.json``.
    Positions are the third-level entries of ``zpData.config`` in
    ``work.json``: each category's ``subLevelModelList`` holds
    sub-categories, whose own ``subLevelModelList`` holds the positions.

    Missing or ``null`` sections, and entries that are not JSON objects, are
    skipped instead of raising, so a partially malformed file still yields
    every well-formed entry.

    Returns:
        A ``(cities, positions)`` tuple.  Both are lists of
        ``{"code": ..., "name": ...}`` dicts; a key absent from the JSON
        entry is reported as ``None``.

    Raises:
        OSError: if either JSON file cannot be opened.
        json.JSONDecodeError: if either file is not valid JSON.
    """
    base = os.path.dirname(__file__)
    with open(os.path.join(base, "city.json"), "r", encoding="utf-8") as f:
        city_data = json.load(f)
    with open(os.path.join(base, "work.json"), "r", encoding="utf-8") as f:
        work_data = json.load(f)

    def _section(data, key):
        # Return data["zpData"][key] when it is a list, otherwise [].
        zp_data = data.get("zpData") if isinstance(data, dict) else None
        items = zp_data.get(key) if isinstance(zp_data, dict) else None
        return items if isinstance(items, list) else []

    def _children(node):
        # Return the dict entries of node["subLevelModelList"]; a missing,
        # null or malformed child list counts as "no children".
        if not isinstance(node, dict):
            return []
        kids = node.get("subLevelModelList")
        if not isinstance(kids, list):
            return []
        return [kid for kid in kids if isinstance(kid, dict)]

    cities = [
        {"code": c.get("code"), "name": c.get("name")}
        for c in _section(city_data, "cityList")
        if isinstance(c, dict)
    ]

    positions = []
    for cat in _section(work_data, "config"):
        for sub in _children(cat):
            for pos in _children(sub):
                positions.append({"code": pos.get("code"), "name": pos.get("name")})
    return cities, positions
# Position names that are bare technology names; they get the "开发" suffix
# when used as a search query.  Built once instead of on every call.
_DEV_POSITION_NAMES = frozenset(
    {"Java", "Python", "PHP", "C#", "C/C++", "Golang", "Node.js", "Android", "iOS"}
)


def _query_name(name: str) -> str:
    """Return the search query text for a position name.

    Names listed in ``_DEV_POSITION_NAMES`` are returned with ``"开发"``
    appended; the match is exact and case-sensitive.  Any other name is
    returned unchanged.
    """
    if name in _DEV_POSITION_NAMES:
        return f"{name}开发"
    return name
def enumerate_pairs(*, exclude_national: bool = False) -> list:
    """Build every city × position combination.

    Args:
        exclude_national: when true, cities named ``"全国"`` (nationwide)
            are dropped first, the same filter ``count_pairs`` applies.
            Defaults to ``False``, so every loaded city is kept.

    Returns:
        A list of dicts with the keys ``city_code``, ``city_name``,
        ``position_code``, ``position_name`` and ``query``, ordered city by
        city and, within a city, in position order.
    """
    cities, positions = _load()
    if exclude_national:
        cities = [c for c in cities if (c.get("name") or "") != "全国"]

    # The query text depends only on the position, so compute it once per
    # position rather than once per (city, position) pair.
    position_rows = [(p["code"], p["name"], _query_name(p["name"])) for p in positions]

    pairs = []
    for c in cities:
        for position_code, position_name, query in position_rows:
            pairs.append({
                "city_code": c["code"],
                "city_name": c["name"],
                "position_code": position_code,
                "position_name": position_name,
                "query": query,
            })
    return pairs
def count_pairs(*, exclude_national: bool = True) -> int:
    """Return the number of city × position combinations.

    Args:
        exclude_national: when true (the default), cities named ``"全国"``
            (nationwide) are not counted.  Pass ``False`` to count every
            loaded city.

    Returns:
        ``len(cities) * len(positions)`` after the optional filter.
    """
    cities, positions = _load()
    if exclude_national:
        # `or ""` makes a missing/None name compare as a non-match.
        cities = [c for c in cities if (c.get("name") or "") != "全国"]
    return len(cities) * len(positions)
if __name__ == "__main__":
    # Script entry point: print the number of cities, positions and
    # city × position combinations as one JSON object on stdout.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--exclude-national",
        action="store_true",
        help='do not count the city named "全国" (nationwide)',
    )
    args = parser.parse_args()

    cities, positions = _load()
    if args.exclude_national:
        cities = [c for c in cities if (c.get("name") or "") != "全国"]

    city_count = len(cities)
    position_count = len(positions)
    total = city_count * position_count
    # Nothing else is written to stdout, so the output stays parseable JSON.
    print(json.dumps({"cities": city_count, "positions": position_count, "combos": total}, ensure_ascii=False))