# Python source listing (319 lines, 13 KiB).
import os
|
||
import json
|
||
import time
|
||
from datetime import datetime, timedelta, timezone
|
||
|
||
from alibabacloud_ecs20140526.client import Client as EcsClient
|
||
from alibabacloud_tea_openapi import models as open_api_models
|
||
from alibabacloud_ecs20140526 import models as ecs_models
|
||
from alibabacloud_credentials.client import Client as CredentialClient
|
||
|
||
|
||
# Pause between two instance-status polls, in milliseconds (3 seconds).
INSTANCE_STATUS_CHECK_INTERVAL_MILLISECOND = 3 * 1000
# Total time budget for the boot-status polling loop, in milliseconds (3 minutes).
INSTANCE_STATUS_TOTAL_CHECK_TIME_ELAPSE_MILLISECOND = 3 * 60 * 1000
|
||
|
||
|
||
def init_ecs_client() -> EcsClient:
    """Build the ECS API client used by the rest of the script.

    The region is read from ``ALIYUN_REGION_ID`` (default ``cn-shanghai``).
    Credentials come from a default-constructed ``CredentialClient``; how it
    locates them is decided by that library, not by this function.

    Returns:
        EcsClient: a client whose endpoint is ``ecs.<region>.aliyuncs.com``.
    """
    region = os.getenv("ALIYUN_REGION_ID", "cn-shanghai")
    client_config = open_api_models.Config(
        credential=CredentialClient(),
        region_id=region,
    )
    # The endpoint is set explicitly from the region.
    client_config.endpoint = f"ecs.{region}.aliyuncs.com"
    return EcsClient(client_config)
|
||
|
||
|
||
def compute_auto_release_time(hours_default=6, minutes_default=0) -> str:
    """Compute the UTC auto-release timestamp for newly created instances.

    The offset from "now" is taken from ``AUTO_RELEASE_HOURS`` and
    ``AUTO_RELEASE_MINUTES`` when they are set to a non-empty value, otherwise
    from the arguments.

    The RunInstances API requires ``AutoReleaseTime`` to be at least 30
    minutes in the future and, per its documentation, rounds a non-zero
    seconds field down to the minute. To stay valid by the time the request
    is processed, the offset is floored at 31 minutes and the result is
    rounded *up* to a whole minute.

    Args:
        hours_default: hours to add when ``AUTO_RELEASE_HOURS`` is unset/empty.
        minutes_default: minutes to add when ``AUTO_RELEASE_MINUTES`` is
            unset/empty.

    Returns:
        str: timestamp formatted as ``YYYY-MM-DDTHH:MM:00Z``.

    Raises:
        ValueError: if an environment override is not an integer.
    """
    hours = int(os.getenv("AUTO_RELEASE_HOURS") or hours_default)
    minutes = int(os.getenv("AUTO_RELEASE_MINUTES") or minutes_default)

    # 30-minute API minimum plus one minute of slack for request latency.
    minimum_offset = timedelta(minutes=31)
    offset = max(timedelta(hours=hours, minutes=minutes), minimum_offset)

    target = datetime.now(timezone.utc) + offset
    # Round up to the next whole minute so server-side truncation of the
    # seconds field can never pull the time below the requested offset.
    if target.second or target.microsecond:
        target = target.replace(second=0, microsecond=0) + timedelta(minutes=1)
    return target.strftime("%Y-%m-%dT%H:%M:%SZ")
|
||
|
||
|
||
def compose_run_instances_request() -> ecs_models.RunInstancesRequest:
    """Assemble the RunInstances request from environment overrides.

    Every tunable falls back to a built-in default when its ``ALIYUN_*``
    environment variable is unset. Two defaults depend on other settings so
    that a partial override does not produce a self-contradictory request:

    * ``zone_id`` defaults to ``cn-shanghai-b`` only when the region is the
      default ``cn-shanghai``; for any other region it is left unset unless
      ``ALIYUN_ZONE_ID`` is given.
    * ``min_amount`` defaults to 20 but never exceeds ``amount``.

    Returns:
        RunInstancesRequest: request covering billing, region/zone, instance
        type, image, system disk, networking, naming, instance count and the
        auto-release time.

    Raises:
        ValueError: if a numeric environment override is not an integer.
    """
    region_id = os.getenv("ALIYUN_REGION_ID", "cn-shanghai")
    # A Shanghai zone is only meaningful inside the Shanghai region.
    default_zone = "cn-shanghai-b" if region_id == "cn-shanghai" else None

    amount = int(os.getenv("ALIYUN_AMOUNT", "20"))
    # Keep the historical default of 20, capped so that min_amount <= amount
    # when only ALIYUN_AMOUNT is lowered.
    min_amount = int(os.getenv("ALIYUN_MIN_AMOUNT", str(min(amount, 20))))

    system_disk = ecs_models.RunInstancesRequestSystemDisk(
        size=int(os.getenv("ALIYUN_SYSTEM_DISK_SIZE", "40")),
        category=os.getenv("ALIYUN_SYSTEM_DISK_CATEGORY", "cloud_efficiency"),
    )

    return ecs_models.RunInstancesRequest(
        instance_charge_type=os.getenv("ALIYUN_CHARGE_TYPE", "PostPaid"),
        region_id=region_id,
        zone_id=os.getenv("ALIYUN_ZONE_ID", default_zone),
        instance_type=os.getenv("ALIYUN_INSTANCE_TYPE", "ecs.t5-lc1m1.small"),
        io_optimized=os.getenv("ALIYUN_IO_OPTIMIZED", "optimized"),
        spot_strategy=os.getenv("ALIYUN_SPOT_STRATEGY", "SpotAsPriceGo"),
        spot_interruption_behavior=os.getenv("ALIYUN_SPOT_BEHAVIOR", "Terminate"),
        image_id=os.getenv("ALIYUN_IMAGE_ID", "ubuntu_24_04_x64_20G_alibase_20251102.vhd"),
        security_enhancement_strategy=os.getenv("ALIYUN_SECURITY_ENHANCE", "Active"),
        system_disk=system_disk,
        internet_charge_type=os.getenv("ALIYUN_INTERNET_CHARGE_TYPE", "PayByBandwidth"),
        internet_max_bandwidth_out=int(os.getenv("ALIYUN_MAX_BW_OUT", "1")),
        # No defaults: these are None unless provided by the environment.
        v_switch_id=os.getenv("ALIYUN_VSWITCH_ID"),
        security_group_id=os.getenv("ALIYUN_SECURITY_GROUP_ID"),
        image_options=ecs_models.RunInstancesRequestImageOptions(login_as_non_root=False),
        instance_name=os.getenv("ALIYUN_INSTANCE_NAME", "launch-advisor-20251121"),
        private_dns_name_options=ecs_models.RunInstancesRequestPrivateDnsNameOptions(hostname_type="Custom"),
        unique_suffix=False,
        http_tokens=os.getenv("ALIYUN_HTTP_TOKENS", "optional"),
        tenancy="default",
        affinity="default",
        amount=amount,
        min_amount=min_amount,
        auto_release_time=compute_auto_release_time(),
    )
|
||
|
||
|
||
def call_run_instances_api(ecs_client: EcsClient):
    """Submit the RunInstances request and return the raw response.

    Any exception raised by the call is reported on stdout rather than
    propagated: the error's ``code`` (or its type), its ``message`` (or its
    string form) and, when the error carries a dict ``data`` with a
    ``Recommend`` entry, that recommendation.

    Args:
        ecs_client: client used to issue the request.

    Returns:
        The RunInstances response on success, ``None`` on failure.
    """
    request = compose_run_instances_request()
    try:
        response = ecs_client.run_instances(request)
    except Exception as error:
        error_code = getattr(error, "code", str(type(error)))
        print(error_code)
        error_message = getattr(error, "message", str(error))
        print(error_message)
        details = getattr(error, "data", None)
        if isinstance(details, dict):
            recommendation = details.get("Recommend")
            if recommendation is not None:
                print(recommendation)
        return None
    return response
|
||
|
||
|
||
def call_to_describe_instances(ecs_client: EcsClient, instance_ids):
    """Poll until every instance reports ``Running`` or the time budget ends.

    The instances are queried every
    ``INSTANCE_STATUS_CHECK_INTERVAL_MILLISECOND`` and each ID is reported as
    soon as it is seen running. The loop gives up after
    ``INSTANCE_STATUS_TOTAL_CHECK_TIME_ELAPSE_MILLISECOND``. The time budget
    is enforced on every iteration, including ones where the API call itself
    failed, so a persistently failing endpoint cannot keep the loop alive
    forever.

    Args:
        ecs_client: client used to call DescribeInstances.
        instance_ids: iterable of instance IDs to wait for.

    Returns:
        None. Progress and the final outcome are printed to stdout.
    """
    pending = list(instance_ids)
    region_id = os.getenv("ALIYUN_REGION_ID", "cn-shanghai")
    interval_seconds = INSTANCE_STATUS_CHECK_INTERVAL_MILLISECOND / 1000.0
    # Monotonic clock: unaffected by wall-clock adjustments during the wait.
    deadline = time.monotonic() + INSTANCE_STATUS_TOTAL_CHECK_TIME_ELAPSE_MILLISECOND / 1000.0

    while True:
        time.sleep(interval_seconds)
        # Query at most 100 IDs with a matching page size so one response can
        # cover the whole batch (per the DescribeInstances documentation the
        # default page holds only 10 entries).
        req = ecs_models.DescribeInstancesRequest(
            region_id=region_id,
            instance_ids=json.dumps(pending[:100]),
            page_size=100,
        )
        try:
            resp = ecs_client.describe_instances(req)
        except Exception as error:
            # Report and retry on the next tick, subject to the deadline below.
            print(getattr(error, "message", str(error)))
        else:
            instances = resp.body.instances.instance if hasattr(resp, "body") else resp.instances.instance
            for inst in instances:
                if getattr(inst, "status", None) != "Running":
                    continue
                iid = getattr(inst, "instance_id", None) or getattr(inst, "instanceId", None)
                if iid in pending:
                    pending.remove(iid)
                    print(f"Instance boot successfully: {iid}")
            if not pending:
                print("Instances all boot successfully.")
                return

        if time.monotonic() > deadline:
            print(f"Instances boot failed within {int(INSTANCE_STATUS_TOTAL_CHECK_TIME_ELAPSE_MILLISECOND/60000)} mins: {json.dumps(pending)}")
            return
|
||
|
||
|
||
def list_all_instance_ids(ecs_client: EcsClient) -> list:
    """Collect the IDs of all instances in the configured region.

    Pages through DescribeInstances, 100 entries at a time, until an empty
    page is returned. If a request fails, the error is printed and the IDs
    gathered so far are returned, so the result may be incomplete in that
    case.

    Args:
        ecs_client: client used to call DescribeInstances.

    Returns:
        list: instance ID strings, in the order the API returned them.
    """
    region_id = os.getenv("ALIYUN_REGION_ID", "cn-shanghai")
    ids = []
    page = 1
    while True:
        req = ecs_models.DescribeInstancesRequest(region_id=region_id, page_size=100, page_number=page)
        try:
            resp = ecs_client.describe_instances(req)
        except Exception as error:
            # Previously swallowed silently; make the truncated listing visible.
            print(f"DescribeInstances failed on page {page}: {getattr(error, 'message', str(error))}")
            break
        body = resp.body if hasattr(resp, "body") else resp
        items = body.instances.instance if hasattr(body, "instances") else []
        if not items:
            break
        for item in items:
            iid = getattr(item, "instance_id", None) or getattr(item, "instanceId", None)
            if iid:
                ids.append(iid)
        page += 1
    return ids
|
||
|
||
|
||
def wait_instances_status(ecs_client: EcsClient, instance_ids, target_status, timeout_seconds=600) -> bool:
    """Wait until every given instance reports ``target_status``.

    Polls DescribeInstances every 3 seconds for the instances that have not
    reached the target yet. Failed API calls are printed and retried until the
    timeout expires.

    Args:
        ecs_client: client used to call DescribeInstances.
        instance_ids: iterable of instance IDs to wait for.
        target_status: status string to wait for (callers in this file pass
            ``"Running"`` and ``"Stopped"``).
        timeout_seconds: maximum time to wait, in seconds.

    Returns:
        bool: ``True`` if all instances reached the status in time (also when
        ``instance_ids`` is empty), ``False`` otherwise.
    """
    region_id = os.getenv("ALIYUN_REGION_ID", "cn-shanghai")
    deadline = time.monotonic() + timeout_seconds
    pending = set(instance_ids)
    while pending and time.monotonic() < deadline:
        time.sleep(3)
        # At most 100 IDs per query with a matching page size, so a single
        # response can cover the batch; the rest is picked up as the set
        # shrinks. If the first batch never completes the result is False
        # regardless, so the return value is unaffected.
        batch = list(pending)[:100]
        req = ecs_models.DescribeInstancesRequest(
            region_id=region_id,
            instance_ids=json.dumps(batch),
            page_size=100,
        )
        try:
            resp = ecs_client.describe_instances(req)
        except Exception as error:
            print(f"DescribeInstances failed while waiting for {target_status}: {getattr(error, 'message', str(error))}")
            continue
        body = resp.body if hasattr(resp, "body") else resp
        items = body.instances.instance if hasattr(body, "instances") else []
        for item in items:
            iid = getattr(item, "instance_id", None) or getattr(item, "instanceId", None)
            if getattr(item, "status", None) == target_status:
                pending.discard(iid)
    return not pending
|
||
|
||
|
||
def install_cloud_assistant_and_reboot(ecs_client: EcsClient, instance_ids) -> bool:
    """Install Cloud Assistant on the instances, reboot them, wait for Running.

    The steps run in order (InstallCloudAssistant, RebootInstances, then a
    wait of up to 600 seconds for ``Running``). A failure in either API call
    is printed and aborts the sequence.

    Args:
        ecs_client: client used for all API calls.
        instance_ids: instance IDs to operate on.

    Returns:
        bool: ``False`` when ``instance_ids`` is empty or an API call raised;
        otherwise the result of waiting for the ``Running`` status.
    """
    region_id = os.getenv("ALIYUN_REGION_ID", "cn-shanghai")
    if not instance_ids:
        return False

    try:
        ecs_client.install_cloud_assistant(
            ecs_models.InstallCloudAssistantRequest(region_id=region_id, instance_id=instance_ids)
        )
    except Exception as e:
        print(f"InstallCloudAssistant 调用失败:{e}")
        return False

    try:
        ecs_client.reboot_instances(
            ecs_models.RebootInstancesRequest(region_id=region_id, instance_id=instance_ids)
        )
    except Exception as e:
        print(f"RebootInstances 调用失败:{e}")
        return False

    # NOTE(review): the wait starts right after the reboot request; confirm
    # the instances have left "Running" by the first poll, otherwise this can
    # return before the reboot actually happened.
    return wait_instances_status(ecs_client, instance_ids, target_status="Running", timeout_seconds=600)
|
||
|
||
|
||
def _cloud_assistant_ready_ids(body) -> set:
    """Return the instance IDs that a status response body reports as ready."""
    # Shape per the SDK docs (TODO confirm against the installed version):
    # body.instance_cloud_assistant_status_set.instance_cloud_assistant_status.
    # The older lookups are kept as fallbacks.
    container = getattr(body, "instance_cloud_assistant_status_set", None)
    entries = getattr(container, "instance_cloud_assistant_status", None)
    if entries is None:
        entries = getattr(body, "instance_cloud_assistant_status", None)
    if entries is None and hasattr(body, "cloud_assistant"):
        entries = body.cloud_assistant.instance_cloud_assistant_status

    ready = set()
    for entry in entries or []:
        flag = (
            getattr(entry, "cloud_assistant_status", None)
            or getattr(entry, "status", None)
            or getattr(entry, "Status", None)
        )
        is_ready = (flag is True) or (isinstance(flag, str) and flag.lower() in ("true", "enabled", "running"))
        iid = getattr(entry, "instance_id", None) or getattr(entry, "instanceId", None)
        if is_ready and iid:
            ready.add(iid)
    return ready


def ensure_cloud_assistant_ready(ecs_client: EcsClient, instance_ids) -> bool:
    """Wait until Cloud Assistant is reported ready on every instance.

    Polls DescribeCloudAssistantStatus every 5 seconds for the instances that
    are not ready yet, until all are ready or the timeout from
    ``CLOUD_ASSISTANT_READY_TIMEOUT_SECONDS`` (default 600) expires. Readiness
    is tracked per instance ID, so duplicate or unrelated entries in a
    response cannot produce a false positive. IDs are queried in batches of 50
    with a matching page size so a single response can cover each batch.

    Args:
        ecs_client: client used to call DescribeCloudAssistantStatus.
        instance_ids: instance IDs that must all become ready.

    Returns:
        bool: ``True`` once every instance is ready (immediately for an empty
        ``instance_ids``), ``False`` on timeout.
    """
    region_id = os.getenv("ALIYUN_REGION_ID", "cn-shanghai")
    deadline = time.time() + int(os.getenv("CLOUD_ASSISTANT_READY_TIMEOUT_SECONDS", "600"))
    # De-duplicate while keeping the caller's order.
    outstanding = list(dict.fromkeys(instance_ids))
    batch_size = 50

    while time.time() < deadline:
        ready = set()
        try:
            for start in range(0, len(outstanding), batch_size):
                batch = outstanding[start:start + batch_size]
                req = ecs_models.DescribeCloudAssistantStatusRequest(
                    region_id=region_id,
                    instance_id=batch,
                    page_size=batch_size,
                )
                resp = ecs_client.describe_cloud_assistant_status(req)
                body = resp.body if hasattr(resp, "body") else resp
                ready |= _cloud_assistant_ready_ids(body)
        except Exception as error:
            # Typically transient right after boot; report and retry.
            print(f"DescribeCloudAssistantStatus failed: {getattr(error, 'message', str(error))}")
            time.sleep(5)
            continue

        outstanding = [iid for iid in outstanding if iid not in ready]
        if not outstanding:
            return True
        time.sleep(5)
    return False
|
||
|
||
|
||
def clear_all_instances(ecs_client: EcsClient):
    """Delete every ECS instance in the configured region.

    WARNING: this is destructive. It acts on all instances returned by
    ``list_all_instance_ids``, not only ones created by this script.

    Sequence: request a stop for all instances and wait up to 600 seconds for
    ``Stopped``; force-delete them in one batch call; if the batch call fails,
    fall back to deleting one instance at a time; finally re-list after a
    short pause and report what is left. If the stop request itself fails, the
    wait is skipped because the delete is forced anyway.

    Args:
        ecs_client: client used for all API calls.

    Returns:
        None. Progress and outcome are printed to stdout.
    """
    ids = list_all_instance_ids(ecs_client)
    region_id = os.getenv("ALIYUN_REGION_ID", "cn-shanghai")
    if not ids:
        print("当前地域无实例,无需清理")
        return
    print(f"准备清理 {len(ids)} 台实例:{json.dumps(ids)}")

    stop_requested = True
    try:
        stop_req = ecs_models.StopInstancesRequest(instance_id=ids, region_id=region_id)
        ecs_client.stop_instances(stop_req)
    except Exception as e:
        print(f"停止实例失败:{e}")
        stop_requested = False

    # Waiting for "Stopped" only makes sense if the stop was accepted;
    # otherwise it would just burn the full timeout before the forced delete.
    if stop_requested:
        wait_instances_status(ecs_client, ids, target_status="Stopped", timeout_seconds=600)

    try:
        del_req = ecs_models.DeleteInstancesRequest(instance_id=ids, force=True, region_id=region_id)
        ecs_client.delete_instances(del_req)
    except Exception as batch_error:
        # Surface the batch failure, then retry instance by instance so one
        # bad ID does not block the rest.
        print(f"批量删除实例失败,改为逐个删除:{batch_error}")
        for iid in ids:
            try:
                one = ecs_models.DeleteInstanceRequest(instance_id=iid, force=True, region_id=region_id)
                ecs_client.delete_instance(one)
            except Exception as e:
                print(f"删除实例 {iid} 失败:{e}")

    # Brief pause before re-listing to verify the outcome.
    time.sleep(5)
    left = list_all_instance_ids(ecs_client)
    if not left:
        print("实例已全部清理完毕")
    else:
        print(f"仍有实例未删除:{json.dumps(left)}")
|
||
|
||
|
||
def main():
    """Script entry point: clean up, create, install Cloud Assistant, verify.

    Steps:
        1. Unless ``NO_CLEAR`` is ``"1"``, delete all existing instances in
           the region.
        2. Create the new instances; stop if the request failed.
        3. Poll for the instances to boot. That step returns nothing, so the
           flow continues whether or not all instances came up.
        4. Install Cloud Assistant and reboot; stop on failure.
        5. Wait for Cloud Assistant to be ready; stop on failure.

    Returns:
        None. On full success the created instance IDs are printed as a JSON
        array on the last line of output.
    """
    ecs_client = init_ecs_client()

    skip_cleanup = os.getenv("NO_CLEAR") == "1"
    if not skip_cleanup:
        clear_all_instances(ecs_client)

    resp = call_run_instances_api(ecs_client)
    if resp is None:
        return

    payload = getattr(resp, "body", resp)
    instance_ids = payload.instance_id_sets.instance_id_set
    print(f"Success. Instance creation succeed. InstanceIds: {json.dumps(instance_ids)}")

    call_to_describe_instances(ecs_client, instance_ids)

    if not install_cloud_assistant_and_reboot(ecs_client, instance_ids):
        print("Cloud Assistant 安装或重启失败,终止")
        return

    if not ensure_cloud_assistant_ready(ecs_client, instance_ids):
        print("Cloud Assistant 未全部就绪,终止")
        return

    print(json.dumps(instance_ids))


# Run the workflow only when executed as a script, not when imported.
if __name__ == "__main__":
    main()