name: simulation-experiment-designer description: 用于高效场景分析和优化的仿真实验设计技能。 allowed-tools: Bash(*) Read Write Edit Glob Grep WebFetch metadata: author: babysitter-sdk version: "1.0.0" category: simulation backlog-id: SK-IE-008
仿真实验设计师
您是仿真实验设计师 - 一个专门用于高效设计和分析仿真实验的专业技能。
概述
此技能支持AI驱动的仿真实验,包括:
- 仿真的因子实验设计
- 拉丁超立方抽样
- 方差缩减技术(公共随机数、对偶变量)
- 排序与选择程序
- 元模型拟合(响应曲面)
- OptQuest式仿真优化
- 带统计检验的场景比较
先决条件
- Python 3.8+ 及 pyDOE2、SALib、scipy
- SimPy 或其他离散事件仿真框架
- 统计分析库
能力
1. 因子实验设计
import pyDOE2 as doe
import numpy as np
def create_factorial_design(factors, levels=2):
    """Build a full or fractional factorial design for the given factors.

    Args:
        factors: dict mapping factor name -> (low, high) bounds.
        levels: number of levels per factor (2 gives a two-level design).

    Returns:
        dict with the real-valued design matrix, factor names, run count,
        and the coded design returned by pyDOE2.
    """
    n_factors = len(factors)
    factor_names = list(factors.keys())

    if levels == 2:
        if n_factors > 5:
            # Fractional factorial for many factors.  The original code
            # truncated a five-letter generator list with [:n_factors],
            # silently dropping every factor past the fifth; a
            # resolution-IV design over all n_factors is used instead.
            design_coded = doe.fracfact_by_res(n_factors, 4)
        else:
            # Full 2^k design coded as -1/+1.
            design_coded = doe.ff2n(n_factors)
    else:
        # General full factorial, coded 0 .. levels-1 per factor.
        design_coded = doe.fullfact([levels] * n_factors)

    # Map coded levels onto the real factor ranges.
    design = np.zeros_like(design_coded, dtype=float)
    for i, (name, (low, high)) in enumerate(factors.items()):
        if levels == 2:
            # Two-level codes are -1/+1 -> low/high.
            design[:, i] = np.where(design_coded[:, i] == -1, low, high)
        else:
            # Evenly spaced levels between low and high.
            step = (high - low) / (levels - 1)
            design[:, i] = low + design_coded[:, i] * step

    return {
        "design_matrix": design,
        "factor_names": factor_names,
        "num_runs": len(design),
        "coded_design": design_coded,
    }
2. 拉丁超立方抽样
from scipy.stats import qmc
def latin_hypercube_design(factors, n_samples, seed=None):
    """Draw a Latin hypercube sample over the given factor ranges.

    Args:
        factors: dict mapping factor name -> (low, high) bounds.
        n_samples: number of design points to generate.
        seed: optional seed for reproducible sampling.

    Returns:
        dict with the scaled design matrix, factor names, sample count,
        the unit-cube discrepancy, and the design type tag.
    """
    names = list(factors)
    lows = [factors[name][0] for name in names]
    highs = [factors[name][1] for name in names]

    # Sample on the unit hypercube, then rescale to the factor bounds.
    engine = qmc.LatinHypercube(d=len(names), seed=seed)
    unit_sample = engine.random(n=n_samples)
    scaled = qmc.scale(unit_sample, lows, highs)

    return {
        "design_matrix": scaled,
        "factor_names": names,
        "num_samples": n_samples,
        # Space-filling quality measured on the raw unit-cube sample.
        "discrepancy": qmc.discrepancy(unit_sample),
        "space_filling": "latin_hypercube",
    }
3. 方差缩减 - 公共随机数
import numpy as np
class CommonRandomNumbers:
    """Common random numbers (CRN) for comparing system configurations.

    Stream seeds are derived deterministically from the base seed so the
    same (stream_id, replication) pair always yields the same random
    numbers, across runs and processes.
    """

    def __init__(self, seed):
        # Base seed that every derived stream seed incorporates.
        self.base_seed = seed
        # Cache of instantiated streams keyed by (stream_id, replication).
        self.streams = {}

    def _stream_seed(self, stream_id, replication):
        """Deterministic 32-bit seed for a (stream_id, replication) pair.

        The builtin hash() is salted per process (PYTHONHASHSEED), so the
        original implementation was not reproducible across runs and also
        never used base_seed; a stable polynomial hash fixes both.
        """
        seed = self.base_seed & 0xFFFFFFFF
        for ch in f"{stream_id}|{replication}":
            seed = (seed * 1000003 + ord(ch)) & 0xFFFFFFFF
        return seed

    def get_stream(self, stream_id, replication):
        """Return the cached deterministic stream for this id/replication."""
        key = (stream_id, replication)
        if key not in self.streams:
            self.streams[key] = np.random.RandomState(
                self._stream_seed(stream_id, replication)
            )
        return self.streams[key]

    def compare_configurations(self, sim_func, configs, n_reps):
        """Compare configurations under CRN with a paired t-test.

        Args:
            sim_func: callable(config, rng) -> scalar performance measure.
            configs: hashable configurations; the paired analysis uses the
                first two.
            n_reps: number of replications per configuration.

        Returns:
            dict with per-config means, the paired-difference mean, the
            achieved variance reduction, and paired t-test results.
        """
        results = {c: [] for c in configs}
        for rep in range(n_reps):
            for config in configs:
                # Each configuration gets a FRESH stream seeded identically
                # for this replication.  The original reused one mutable
                # RandomState across configs, so later configs consumed a
                # different part of the stream and the CRN pairing was lost.
                rng = np.random.RandomState(self._stream_seed('main', rep))
                results[config].append(sim_func(config, rng))

        # Paired differences between the first two configurations.
        paired_diffs = [
            results[configs[0]][i] - results[configs[1]][i]
            for i in range(n_reps)
        ]

        from scipy import stats
        t_stat, p_value = stats.ttest_1samp(paired_diffs, 0)
        return {
            "config_means": {c: np.mean(results[c]) for c in configs},
            "paired_difference_mean": np.mean(paired_diffs),
            # 1 - Var(D) / (Var(X1) + Var(X2)): fraction of the variance of
            # an independent-sampling comparison removed by CRN pairing.
            "variance_reduction": 1 - np.var(paired_diffs) / (
                np.var(results[configs[0]]) + np.var(results[configs[1]])
            ),
            "t_statistic": t_stat,
            "p_value": p_value,
        }
4. 排序与选择
from scipy import stats
def rinott_procedure(configurations, sim_func, alpha=0.05, delta=1.0):
    """Rinott's two-stage procedure for selecting the best configuration.

    Args:
        configurations: candidate configurations (hashable, indexable).
        sim_func: callable(config) -> scalar performance (larger is better).
        alpha: allowed probability of incorrect selection.
        delta: indifference-zone parameter (smallest meaningful difference).

    Returns:
        dict with the selected configuration, final sample means, the
        second-stage sample sizes, total sampling effort, and confidence.

    Raises:
        ValueError: if no configurations are given.
    """
    k = len(configurations)
    if k == 0:
        raise ValueError("configurations must be non-empty")
    n0 = 20  # first-stage sample size

    # Stage 1: initial sampling of every configuration.
    stage1_results = {c: [sim_func(c) for _ in range(n0)]
                      for c in configurations}

    # Degenerate case: a single candidate is trivially best.  The original
    # crashed here with ZeroDivisionError on alpha / (k - 1).
    if k == 1:
        only = configurations[0]
        return {
            "best_configuration": only,
            "means": {only: np.mean(stage1_results[only])},
            "stage2_sample_sizes": {only: n0},
            "total_samples": n0,
            "confidence": 1 - alpha,
        }

    # First-stage sample variances (ddof=1: unbiased estimate).
    variances = {c: np.var(stage1_results[c], ddof=1)
                 for c in configurations}

    # Rinott's constant h (simplified approximation -- use the tabulated
    # values in practice).
    h = stats.t.ppf(1 - alpha / (k - 1), n0 - 1) * np.sqrt(2)

    # Total per-config sample sizes: N_i = max(n0, ceil((h*s_i/delta)^2)).
    stage2_sizes = {
        c: max(n0, int(np.ceil((h * np.sqrt(variances[c]) / delta) ** 2)))
        for c in configurations
    }

    # Stage 2: additional sampling up to N_i observations per configuration.
    final_results = {c: list(stage1_results[c]) for c in configurations}
    for config in configurations:
        for _ in range(stage2_sizes[config] - n0):
            final_results[config].append(sim_func(config))

    # Select the configuration with the largest sample mean (maximization).
    means = {c: np.mean(final_results[c]) for c in configurations}
    best = max(means, key=means.get)
    return {
        "best_configuration": best,
        "means": means,
        "stage2_sample_sizes": stage2_sizes,
        "total_samples": sum(stage2_sizes.values()),
        "confidence": 1 - alpha,
    }
5. 响应曲面元模型
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import numpy as np
def fit_response_surface(design_matrix, responses, degree=2):
    """Fit a polynomial response-surface metamodel via least squares.

    Args:
        design_matrix: (n_runs, n_factors) array of factor settings.
        responses: length-n_runs array of observed simulation outputs.
        degree: polynomial degree of the surface (default quadratic).

    Returns:
        dict with the fitted model, the feature transformer, fit-quality
        statistics (R^2, adjusted R^2, F-statistic), and coefficients.
    """
    # Expand factors into polynomial terms; the intercept is handled by
    # the regression model itself.
    transformer = PolynomialFeatures(degree=degree, include_bias=False)
    features = transformer.fit_transform(design_matrix)

    regression = LinearRegression()
    regression.fit(features, responses)

    # Goodness of fit from residual and total sums of squares.
    fitted = regression.predict(features)
    resid = responses - fitted
    ss_res = np.sum(resid ** 2)
    ss_tot = np.sum((responses - np.mean(responses)) ** 2)
    r_squared = 1 - ss_res / ss_tot

    # ANOVA-style F statistic for the overall regression.
    n_obs = len(responses)
    n_terms = features.shape[1]
    ms_residual = ss_res / (n_obs - n_terms - 1)
    ms_regression = (ss_tot - ss_res) / n_terms

    return {
        "model": regression,
        "poly_transformer": transformer,
        "r_squared": r_squared,
        "adjusted_r_squared": 1 - (1 - r_squared) * (n_obs - 1) / (n_obs - n_terms - 1),
        "coefficients": regression.coef_.tolist(),
        "intercept": regression.intercept_,
        "f_statistic": ms_regression / ms_residual,
        "feature_names": transformer.get_feature_names_out(),
    }
def optimize_response_surface(model, poly, bounds, maximize=True,
                              n_starts=10, seed=None):
    """Find the factor settings that optimize a fitted response surface.

    Args:
        model: fitted regression model exposing .predict().
        poly: feature transformer exposing .transform().
        bounds: list of (low, high) bounds, one pair per factor.
        maximize: maximize the predicted response if True, else minimize.
        n_starts: number of random restarts (was hard-coded to 10).
        seed: optional seed making the random restarts reproducible;
            None preserves the original unseeded behavior.

    Returns:
        dict with the optimal factor settings, the predicted response
        there, and whether the best optimizer run reported success.
    """
    from scipy.optimize import minimize

    # Dedicated generator so restarts are reproducible when seeded and do
    # not disturb the global numpy random state.
    rng = np.random.RandomState(seed)

    def objective(x):
        # Predict on the polynomial-expanded point; negate to maximize
        # via scipy's minimizer.
        x_poly = poly.transform(x.reshape(1, -1))
        pred = model.predict(x_poly)[0]
        return -pred if maximize else pred

    # Multi-start local optimization to avoid poor local optima.
    best_result = None
    for _ in range(n_starts):
        x0 = [rng.uniform(low, high) for (low, high) in bounds]
        result = minimize(objective, x0, bounds=bounds, method='L-BFGS-B')
        if best_result is None or result.fun < best_result.fun:
            best_result = result

    return {
        "optimal_factors": best_result.x.tolist(),
        "predicted_response": -best_result.fun if maximize else best_result.fun,
        "optimization_success": best_result.success,
    }
6. 敏感性分析
from SALib.sample import saltelli
from SALib.analyze import sobol
def global_sensitivity_analysis(problem_def, sim_func, n_samples=1024):
    """Run Sobol global sensitivity analysis on a simulation model.

    Args:
        problem_def: SALib problem spec:
            {'num_vars': n, 'names': [...], 'bounds': [[lo, hi], ...]}.
        sim_func: callable(params) -> scalar simulation output.
        n_samples: base sample size for the Saltelli sampling scheme.

    Returns:
        dict with first-order, total-order and (when present) second-order
        Sobol indices plus their confidence intervals, keyed by factor name.
    """
    names = problem_def['names']

    # Saltelli sampling, then one simulation run per sample point.
    samples = saltelli.sample(problem_def, n_samples)
    outputs = np.array([sim_func(point) for point in samples])

    # Decompose the output variance into Sobol indices.
    indices = sobol.analyze(problem_def, outputs)

    return {
        "first_order_indices": dict(zip(names, indices['S1'])),
        "total_order_indices": dict(zip(names, indices['ST'])),
        "second_order_indices": indices.get('S2', None),
        "confidence_intervals": {
            "S1_conf": dict(zip(names, indices['S1_conf'])),
            "ST_conf": dict(zip(names, indices['ST_conf'])),
        },
    }
流程集成
此技能与以下流程集成:
- discrete-event-simulation-modeling.js
- design-of-experiments-execution.js
- capacity-planning-analysis.js
输出格式
{
"experiment_design": {
"type": "latin_hypercube",
"factors": ["arrival_rate", "service_rate", "num_servers"],
"num_runs": 50
},
"analysis": {
"metamodel_r_squared": 0.94,
"significant_factors": ["arrival_rate", "num_servers"],
"optimal_settings": {
"arrival_rate": 8.5,
"service_rate": 4.0,
"num_servers": 3
},
"predicted_performance": 0.85
},
"sensitivity": {
"most_influential": "arrival_rate",
"sobol_indices": {"arrival_rate": 0.45, "service_rate": 0.25}
}
}
工具/库
| 库 | 描述 | 用例 |
|---|---|---|
| pyDOE2 | 实验设计生成 | 因子设计 |
| SALib | 敏感性分析 | 全局敏感性分析 |
| scipy.stats | 统计 | 假设检验 |
| sklearn | 机器学习 | 元建模 |
最佳实践
- 比较时使用CRN - 减少所需重复次数
- 首先筛选因子 - 对多因子使用筛选设计
- 验证元模型 - 检查残差和R平方
- 记录所有假设 - 实验条件
- 考虑实际显著性 - 不仅仅是统计显著性
- 为失败做计划 - 处理不可行运行
约束
- 报告所有设计决策
- 记录方差缩减效果
- 验证元模型预测
- 使用适当的样本量