---
name: doe-designer
description: 实验设计与分析技能,用于因子实验和响应曲面实验的规划与分析。
allowed-tools: Bash(*) Read Write Edit Glob Grep WebFetch
metadata:
  author: babysitter-sdk
  version: "1.0.0"
  category: 质量工程
  backlog-id: SK-IE-017
---
实验设计专家
您是 实验设计专家 - 一个专门用于设计、执行和分析过程优化实验的专业技能。
概述
此技能支持AI驱动的实验设计,包括:
- 全因子设计生成
- 带混杂分析的分数因子设计
- 响应曲面方法(中心复合设计、Box-Behnken设计)
- 筛选设计(Plackett-Burman、确定性筛选)
- 实验结果方差分析
- 主效应与交互作用图
- 等高线图与曲面图
- 最优因子水平确定
- 确认试验规划
功能
1. 全因子设计
import pyDOE2 as doe
import numpy as np
import pandas as pd
def full_factorial_design(factors, levels=2):
    """Generate a full factorial design with a randomized run order.

    Args:
        factors: Mapping of factor name to its settings. When ``levels`` is 2
            each value is a ``(low, high)`` pair; otherwise each value is a
            sequence of exactly ``levels`` settings.
        levels: Number of levels shared by every factor.

    Returns:
        A dict holding the design DataFrame ("设计矩阵", rows sorted by the
        randomized "运行顺序"), the coded matrix ("编码矩阵", left in standard
        order), the run count, the factor count, a design-type label and the
        resolution label.

    Raises:
        ValueError: If a factor does not supply exactly ``levels`` settings.
    """
    factor_names = list(factors.keys())
    n_factors = len(factors)

    # Materialize each setting list once so it can be validated and indexed.
    # Without this check a short list leaves silent zeros in the design and a
    # long list has its extra levels silently dropped.
    settings_by_factor = [list(values) for values in factors.values()]
    for name, settings in zip(factor_names, settings_by_factor):
        if len(settings) != levels:
            raise ValueError(
                f"factor {name!r} provides {len(settings)} settings, "
                f"expected {levels}"
            )

    if levels == 2:
        # 2^k design; the conversion below treats -1 as the low level.
        design_coded = doe.ff2n(n_factors)
    else:
        # General design; the conversion below treats codes as 0-based
        # positions into each factor's setting list.
        design_coded = doe.fullfact([levels] * n_factors)
    n_runs = levels ** n_factors

    # Translate coded units into the real factor settings.
    design_actual = np.zeros_like(design_coded)
    for col, settings in enumerate(settings_by_factor):
        if levels == 2:
            low, high = settings
            design_actual[:, col] = np.where(
                design_coded[:, col] == -1, low, high
            )
        else:
            for code, value in enumerate(settings):
                design_actual[design_coded[:, col] == code, col] = value

    df = pd.DataFrame(design_actual, columns=factor_names)
    df['试验序号'] = range(1, n_runs + 1)
    df['标准顺序'] = df['试验序号']

    # Randomize the execution order. After sorting, the DataFrame rows no
    # longer line up with the rows of the coded matrix; the 1-based
    # "标准顺序" column maps each run back to its coded row.
    df['运行顺序'] = np.random.permutation(n_runs) + 1
    df = df.sort_values('运行顺序').reset_index(drop=True)

    return {
        "设计矩阵": df,
        "编码矩阵": design_coded,
        "试验次数": n_runs,
        "因子数量": n_factors,
        "设计类型": f"{levels}^{n_factors} 全因子设计",
        "分辨率": "完全"
    }
2. 分数因子设计
def fractional_factorial_design(factors, resolution='IV'):
    """Generate a two-level fractional factorial design.

    Args:
        factors: Mapping of factor name to a ``(low, high)`` pair.
        resolution: Requested resolution label: 'III', 'IV' or 'V'.

    Returns:
        A dict with the design DataFrame, the run count, the resolution
        actually delivered, a description of the confounding pattern and a
        design-type label. When no generator is tabulated for the requested
        factor count / resolution, an unfractionated two-level design is
        produced and reported as such.

    Raises:
        ValueError: If the fallback design would need more than ten base
            factor letters.
    """
    n_factors = len(factors)
    factor_names = list(factors.keys())

    # Generator strings, one column per factor. The resolution IV entries
    # build every added column from a three-letter word (E=ABC, F=BCD,
    # G=ACD); two-letter words such as E=AB would only give resolution III.
    generators = {
        3: {'III': 'a b ab'},  # 2^(3-1)
        4: {'IV': 'a b c abc'},  # 2^(4-1)
        5: {'V': 'a b c d abcd', 'III': 'a b ab c ac'},  # 2^(5-1) / 2^(5-2)
        6: {'IV': 'a b c d abc bcd', 'III': 'a b ab c ac bc'},
        7: {'IV': 'a b c d abc bcd acd', 'III': 'a b ab c ac d ad'},
    }

    gen = generators.get(n_factors, {}).get(resolution)
    if gen is not None:
        reported_resolution = resolution
        confounding = analyze_confounding(n_factors, resolution)
        design_type = f"2^({n_factors}-p) 分辨率 {resolution}"
    else:
        # No tabulated fraction: give every factor its own base column.
        # The letters must be space separated; joining them into one word
        # would yield a single interaction column instead of n_factors
        # columns.
        base_letters = 'abcdefghij'
        if n_factors > len(base_letters):
            raise ValueError(
                f"no generator available for {n_factors} factors "
                f"(at most {len(base_letters)} supported without a "
                f"tabulated fraction)"
            )
        gen = ' '.join(base_letters[:n_factors])
        reported_resolution = "完全"
        confounding = "无混杂(全因子设计)"
        design_type = f"2^{n_factors} 全因子设计"

    design_coded = doe.fracfact(gen)
    n_runs = len(design_coded)

    # Translate coded units (-1 is treated as low) into real settings.
    design_actual = np.zeros_like(design_coded)
    for col, bounds in enumerate(factors.values()):
        low, high = bounds
        design_actual[:, col] = np.where(design_coded[:, col] == -1, low, high)

    df = pd.DataFrame(design_actual, columns=factor_names)

    return {
        "设计矩阵": df,
        "试验次数": n_runs,
        "分辨率": reported_resolution,
        "混杂模式": confounding,
        "设计类型": design_type
    }
def analyze_confounding(n_factors, resolution):
    """Return the confounding description for a resolution label.

    ``n_factors`` is accepted but not consulted. Labels other than 'III',
    'IV' and 'V' yield a generic "unknown pattern" description.
    """
    labels = ('III', 'IV', 'V')
    descriptions = (
        "主效应与二因子交互作用混杂",
        "主效应清晰;二因子交互作用相互混杂",
        "主效应和二因子交互作用清晰;三因子交互作用混杂",
    )
    lookup = dict(zip(labels, descriptions))
    try:
        return lookup[resolution]
    except KeyError:
        return "未知混杂模式"
3. 响应曲面设计
def central_composite_design(factors, alpha='rotatable', center_points=5):
    """Generate a central composite design (CCD).

    Args:
        factors: Mapping of factor name to a ``(low, high)`` pair; ``low``
            and ``high`` map to coded -1 and +1.
        alpha: 'rotatable', 'orthogonal', or a positive number giving the
            axial distance in coded units.
        center_points: Number of center runs requested from the generator.

    Returns:
        A dict with the design DataFrame in real units, the coded matrix,
        the run count, a design-type label, the ``alpha`` argument as given
        and the center-point count.

    Raises:
        ValueError: If a numeric ``alpha`` is not positive.
    """
    n_factors = len(factors)
    factor_names = list(factors.keys())

    if isinstance(alpha, str):
        design_coded = doe.ccdesign(
            n_factors, center=(0, center_points), alpha=alpha
        )
    else:
        # NOTE(review): pyDOE2's ccdesign is understood to accept only the
        # string labels for alpha - confirm against the installed version.
        # A numeric axial distance is therefore applied by generating a
        # face-centered design (axial points at +/-1) and stretching it.
        axial_distance = float(alpha)
        if axial_distance <= 0:
            raise ValueError("numeric alpha must be positive")
        design_coded = doe.ccdesign(
            n_factors, center=(0, center_points), face='faced'
        )
        # Axial runs are the only rows with exactly one non-zero coordinate:
        # factorial runs have none at zero and center runs are all zero.
        # This holds for two or more factors.
        axial_rows = np.count_nonzero(design_coded, axis=1) == 1
        design_coded[axial_rows] *= axial_distance

    n_runs = len(design_coded)

    # Map coded units onto each factor's real range around its midpoint.
    design_actual = np.zeros_like(design_coded)
    for col, bounds in enumerate(factors.values()):
        low, high = bounds
        center = (low + high) / 2
        half_range = (high - low) / 2
        design_actual[:, col] = center + design_coded[:, col] * half_range

    df = pd.DataFrame(design_actual, columns=factor_names)

    return {
        "设计矩阵": df,
        "编码矩阵": design_coded,
        "试验次数": n_runs,
        "设计类型": "中心复合设计",
        "alpha": alpha,
        "中心点": center_points
    }
def box_behnken_design(factors, center_points=3):
    """Generate a Box-Behnken design in real factor units.

    Intended for 3-4 factors; the design avoids the extreme corner points.

    Args:
        factors: Mapping of factor name to a ``(low, high)`` pair.
        center_points: Center-run count handed to the generator.

    Returns:
        A dict with the design DataFrame, the run count, a design-type
        label, the center-point count and a short note on the design's
        advantage.
    """
    names = list(factors.keys())
    coded = doe.bbdesign(len(factors), center=center_points)
    run_count = len(coded)

    # Rescale each coded column around the midpoint of its factor's range.
    actual = np.zeros_like(coded)
    for col, name in enumerate(names):
        low, high = factors[name]
        center = (low + high) / 2
        half_range = (high - low) / 2
        actual[:, col] = center + coded[:, col] * half_range

    result = {"设计矩阵": pd.DataFrame(actual, columns=names)}
    result["试验次数"] = run_count
    result["设计类型"] = "Box-Behnken设计"
    result["中心点"] = center_points
    result["优势"] = "无角点 - 避免极端条件"
    return result
4. 方差分析
import statsmodels.api as sm
from statsmodels.formula.api import ols
def analyze_factorial_experiment(data, response_col, factor_cols):
    """Run an ANOVA on a factorial experiment.

    The model contains every main effect and every two-factor interaction.

    Args:
        data: DataFrame holding the factor and response columns.
        response_col: Name of the response column.
        factor_cols: Names of the factor columns.

    Returns:
        A dict with the type-2 ANOVA table, R-squared, adjusted R-squared,
        the high-minus-low effect of each factor, the factors and the
        two-factor interactions whose p-value is below 0.05, and the text
        summary of the fitted model.

    Raises:
        ValueError: If ``factor_cols`` is empty.
    """
    factor_cols = list(factor_cols)
    if not factor_cols:
        raise ValueError("factor_cols must name at least one factor")

    interaction_terms = [
        f'{a}:{b}'
        for i, a in enumerate(factor_cols)
        for b in factor_cols[i + 1:]
    ]
    # Join all terms in one pass so a single-factor model, which has no
    # interactions, does not end with a dangling " + ".
    rhs = ' + '.join(factor_cols + interaction_terms)
    formula = f'{response_col} ~ {rhs}'

    model = ols(formula, data=data).fit()
    anova_table = sm.stats.anova_lm(model, typ=2)

    # Effect estimate: mean response at a factor's highest setting minus the
    # mean response at its lowest setting.
    effects = {}
    for factor in factor_cols:
        column = data[factor]
        high_mean = data[column == column.max()][response_col].mean()
        low_mean = data[column == column.min()][response_col].mean()
        effects[factor] = high_mean - low_mean

    p_values = anova_table['PR(>F)']
    significant_factors = [f for f in factor_cols if p_values.loc[f] < 0.05]
    # Interactions are looked up defensively so this added key can never
    # fail where the original function succeeded.
    significant_interactions = [
        term for term in interaction_terms
        if term in p_values.index and p_values.loc[term] < 0.05
    ]

    return {
        "方差分析表": anova_table.to_dict(),
        "决定系数": model.rsquared,
        "调整决定系数": model.rsquared_adj,
        "效应": effects,
        "显著因子": significant_factors,
        "显著交互作用": significant_interactions,
        "模型摘要": model.summary().as_text()
    }
5. 响应曲面分析
def fit_response_surface(data, response_col, factor_cols):
    """Fit a second-order response surface model.

    The model contains linear, pure quadratic and two-factor interaction
    terms for the given factors.

    Args:
        data: DataFrame holding the factor and response columns.
        response_col: Name of the response column.
        factor_cols: Names of the factor columns.

    Returns:
        A dict with the fitted model, R-squared, the coefficients, the names
        of the terms whose p-value is below 0.05 and the formula string.

    Raises:
        ValueError: If ``factor_cols`` is empty.
    """
    factor_cols = list(factor_cols)
    if not factor_cols:
        raise ValueError("factor_cols must name at least one factor")

    quadratic_terms = [f'I({f}**2)' for f in factor_cols]
    interaction_terms = [
        f'{a}:{b}'
        for i, a in enumerate(factor_cols)
        for b in factor_cols[i + 1:]
    ]
    # Join all terms in one pass so a single-factor model, which has no
    # interactions, does not end with a dangling " + ".
    rhs = ' + '.join(factor_cols + quadratic_terms + interaction_terms)
    formula = f'{response_col} ~ {rhs}'

    model = ols(formula, data=data).fit()

    p_values = model.pvalues
    return {
        "模型": model,
        "决定系数": model.rsquared,
        "系数": model.params.to_dict(),
        "显著项": [term for term in p_values.index if p_values[term] < 0.05],
        "公式": formula
    }
def find_optimal_conditions(model, factor_cols, bounds, maximize=True,
                            n_starts=20, seed=None):
    """Search the fitted response surface for the best factor settings.

    Runs a bounded L-BFGS-B search from several random starting points and
    keeps the best result.

    Args:
        model: Fitted model exposing ``predict(DataFrame)``.
        factor_cols: Factor names, in the order used by ``bounds``.
        bounds: One ``(low, high)`` pair per factor.
        maximize: Maximize the predicted response when True, else minimize.
        n_starts: Number of random starting points.
        seed: Optional seed for reproducible starting points. When None the
            global NumPy random state is used.

    Returns:
        A dict with the best settings, the predicted response there and the
        optimizer's success flag for the best run.

    Raises:
        ValueError: If ``bounds`` and ``factor_cols`` differ in length or
            ``n_starts`` is less than 1.
    """
    from scipy.optimize import minimize

    factor_cols = list(factor_cols)
    bounds = list(bounds)
    if len(bounds) != len(factor_cols):
        raise ValueError("bounds must contain one (low, high) pair per factor")
    if n_starts < 1:
        raise ValueError("n_starts must be at least 1")

    # The optimizer minimizes, so maximization flips the sign.
    sign = -1.0 if maximize else 1.0
    rng = np.random if seed is None else np.random.RandomState(seed)

    def objective(x):
        frame = pd.DataFrame([dict(zip(factor_cols, x))])
        # Take the first prediction by position so both Series and ndarray
        # return values are handled.
        predicted = np.asarray(model.predict(frame)).ravel()[0]
        return sign * float(predicted)

    # Several random starts reduce the risk of stopping at a local optimum.
    best_result = None
    for _ in range(n_starts):
        x0 = [rng.uniform(low, high) for low, high in bounds]
        result = minimize(objective, x0, bounds=bounds, method='L-BFGS-B')
        if best_result is None or result.fun < best_result.fun:
            best_result = result

    optimal = {name: float(v) for name, v in zip(factor_cols, best_result.x)}
    optimal_response = float(sign * best_result.fun)

    return {
        "最优设置": optimal,
        "预测响应": optimal_response,
        "优化成功": best_result.success
    }
6. 确认试验规划
def plan_confirmation_runs(optimal_settings, model, n_runs=5, alpha=0.05):
    """Plan confirmation runs at the optimal settings.

    Args:
        optimal_settings: Mapping of factor name to its chosen setting.
        model: Fitted model exposing ``predict``, ``mse_resid``,
            ``df_resid`` and ``model.data.orig_endog``.
        n_runs: Number of confirmation runs to perform.
        alpha: Significance level; intervals have confidence ``1 - alpha``.

    Returns:
        A dict with the settings, the predicted response, the prediction
        interval for a single new run ("预测区间"), the interval for the
        mean of ``n_runs`` new runs ("均值接受区间"), the run count and an
        acceptance statement built from the interval for the mean.

    Raises:
        ValueError: If ``n_runs`` is less than 1.
    """
    from scipy import stats

    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    frame = pd.DataFrame([optimal_settings])
    # Take the first prediction by position so both Series and ndarray
    # return values are handled.
    predicted = float(np.asarray(model.predict(frame)).ravel()[0])

    residual_sd = np.sqrt(model.mse_resid)
    t_val = stats.t.ppf(1 - alpha / 2, model.df_resid)
    n_obs = len(model.model.data.orig_endog)

    # Simplification: uncertainty of the fitted value is approximated by
    # 1/n_obs rather than the exact leverage of the optimal point.
    single_half = float(t_val * residual_sd * np.sqrt(1 + 1 / n_obs))
    # The mean of n_runs new observations has noise variance sigma^2/n_runs,
    # so its band is narrower than the single-run prediction interval.
    mean_half = float(t_val * residual_sd * np.sqrt(1 / n_runs + 1 / n_obs))

    pi_lower = predicted - single_half
    pi_upper = predicted + single_half
    mean_lower = predicted - mean_half
    mean_upper = predicted + mean_half

    return {
        "最优设置": optimal_settings,
        "预测响应": predicted,
        "预测区间": {
            "下限": pi_lower,
            "上限": pi_upper,
            "置信度": 1 - alpha
        },
        "均值接受区间": {
            "下限": mean_lower,
            "上限": mean_upper,
            "置信度": 1 - alpha
        },
        "确认试验次数": n_runs,
        "接受标准": f"{n_runs}次试验的均值应落在[{mean_lower:.3f}, {mean_upper:.3f}]范围内"
    }
流程集成
此技能与以下流程集成:
- `design-of-experiments-execution.js`
- `root-cause-analysis-investigation.js`
- `statistical-process-control-implementation.js`
输出格式
{
"设计类型": "2^4 全因子设计",
"因子": ["温度", "压力", "时间", "催化剂"],
"试验次数": 16,
"分析": {
"显著因子": ["温度", "压力"],
"显著交互作用": ["温度:压力"],
"决定系数": 0.94
},
"最优设置": {
"温度": 180,
"压力": 2.5,
"时间": 60,
"催化剂": 0.5
},
"预测响应": 95.3,
"确认计划": {
"试验次数": 5,
"预测区间": [93.1, 97.5]
}
}
最佳实践
- 从筛选开始 - 对多个因子使用Plackett-Burman设计
- 选择适当分辨率 - 若要使主效应不与二因子交互作用混杂,至少需要分辨率IV
- 包含中心点 - 检测曲率
- 随机化运行顺序 - 减少系统偏差
- 重复试验 - 估计误差用于显著性检验
- 确认结果 - 始终运行确认实验
约束
- 记录所有实验条件
- 控制干扰因子
- 严格按照计划执行设计
- 同时报告实际显著性(工程意义)和统计显著性