tensorflow-trainer

概述

具备回调函数、分布式策略、TensorBoard集成以及生产就绪模型导出能力的TensorFlow/Keras模型训练技能。

能力

使用回调函数的Keras模型训练
使用tf.GradientTape的自定义训练循环
分布式策略配置（MirroredStrategy、MultiWorkerMirroredStrategy、TPUStrategy）
TensorBoard日志记录与可视化
用于TF Serving的SavedModel导出
用于边缘部署的TFLite转换
混合精度训练

目标流程

带有实验跟踪的模型训练流水线
分布式训练编排
模型部署流水线

工具与库

TensorFlow
Keras
TensorBoard
TensorFlow Serving
TensorFlow Lite

输入模式

{
  "type": "object",
  "required": ["modelConfig", "dataConfig", "trainingConfig"],
  "properties": {
    "modelConfig": {
      "type": "object",
      "properties": {
        "modelPath": { "type": "string" },
        "modelType": { "type": "string", "enum": ["sequential", "functional", "subclassed"] }
      }
    },
    "dataConfig": {
      "type": "object",
      "properties": {
        "trainPath": { "type": "string" },
        "valPath": { "type": "string" },
        "batchSize": { "type": "integer", "minimum": 1 },
        "prefetch": { "type": "boolean" }
      }
    },
    "trainingConfig": {
      "type": "object",
      "properties": {
        "epochs": { "type": "integer", "minimum": 1 },
        "optimizer": { "type": "string" },
        "learningRate": { "type": "number" },
        "loss": { "type": "string" },
        "metrics": { "type": "array", "items": { "type": "string" } },
        "callbacks": { "type": "array", "items": { "type": "string" } },
        "distributionStrategy": { "type": "string" }
      }
    },
    "exportConfig": {
      "type": "object",
      "properties": {
        "savedModelPath": { "type": "string" },
        "tflitePath": { "type": "string" },
        "servingSignatures": { "type": "array", "items": { "type": "string" } }
      }
    }
  }
}

输出模式

{
  "type": "object",
  "required": ["status", "metrics", "modelPath"],
  "properties": {
    "status": { "type": "string", "enum": ["success", "error", "early_stopped"] },
    "metrics": {
      "type": "object",
      "properties": {
        "loss": { "type": "number" },
        "valLoss": { "type": "number" },
        "accuracy": { "type": "number" },
        "valAccuracy": { "type": "number" },
        "epochsTrained": { "type": "integer" }
      }
    },
    "modelPath": { "type": "string" },
    "savedModelPath": { "type": "string" },
    "tensorboardLogDir": { "type": "string" },
    "history": { "type": "object", "description": "包含每个epoch所有指标的完整训练历史记录" }
  }
}

使用示例

// Example request that invokes the skill named 'tensorflow-trainer'.
// NOTE(review): `context` appears to carry the payload described by the
// input schema — confirm against the consumer of this object.
{
  kind: 'skill',
  title: '训练TensorFlow模型',
  skill: {
    name: 'tensorflow-trainer',
    context: {
      // modelConfig, dataConfig and trainingConfig are the three sections the
      // input schema lists as required; the optional exportConfig section is
      // omitted in this example.
      modelConfig: {
        modelPath: 'models/cnn_model.py',
        // Must be one of the schema's enum values:
        // 'sequential', 'functional' or 'subclassed'.
        modelType: 'functional'
      },
      dataConfig: {
        trainPath: 'data/train',
        valPath: 'data/val',
        batchSize: 64,
        prefetch: true
      },
      trainingConfig: {
        epochs: 50,
        optimizer: 'adam',
        learningRate: 0.001,
        loss: 'sparse_categorical_crossentropy',
        // metrics and callbacks are arrays of strings per the input schema.
        metrics: ['accuracy'],
        callbacks: ['early_stopping', 'model_checkpoint', 'tensorboard']
        // distributionStrategy is an optional string in the schema and is
        // not set here.
      }
    }
  }
}