protocol-parserSkill protocol-parser

协议解析专家

架构设计 0 次安装 0 次浏览 更新于 2/25/2026

以下是对“protocol-parser”技能的中文翻译:


---
name: protocol-parser
description: 专门用于二进制和文本协议解析和序列化的专项技能。设计和验证协议消息格式,从规范生成解析器代码,实现状态机解析,处理字节序和字节对齐。
allowed-tools: Bash(*) Read Write Edit Glob Grep WebFetch
metadata:
  author: babysitter-sdk
  version: "1.0.0"
  category: protocol-implementation
  backlog-id: SK-002
---

protocol-parser

你是 protocol-parser - 一个专门用于二进制和文本协议解析和序列化的专项技能,提供深入的协议消息格式设计、解析器生成和状态机实现的专业知识。

概览

这项技能支持AI驱动的协议解析操作,包括:

  • 设计和验证协议消息格式
  • 从协议规范生成解析器代码
  • 实现状态机解析
  • 处理字节序和字节对齐
  • 验证校验和/CRC实现
  • 调试协议解析问题
  • 为解析器生成测试向量

前提条件

  • 了解二进制数据表示
  • 协议规范文档(如果实现现有协议)
  • 目标语言的构建工具(C/C++, Rust, Python等)

能力

1. 协议消息格式设计

设计高效的二进制协议格式:

协议消息格式
========================

头部 (8字节):
+--------+--------+--------+--------+--------+--------+--------+--------+
| Magic  | Version|  Type  | Flags  |          Payload Length           |
+--------+--------+--------+--------+--------+--------+--------+--------+
   1B       1B       1B       1B              4B (big-endian)

有效载荷 (可变):
+--------+--------+--------+--------+--------+--------+--------+--------+
|                             Payload Data                              |
+--------+--------+--------+--------+--------+--------+--------+--------+

尾部 (4字节,big-endian):
+--------+--------+--------+--------+
|          CRC32 Checksum           |
+--------+--------+--------+--------+

2. 二进制协议解析器生成

生成高效的二进制解析器:

#include <stdint.h>
#include <stdlib.h>     // for malloc, free
#include <string.h>
#include <arpa/inet.h>  // for ntohl, ntohs

// Value required in the first header byte; the parsers reject any other value.
#define MAGIC_BYTE 0xAB
// The only header version value that parse_message() accepts.
#define PROTOCOL_VERSION 0x01

// Message type codes. The test vectors place MSG_TYPE_DATA (0x02) in the
// header's one-byte `type` field.
// NOTE(review): the parsers shown here copy the type byte but never validate it.
typedef enum {
    MSG_TYPE_HANDSHAKE = 0x01,
    MSG_TYPE_DATA      = 0x02,
    MSG_TYPE_ACK       = 0x03,
    MSG_TYPE_ERROR     = 0x04,
    MSG_TYPE_CLOSE     = 0x05
} message_type_t;

// Flag bits. Every value is a distinct single bit, so they can be OR-ed
// together; presumably they are carried in the header's one-byte `flags` field.
// NOTE(review): the parsers shown here do not interpret any of these flags.
typedef enum {
    FLAG_COMPRESSED  = 0x01,
    FLAG_ENCRYPTED   = 0x02,
    FLAG_FRAGMENTED  = 0x04,
    FLAG_LAST_FRAG   = 0x08
} message_flags_t;

// Message header exactly as it appears on the wire (8 bytes). The packed
// attribute removes padding so the parsers can fill the struct with one
// memcpy() from the receive buffer.
typedef struct __attribute__((packed)) {
    uint8_t  magic;           // compared against MAGIC_BYTE
    uint8_t  version;         // compared against PROTOCOL_VERSION
    uint8_t  type;            // message type code (see message_type_t)
    uint8_t  flags;           // flag bits (see message_flags_t)
    uint32_t payload_length;  // Big-endian; left in wire order by the parsers, convert with ntohl()
} protocol_header_t;

// Output of parse_message(): a decoded view of one message.
typedef struct {
    protocol_header_t header;   // copy of the wire header (payload_length still big-endian)
    uint8_t*          payload;  // points into the buffer given to parse_message(); not a copy
    uint32_t          crc32;    // CRC32 read from the message footer, in host byte order
} protocol_message_t;

// Status codes returned by the parsers.
typedef enum {
    PARSE_OK = 0,             // a complete message was parsed
    PARSE_INCOMPLETE,         // more input bytes are needed
    PARSE_INVALID_MAGIC,      // first header byte is not MAGIC_BYTE
    PARSE_INVALID_VERSION,    // header version is not PROTOCOL_VERSION
    PARSE_INVALID_CRC,        // footer CRC32 differs from the CRC32 computed over the message
    PARSE_PAYLOAD_TOO_LARGE   // declared payload length exceeds the parser's limit
} parse_result_t;

// Bitwise CRC-32 (IEEE 802.3): reflected polynomial 0xEDB88320, initial value
// 0xFFFFFFFF, result inverted at the end.
//
// data:   bytes to checksum (not dereferenced when length is 0)
// length: number of bytes in data
// Returns the CRC-32 of the input; an empty input yields 0.
uint32_t crc32(const uint8_t* data, size_t length) {
    uint32_t reg = 0xFFFFFFFF;
    size_t pos = 0;

    while (pos < length) {
        reg ^= data[pos++];

        // Shift out one bit at a time, folding in the polynomial whenever the
        // bit that falls off the low end is set.
        for (int bits_left = 8; bits_left > 0; bits_left--) {
            if (reg & 1u) {
                reg = (reg >> 1) ^ 0xEDB88320;
            } else {
                reg >>= 1;
            }
        }
    }

    return ~reg;
}

// Parse one complete message from the start of `buffer`.
//
// Wire layout: 8-byte header, payload, then a 4-byte big-endian CRC32 that
// covers the header and the payload.
//
// buffer:         input bytes
// buffer_len:     number of valid bytes in buffer
// msg:            receives a copy of the header, a payload pointer INTO buffer
//                 (no copy is made), and the footer CRC in host byte order
// bytes_consumed: set to the full message size on PARSE_OK, otherwise 0
//
// Returns PARSE_OK, PARSE_INCOMPLETE (header or body not fully present),
// PARSE_INVALID_MAGIC, PARSE_INVALID_VERSION, PARSE_PAYLOAD_TOO_LARGE
// (declared length above 16 MiB) or PARSE_INVALID_CRC.
parse_result_t parse_message(
    const uint8_t* buffer,
    size_t buffer_len,
    protocol_message_t* msg,
    size_t* bytes_consumed
) {
    const size_t header_size = sizeof(protocol_header_t);
    const size_t crc_size = sizeof(uint32_t);
    const uint32_t max_payload = 16 * 1024 * 1024;  // 16MB max

    *bytes_consumed = 0;

    if (buffer_len < header_size) {
        return PARSE_INCOMPLETE;
    }

    // The header struct is packed, so a raw copy lines every field up.
    memcpy(&msg->header, buffer, header_size);

    if (msg->header.magic != MAGIC_BYTE) {
        return PARSE_INVALID_MAGIC;
    }
    if (msg->header.version != PROTOCOL_VERSION) {
        return PARSE_INVALID_VERSION;
    }

    // The length stays big-endian inside msg->header; only this local copy is
    // converted to host order.
    const uint32_t body_len = ntohl(msg->header.payload_length);
    if (body_len > max_payload) {
        return PARSE_PAYLOAD_TOO_LARGE;
    }

    const size_t crc_offset = header_size + body_len;
    const size_t frame_len = crc_offset + crc_size;
    if (buffer_len < frame_len) {
        return PARSE_INCOMPLETE;
    }

    msg->payload = (uint8_t*)(buffer + header_size);

    // The footer may be unaligned, so read it through memcpy.
    uint32_t wire_crc;
    memcpy(&wire_crc, buffer + crc_offset, crc_size);
    msg->crc32 = ntohl(wire_crc);

    if (crc32(buffer, crc_offset) != msg->crc32) {
        return PARSE_INVALID_CRC;
    }

    *bytes_consumed = frame_len;
    return PARSE_OK;
}

3. 状态机解析

实现协议状态机:

// States of the streaming parser driven by parser_feed().
typedef enum {
    STATE_IDLE,               // waiting for a complete header
    STATE_HEADER_RECEIVED,    // header parsed and it announced a zero-length payload
    STATE_PAYLOAD_RECEIVING,  // header parsed; payload bytes are still being collected
    STATE_MESSAGE_COMPLETE,   // a whole message was delivered; the next parser_feed() call resets the parser
    STATE_ERROR               // parsing failed; parser_feed() keeps returning an error in this state
} parser_state_t;

// State carried between parser_feed() calls while a message arrives in pieces.
typedef struct {
    parser_state_t state;       // current position in the state machine
    protocol_header_t header;   // raw copy of the most recent header (payload_length still big-endian)
    uint8_t* payload_buffer;    // heap buffer allocated by parser_feed() for the payload; NULL when there is none
    size_t payload_received;    // payload bytes copied into payload_buffer so far
    size_t payload_expected;    // payload length announced by the header, in host byte order
    uint32_t expected_crc;      // NOTE(review): presumably the message's footer CRC32 — confirm against parser_feed()
} stream_parser_t;

// Put a parser into its initial idle state.
//
// Every field is reset: state becomes STATE_IDLE, payload_buffer becomes NULL,
// and the header, the byte counters and expected_crc are zeroed, so no field of
// a freshly declared stream_parser_t is left indeterminate.
//
// This function does not free payload_buffer; a caller that still holds a
// buffer from an earlier message must free() it before calling.
void parser_init(stream_parser_t* parser) {
    // Members not named in the initializer are zero-initialized.
    *parser = (stream_parser_t){
        .state = STATE_IDLE,
        .payload_buffer = NULL,
    };
}

// Largest payload length accepted from a header: 16 MiB, the same limit
// parse_message() applies. Without a limit a hostile header could request an
// allocation of up to 4 GiB.
#define STREAM_MAX_PAYLOAD_LEN ((size_t)16 * 1024 * 1024)

// Size in bytes of the big-endian CRC32 footer that follows the payload.
#define STREAM_CRC_LEN ((size_t)4)

// Run `count` bytes through the raw CRC-32 register (reflected polynomial
// 0xEDB88320). The caller supplies the initial value 0xFFFFFFFF and inverts the
// final result, which lets the header and the payload be checksummed as two
// separate chunks.
static uint32_t stream_crc32_update(uint32_t crc, const uint8_t* bytes, size_t count) {
    for (size_t i = 0; i < count; i++) {
        crc ^= bytes[i];
        for (int bit = 0; bit < 8; bit++) {
            crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1u)));
        }
    }
    return crc;
}

// Incrementally parse a message from a byte stream.
//
// parser:   state initialised with parser_init()
// data/len: the bytes available right now
// consumed: set to the number of bytes of `data` this call used
//
// Returns:
//   PARSE_OK                 one message is complete and its CRC matched;
//                            header, payload_buffer and expected_crc are valid.
//                            Call again with the remaining bytes for the next
//                            message.
//   PARSE_INCOMPLETE         more data is needed. A partial header or a partial
//                            CRC footer is NOT consumed: the caller must present
//                            data[*consumed..] again together with the new bytes.
//   PARSE_INVALID_MAGIC      bad magic byte. Also returned on every call made
//                            while the parser is in STATE_ERROR, because the
//                            original failure reason is not stored.
//   PARSE_INVALID_VERSION    header version is not PROTOCOL_VERSION.
//   PARSE_PAYLOAD_TOO_LARGE  declared length exceeds STREAM_MAX_PAYLOAD_LEN, or
//                            the payload buffer could not be allocated.
//   PARSE_INVALID_CRC        footer CRC differs from the CRC of header+payload;
//                            the payload buffer is freed.
//
// Ownership: on PARSE_OK the payload buffer belongs to the caller. The parser
// never frees it, and the next call resets parser->payload_buffer to NULL, so
// the caller must free() (or take over) the pointer before calling again.
// After any error, call parser_init() before reusing the parser.
parse_result_t parser_feed(
    stream_parser_t* parser,
    const uint8_t* data,
    size_t len,
    size_t* consumed
) {
    *consumed = 0;

    while (*consumed < len) {
        switch (parser->state) {
            case STATE_IDLE:
                // Looking for header; wait until all of it is available.
                if (len - *consumed < sizeof(protocol_header_t)) {
                    return PARSE_INCOMPLETE;
                }

                memcpy(&parser->header, data + *consumed,
                       sizeof(protocol_header_t));
                *consumed += sizeof(protocol_header_t);

                if (parser->header.magic != MAGIC_BYTE) {
                    parser->state = STATE_ERROR;
                    return PARSE_INVALID_MAGIC;
                }

                if (parser->header.version != PROTOCOL_VERSION) {
                    parser->state = STATE_ERROR;
                    return PARSE_INVALID_VERSION;
                }

                parser->payload_expected = ntohl(parser->header.payload_length);
                parser->payload_received = 0;

                // Bound the length before trusting it for an allocation.
                if (parser->payload_expected > STREAM_MAX_PAYLOAD_LEN) {
                    parser->state = STATE_ERROR;
                    return PARSE_PAYLOAD_TOO_LARGE;
                }

                if (parser->payload_expected > 0) {
                    parser->payload_buffer = malloc(parser->payload_expected);
                    if (parser->payload_buffer == NULL) {
                        // No dedicated out-of-memory code exists; report the
                        // payload as too large to buffer.
                        parser->state = STATE_ERROR;
                        return PARSE_PAYLOAD_TOO_LARGE;
                    }
                    parser->state = STATE_PAYLOAD_RECEIVING;
                } else {
                    parser->state = STATE_HEADER_RECEIVED;
                }
                break;

            case STATE_HEADER_RECEIVED:   // zero-length payload: only the footer is outstanding
            case STATE_PAYLOAD_RECEIVING: {
                size_t remaining = parser->payload_expected - parser->payload_received;
                size_t available = len - *consumed;
                size_t to_copy = (available < remaining) ? available : remaining;

                // Guarded so memcpy is never called with a NULL buffer when the
                // payload is empty.
                if (to_copy > 0) {
                    memcpy(parser->payload_buffer + parser->payload_received,
                           data + *consumed, to_copy);
                    parser->payload_received += to_copy;
                    *consumed += to_copy;
                }

                if (parser->payload_received < parser->payload_expected) {
                    return PARSE_INCOMPLETE;
                }

                // Payload complete; the footer is read in one piece, like the header.
                if (len - *consumed < STREAM_CRC_LEN) {
                    return PARSE_INCOMPLETE;
                }

                const uint8_t* footer = data + *consumed;
                parser->expected_crc = ((uint32_t)footer[0] << 24) |
                                       ((uint32_t)footer[1] << 16) |
                                       ((uint32_t)footer[2] << 8) |
                                       (uint32_t)footer[3];
                *consumed += STREAM_CRC_LEN;

                // parser->header still holds the exact wire bytes, so it can be
                // checksummed directly ahead of the payload.
                uint32_t crc = 0xFFFFFFFFu;
                crc = stream_crc32_update(crc, (const uint8_t*)&parser->header,
                                          sizeof(protocol_header_t));
                crc = stream_crc32_update(crc, parser->payload_buffer,
                                          parser->payload_expected);
                crc = ~crc;

                if (crc != parser->expected_crc) {
                    free(parser->payload_buffer);
                    parser->payload_buffer = NULL;
                    parser->state = STATE_ERROR;
                    return PARSE_INVALID_CRC;
                }

                parser->state = STATE_MESSAGE_COMPLETE;
                return PARSE_OK;
            }

            case STATE_MESSAGE_COMPLETE:
                // Reset for next message. The previous payload buffer is not
                // freed here; it was handed to the caller with PARSE_OK.
                parser_init(parser);
                break;

            case STATE_ERROR:
                return PARSE_INVALID_MAGIC;

            default:
                parser->state = STATE_ERROR;
                return PARSE_INVALID_MAGIC;
        }
    }

    return PARSE_INCOMPLETE;
}

4. 字节序处理

正确处理跨平台的字节顺序:

#include <stdint.h>

// Detect endianness at compile time.
// Uses the __BYTE_ORDER__ / __ORDER_BIG_ENDIAN__ predefined macros (provided by
// GCC and Clang). IS_BIG_ENDIAN is 1 on a big-endian target and 0 otherwise.
// NOTE(review): when __BYTE_ORDER__ is not defined at all, the #else branch
// silently selects little-endian — confirm that is right for every compiler
// this is built with.
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    #define IS_BIG_ENDIAN 1
#else
    #define IS_BIG_ENDIAN 0
#endif

// Byte swap macros: reverse the byte order of a 16-, 32- or 64-bit value.
//
// The argument is converted to the unsigned type of the target width before any
// shifting. This keeps the macros well defined for plain `int` arguments
// (e.g. SWAP32(0xFF) would otherwise shift a signed int into its sign bit) and
// for arguments narrower than the target width (e.g. SWAP64 of an int would
// otherwise shift by more than the operand's width).
//
// The argument is evaluated several times; do not pass an expression with side
// effects.
#define SWAP16(x) ((uint16_t)( \
    (((uint16_t)(x) & 0xFFu) << 8) | \
    (((uint16_t)(x) >> 8) & 0xFFu) \
))
#define SWAP32(x) ((uint32_t)( \
    (((uint32_t)(x) & 0xFFu) << 24) | \
    (((uint32_t)(x) & 0xFF00u) << 8) | \
    (((uint32_t)(x) >> 8) & 0xFF00u) | \
    (((uint32_t)(x) >> 24) & 0xFFu) \
))
#define SWAP64(x) ((uint64_t)( \
    (((uint64_t)(x) & 0xFFULL) << 56) | \
    (((uint64_t)(x) & 0xFF00ULL) << 40) | \
    (((uint64_t)(x) & 0xFF0000ULL) << 24) | \
    (((uint64_t)(x) & 0xFF000000ULL) << 8) | \
    (((uint64_t)(x) >> 8) & 0xFF000000ULL) | \
    (((uint64_t)(x) >> 24) & 0xFF0000ULL) | \
    (((uint64_t)(x) >> 40) & 0xFF00ULL) | \
    (((uint64_t)(x) >> 56) & 0xFFULL) \
))

// Network byte order (big-endian) conversion.
// On a big-endian host (IS_BIG_ENDIAN non-zero) the value is returned as is;
// otherwise its bytes are reversed. Converting to and from big-endian is the
// same operation, so the to_* and from_* helpers have identical bodies.
static inline uint16_t to_be16(uint16_t x) {
    if (IS_BIG_ENDIAN) {
        return x;
    }
    return SWAP16(x);
}

static inline uint32_t to_be32(uint32_t x) {
    if (IS_BIG_ENDIAN) {
        return x;
    }
    return SWAP32(x);
}

static inline uint16_t from_be16(uint16_t x) {
    if (IS_BIG_ENDIAN) {
        return x;
    }
    return SWAP16(x);
}

static inline uint32_t from_be32(uint32_t x) {
    if (IS_BIG_ENDIAN) {
        return x;
    }
    return SWAP32(x);
}

// Little-endian conversion.
// The bytes are reversed only on a big-endian host (IS_BIG_ENDIAN non-zero);
// on any other host the value is returned unchanged.
static inline uint16_t to_le16(uint16_t x) {
    if (IS_BIG_ENDIAN) {
        return SWAP16(x);
    }
    return x;
}

static inline uint32_t to_le32(uint32_t x) {
    if (IS_BIG_ENDIAN) {
        return SWAP32(x);
    }
    return x;
}

5. Protocol Buffers 模式生成

生成 Protocol Buffers(protobuf)模式定义:

syntax = "proto3";

package myprotocol;

option go_package = "github.com/example/myprotocol";

// Message envelope: fields shared by every message plus one payload variant.
message Envelope {
    uint32 version = 1;
    uint64 timestamp = 2;        // NOTE(review): unit and epoch are not specified in this file — confirm
    string correlation_id = 3;
    // At most one of the following is set. Payload field numbers start at 10,
    // leaving 4-9 unused in the envelope.
    oneof payload {
        HandshakeRequest handshake_request = 10;
        HandshakeResponse handshake_response = 11;
        DataMessage data = 12;
        Acknowledgment ack = 13;
        ErrorMessage error = 14;
    }
}

// Handshake request; carried as the handshake_request variant of Envelope.payload.
message HandshakeRequest {
    string client_id = 1;
    repeated string supported_versions = 2;  // zero or more version strings
    map<string, string> capabilities = 3;    // string-to-string map; the keys are not defined in this file
}

// Handshake response; carried as the handshake_response variant of Envelope.payload.
// NOTE(review): presumably selected_version is one of the request's
// supported_versions — confirm.
message HandshakeResponse {
    bool accepted = 1;
    string selected_version = 2;
    string session_id = 3;
}

// Data message; carried as the data variant of Envelope.payload.
// NOTE(review): `compressed` and `compression_type` overlap; this file does not
// say how they relate (e.g. compressed = true with NONE) — confirm.
message DataMessage {
    uint64 sequence = 1;
    bytes payload = 2;
    bool compressed = 3;
    CompressionType compression_type = 4;
}

// Compression algorithm identifiers used by DataMessage.compression_type.
// NONE is the zero value, which proto3 uses as the default for an unset field.
enum CompressionType {
    NONE = 0;
    GZIP = 1;
    LZ4 = 2;
    ZSTD = 3;
}

// Acknowledgment; carried as the ack variant of Envelope.payload.
// NOTE(review): sequence presumably echoes DataMessage.sequence — confirm.
message Acknowledgment {
    uint64 sequence = 1;
    bool success = 2;
}

// Error report; carried as the error variant of Envelope.payload.
// NOTE(review): the set of valid `code` values is not defined in this file.
message ErrorMessage {
    uint32 code = 1;
    string message = 2;
    map<string, string> details = 3;  // string-to-string map; the keys are not defined in this file
}

6. 测试向量生成

生成全面的测试向量:

import struct
import zlib

def generate_test_vectors():
    """Build the test vectors for the binary protocol parser.

    Returns a list of five dicts, each with the keys ``name``, ``data`` (the
    message bytes as a hex string), ``expected`` (name of the parser result
    code) and ``description``.

    Wire format: an 8-byte header ``>BBBBI`` (magic, version, type, flags,
    big-endian payload length), the payload, then a big-endian CRC32 computed
    over header + payload.
    """

    def pack_header(magic, payload_len):
        # version 0x01, type 0x02 (DATA), flags 0x00 are the same in every vector
        return struct.pack('>BBBBI', magic, 0x01, 0x02, 0x00, payload_len)

    def append_crc(body, crc=None):
        # crc=None means "append the correct checksum for body"
        if crc is None:
            crc = zlib.crc32(body) & 0xFFFFFFFF
        return body + struct.pack('>I', crc)

    def vector(name, raw, expected, description):
        return {
            'name': name,
            'data': raw.hex(),
            'expected': expected,
            'description': description,
        }

    text_payload = b'Hello, World!'

    return [
        # Test 1: Valid minimal message
        vector('valid_minimal',
               append_crc(pack_header(0xAB, 0)),
               'PARSE_OK',
               'Valid message with no payload'),
        # Test 2: Valid message with payload
        vector('valid_with_payload',
               append_crc(pack_header(0xAB, len(text_payload)) + text_payload),
               'PARSE_OK',
               'Valid message with text payload'),
        # Test 3: Invalid magic byte (the CRC itself is correct)
        vector('invalid_magic',
               append_crc(pack_header(0xFF, 0)),
               'PARSE_INVALID_MAGIC',
               'Message with wrong magic byte'),
        # Test 4: Invalid CRC
        vector('invalid_crc',
               append_crc(pack_header(0xAB, 0), crc=0xDEADBEEF),
               'PARSE_INVALID_CRC',
               'Message with incorrect CRC'),
        # Test 5: Incomplete message (header announces 100 bytes, none follow)
        vector('incomplete',
               pack_header(0xAB, 100),
               'PARSE_INCOMPLETE',
               'Message with missing payload'),
    ]

MCP服务器集成

这项技能可以利用以下MCP服务器增强功能:

| 服务器 | 描述 | 集成 |
|--------|------|------|
| protoc-gen-go-mcp | Protocol Buffers 到 MCP 的转换 | gRPC 服务生成 |
| gRPC-to-MCP Proxy | MCP 到 gRPC 的协议转换 | 企业连接 |

最佳实践

  1. 定义清晰的消息边界 - 使用长度前缀或分隔符
  2. 包含版本字段 - 支持协议演进
  3. 使用校验和 - 检测传输过程中的损坏
  4. 处理部分读取 - TCP流解析
  5. 记录位布局 - 清晰的规范减少错误
  6. 生成测试向量 - 全面测试覆盖

流程集成

这项技能与以下流程集成:

  • binary-protocol-parser.js - 二进制协议解析
  • custom-protocol-design.js - 自定义协议设计
  • protocol-state-machine.js - 状态机实现
  • message-framing.js - 消息分帧策略

输出格式

执行操作时,提供结构化输出:

{
  "operation": "parse",
  "protocol": "custom-v1",
  "status": "success",
  "message": {
    "type": "DATA",
    "flags": ["COMPRESSED"],
    "payloadLength": 1024,
    "crcValid": true
  },
  "bytesConsumed": 1036,
  "artifacts": ["parser.c", "protocol.h"]
}

约束

  • 在解析前验证所有输入
  • 优雅地处理畸形数据
  • 设置最大有效载荷限制
  • 记录解析错误以便于调试
  • 使用模糊测试(fuzzing)生成的输入进行测试