综合优化 synthesis-optimization

FPGA综合优化技能,专注于Vivado和Quartus工具,包括综合报告分析、综合属性应用、资源推断控制和结果质量(QoR)改进。

嵌入式软件 0 次安装 0 次浏览 更新于 2/25/2026

综合优化技能

针对Vivado和Quartus工具的FPGA综合优化专家技能。提供深入的专业知识,包括综合报告分析、综合属性应用、资源推断控制和QoR(结果质量)改进。

概览

综合优化技能为FPGA设计提供全面的综合优化支持,包括:

  • 综合报告分析和资源利用审查
  • 综合属性(KEEP、MAX_FANOUT、RAM_STYLE等)
  • DSP和BRAM推断指导
  • FSM编码优化(one-hot、二进制、Gray)
  • 重定时和寄存器平衡
  • 逻辑优化策略
  • 高扇出网络的扇出削减
  • 多供应商综合流程

能力

1. 综合报告分析

解析和分析综合报告:

# Vivado synthesis report analysis
# Utilization broken down by design hierarchy
report_utilization -hierarchical
# Utilization restricted to primitive (leaf) cells
report_utilization -cells [get_cells -hier -filter {IS_PRIMITIVE}]
report_timing_summary -setup -hold

# Machine-readable utilization export.
# report_utilization -format accepts only "table" (default) or "xml" and is
# honored only together with -file; "csv" is not a supported format.
report_utilization -format xml -file utilization.xml

# Inspect specific resource types
report_utilization -cells [get_cells -hier -filter {REF_NAME =~ DSP*}]
report_utilization -cells [get_cells -hier -filter {REF_NAME =~ RAM*}]

2. 综合属性

应用Xilinx/Vivado综合属性:

// Preserve the module hierarchy for debugging
(* KEEP_HIERARCHY = "yes" *)
module critical_path_module (
  // ...
);

// Prevent the register from being optimized away
(* DONT_TOUCH = "yes" *) logic debug_reg;

// Control register replication to meet timing
(* MAX_FANOUT = 50 *) logic high_fanout_signal;

// Keep this signal through synthesis
(* KEEP = "true" *) logic preserved_signal;

// RAM style control
(* RAM_STYLE = "block" *) logic [7:0] large_mem [1024];
(* RAM_STYLE = "distributed" *) logic [7:0] small_mem [16];
(* RAM_STYLE = "registers" *) logic [7:0] tiny_mem [4];
(* RAM_STYLE = "ultra" *) logic [7:0] uram_mem [4096];  // UltraRAM

// ROM style control
(* ROM_STYLE = "block" *) logic [7:0] lookup_table [256];

// Use DSP blocks for arithmetic
(* USE_DSP = "yes" *) logic [47:0] mult_result;
(* USE_DSP = "no" *) logic [15:0] small_mult;  // implement in fabric logic

// Shift-register (SRL) extraction control
(* SHREG_EXTRACT = "yes" *) logic [15:0] shift_reg;
(* SRL_STYLE = "register" *) logic [7:0] no_srl_shift;

// Asynchronous-register marking for CDC synchronizers
(* ASYNC_REG = "TRUE" *) logic [1:0] sync_reg;

// FSM encoding
(* FSM_ENCODING = "one_hot" *) enum logic [3:0] {
  IDLE, INIT, RUN, DONE
} state;

// Other FSM_ENCODING values (illustrative list; each attribute must precede a
// state-register declaration to be usable)
(* FSM_ENCODING = "sequential" *)  // binary encoding
(* FSM_ENCODING = "gray" *)        // Gray encoding
(* FSM_ENCODING = "johnson" *)     // Johnson encoding
(* FSM_ENCODING = "auto" *)        // tool decides

3. Intel/Quartus属性

应用Quartus综合属性:

// RAM style
(* ramstyle = "M20K" *) logic [7:0] intel_bram [1024];
(* ramstyle = "MLAB" *) logic [7:0] intel_lutram [32];
(* ramstyle = "logic" *) logic [7:0] intel_ff_mem [8];
(* ramstyle = "no_rw_check" *) logic [7:0] dual_port_mem [256];

// Preserve signals
(* preserve *) logic keep_signal;
(* noprune *) logic unused_but_keep;

// DSP usage
(* multstyle = "dsp" *) logic [31:0] use_dsp;
(* multstyle = "logic" *) logic [7:0] use_fabric;

// Synchronizer identification
(* altera_attribute = "-name SYNCHRONIZER_IDENTIFICATION FORCED" *)
logic [1:0] altera_sync;

// Maximum fanout
(* maxfan = 32 *) logic fanout_limited;

4. DSP推断优化

指导DSP48/DSP块推断:

// DSP48-friendly multiply-accumulate (MAC) inference pattern.
//
// Pipeline: A/B input registers -> multiplier (M) register -> accumulator (P).
// The product of a/b reaches p three enabled clock edges after the inputs
// are presented.
//
// Parameters:
//   A_WIDTH, B_WIDTH - widths of the signed multiplier operands
//   P_WIDTH          - width of the signed accumulator / output
//
// Reset is synchronous (active low). The registers inside a DSP48 slice only
// support synchronous reset, so an asynchronous reset here would force the
// A/B/M/P registers out of the DSP block and into fabric flip-flops.
module dsp_mac #(
  parameter int A_WIDTH = 18,
  parameter int B_WIDTH = 18,
  parameter int P_WIDTH = 48
) (
  input  logic                      clk,
  input  logic                      rst_n,  // active-low reset, sampled on posedge clk
  input  logic                      ce,     // clock enable for every pipeline stage
  input  logic signed [A_WIDTH-1:0] a,
  input  logic signed [B_WIDTH-1:0] b,
  input  logic signed [P_WIDTH-1:0] c,
  input  logic                      load,   // 1: p <= c + product, 0: p <= p + product
  output logic signed [P_WIDTH-1:0] p
);

  // Pipeline registers
  logic signed [A_WIDTH-1:0] a_reg;
  logic signed [B_WIDTH-1:0] b_reg;
  logic signed [P_WIDTH-1:0] mult_reg;

  always_ff @(posedge clk) begin
    if (!rst_n) begin
      a_reg    <= '0;
      b_reg    <= '0;
      mult_reg <= '0;
      p        <= '0;
    end else if (ce) begin
      // Input registers (A and B)
      a_reg <= a;
      b_reg <= b;
      // Multiplier register (M)
      mult_reg <= a_reg * b_reg;
      // Accumulator / output register (P)
      p <= load ? c + mult_reg : p + mult_reg;
    end
  end

endmodule

// Small multiplier kept out of the DSP blocks.
// USE_DSP = "no" on the module asks synthesis to build the 8x8 unsigned
// product from fabric LUTs instead of a DSP slice.
(* USE_DSP = "no" *)
module small_mult (
  input  logic [7:0]  a,
  input  logic [7:0]  b,
  output logic [15:0] p
);
  // Purely combinational product; the 16-bit result holds the full 8x8 range
  always_comb p = a * b;
endmodule

5. BRAM推断优化

确保正确的块RAM推断:

// Simple dual-port RAM (one write port, one read port, single clock),
// written in a form intended for block-RAM inference.
//
// Parameters:
//   DATA_WIDTH - word width in bits
//   DEPTH      - number of words
//   ADDR_WIDTH - address width, derived from DEPTH by default
module sdp_ram #(
  parameter int DATA_WIDTH = 32,
  parameter int DEPTH = 1024,
  parameter int ADDR_WIDTH = $clog2(DEPTH)
) (
  input  logic                    clk,
  // Write port
  input  logic                    wr_en,
  input  logic [ADDR_WIDTH-1:0]   wr_addr,
  input  logic [DATA_WIDTH-1:0]   wr_data,
  // Read port
  input  logic [ADDR_WIDTH-1:0]   rd_addr,
  output logic [DATA_WIDTH-1:0]   rd_data
);

  // Storage array, requested as block RAM
  (* RAM_STYLE = "block" *)
  logic [DATA_WIDTH-1:0] ram [DEPTH];

  // Read port: registered output; on a same-address write in the same cycle
  // the previously stored word is returned (non-blocking write below)
  always_ff @(posedge clk)
    rd_data <= ram[rd_addr];

  // Write port: store wr_data when wr_en is asserted
  always_ff @(posedge clk)
    if (wr_en)
      ram[wr_addr] <= wr_data;

endmodule

// True dual-port RAM: two independent ports (A and B), each with its own
// clock, enable, write enable, address, write data and registered read data.
//
// Parameters:
//   DATA_WIDTH - word width in bits
//   DEPTH      - number of words (address width is $clog2(DEPTH))
//
// Both ports write the shared array `mem`, so the port processes use plain
// `always` rather than `always_ff`: IEEE 1800 forbids a variable assigned in
// an always_ff procedure from being written by any other process, and
// compliant tools reject two always_ff blocks driving the same memory.
module tdp_ram #(
  parameter int DATA_WIDTH = 32,
  parameter int DEPTH = 1024
) (
  // Port A
  input  logic                    clk_a,
  input  logic                    en_a,
  input  logic                    wr_en_a,
  input  logic [$clog2(DEPTH)-1:0] addr_a,
  input  logic [DATA_WIDTH-1:0]   wr_data_a,
  output logic [DATA_WIDTH-1:0]   rd_data_a,
  // Port B
  input  logic                    clk_b,
  input  logic                    en_b,
  input  logic                    wr_en_b,
  input  logic [$clog2(DEPTH)-1:0] addr_b,
  input  logic [DATA_WIDTH-1:0]   wr_data_b,
  output logic [DATA_WIDTH-1:0]   rd_data_b
);

  // Shared storage, requested as block RAM
  (* RAM_STYLE = "block" *)
  logic [DATA_WIDTH-1:0] mem [DEPTH];

  // Port A
  always @(posedge clk_a) begin
    if (en_a) begin
      if (wr_en_a)
        mem[addr_a] <= wr_data_a;
      rd_data_a <= mem[addr_a];  // read-first: returns the word stored before this edge
    end
  end

  // Port B
  always @(posedge clk_b) begin
    if (en_b) begin
      if (wr_en_b)
        mem[addr_b] <= wr_data_b;
      rd_data_b <= mem[addr_b];  // read-first: returns the word stored before this edge
    end
  end

endmodule

6. FSM编码优化

选择最优的FSM编码:

// One-hot encoding (fast decode, more registers)
// NOTE(review): Vivado documents FSM_ENCODING as an attribute of the state
// register; confirm the tool honors it when attached to a typedef.
(* FSM_ENCODING = "one_hot" *)
typedef enum logic [7:0] {
  IDLE    = 8'b00000001,
  INIT    = 8'b00000010,
  CONFIG  = 8'b00000100,
  RUN     = 8'b00001000,
  PAUSE   = 8'b00010000,
  DONE    = 8'b00100000,
  ERROR   = 8'b01000000,
  RESET   = 8'b10000000
} state_t;

// Binary encoding (compact, slower decode)
(* FSM_ENCODING = "sequential" *)
typedef enum logic [2:0] {
  S_IDLE  = 3'd0,
  S_INIT  = 3'd1,
  S_RUN   = 3'd2,
  S_DONE  = 3'd3
} compact_state_t;

// Gray encoding (low power; consecutive codes differ in one bit)
(* FSM_ENCODING = "gray" *)
typedef enum logic [2:0] {
  G_IDLE  = 3'b000,
  G_INIT  = 3'b001,
  G_RUN   = 3'b011,
  G_DONE  = 3'b010
} gray_state_t;

// Safe FSM with illegal-state recovery.
//
// Sequence: IDLE -(start)-> INIT -> RUN -(done)-> DONE -> IDLE.
// Any other value of the state register falls into the default branch and
// returns to IDLE.
//
// The RTL default branch alone is not sufficient: synthesis treats states
// that are unreachable in the state graph as don't-care and removes the
// recovery logic. FSM_SAFE_STATE = "default_state" on the state register
// tells Vivado to keep the recovery path and send illegal encodings to the
// state selected by the default branch.
module safe_fsm (
  input  logic clk,
  input  logic rst_n,   // asynchronous, active-low reset to IDLE
  input  logic start,
  input  logic done,
  output state_t state  // current FSM state
);

  (* FSM_SAFE_STATE = "default_state" *)
  state_t current_state;
  state_t next_state;

  // State register
  always_ff @(posedge clk or negedge rst_n) begin
    if (!rst_n)
      current_state <= IDLE;
    else
      current_state <= next_state;
  end

  // Next-state logic; holds the current state unless a transition fires
  always_comb begin
    next_state = current_state;
    case (current_state)
      IDLE:   if (start) next_state = INIT;
      INIT:   next_state = RUN;
      RUN:    if (done) next_state = DONE;
      DONE:   next_state = IDLE;
      default: next_state = IDLE;  // illegal / unused state recovery
    endcase
  end

  assign state = current_state;

endmodule

7. 高扇出网优化

减少高扇出时序问题:

// Fanout control through register replication.
//
// Shows two alternatives for a high-fanout enable:
//   1. A single register (enable_r) carrying MAX_FANOUT, so the tool
//      replicates it automatically. enable_r is not read in this module.
//   2. Explicit per-bank copies (enable_bank0..3), each gating one byte lane
//      of data_out.
module fanout_control (
  input  logic clk,
  input  logic rst_n,
  input  logic enable_in,
  output logic [31:0] data_out
);

  // High-fanout enable - let the tool replicate it to meet timing
  (* MAX_FANOUT = 50 *)
  logic enable_r;

  // Or replicate explicitly. The four copies are logically identical, so
  // synthesis would normally merge them back into a single register; KEEP
  // preserves the manual replicas.
  (* KEEP = "true" *)
  logic enable_bank0, enable_bank1, enable_bank2, enable_bank3;

  always_ff @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
      enable_r <= 1'b0;
      enable_bank0 <= 1'b0;
      enable_bank1 <= 1'b0;
      enable_bank2 <= 1'b0;
      enable_bank3 <= 1'b0;
    end else begin
      enable_r <= enable_in;
      // Replicated enables
      enable_bank0 <= enable_in;
      enable_bank1 <= enable_in;
      enable_bank2 <= enable_in;
      enable_bank3 <= enable_in;
    end
  end

  // Each byte lane (bank) of data_out uses its own dedicated enable copy
  always_ff @(posedge clk) begin
    if (enable_bank0) data_out[7:0]   <= /* ... */;
    if (enable_bank1) data_out[15:8]  <= /* ... */;
    if (enable_bank2) data_out[23:16] <= /* ... */;
    if (enable_bank3) data_out[31:24] <= /* ... */;
  end

endmodule

8. 重定时和流水线

应用重定时以改善时序:

// Vivado forward-retiming example: 8x8 multiplier with three cycles of latency.
//
// RETIMING_FORWARD is a register attribute: it lets synthesis move the marked
// register forward through the logic it drives. The pipeline registers are
// therefore placed ahead of the multiplier and carry the attribute, so the
// tool can push them into the multiplier logic to balance the path.
// (Placed on the module, or with the registers after the logic, forward
// retiming has nothing to move.)
//
// Latency and function are unchanged: result = a * b, three clock edges
// after a and b are presented.
module forward_retiming (
  input  logic clk,
  input  logic [7:0] a, b,
  output logic [15:0] result
);
  // Input-side pipeline registers that synthesis may move forward
  (* RETIMING_FORWARD = 1 *) logic [7:0] a_r1, b_r1;
  (* RETIMING_FORWARD = 1 *) logic [7:0] a_r2, b_r2;

  always_ff @(posedge clk) begin
    a_r1 <= a;
    b_r1 <= b;
    a_r2 <= a_r1;
    b_r2 <= b_r1;
    // Full 16-bit product registered at the output
    result <= a_r2 * b_r2;
  end
endmodule

// Manually balanced pipeline: data and valid are delayed by the same number
// of register stages so they stay aligned at the output.
//
// Parameters:
//   PIPELINE_STAGES - number of register stages (>= 1)
//
// The valid chain is shifted stage by stage, like the data chain, instead of
// with the slice valid_pipe[PIPELINE_STAGES-2:0], which is an illegal range
// when PIPELINE_STAGES == 1.
module balanced_pipeline #(
  parameter int PIPELINE_STAGES = 3
) (
  input  logic clk,
  input  logic [31:0] data_in,
  input  logic        valid_in,
  output logic [31:0] data_out,
  output logic        valid_out
);

  logic [31:0] data_pipe [PIPELINE_STAGES];
  logic [PIPELINE_STAGES-1:0] valid_pipe;

  always_ff @(posedge clk) begin
    // Stage 0 captures the inputs
    data_pipe[0]  <= data_in;
    valid_pipe[0] <= valid_in;
    // Remaining stages shift data and valid together
    for (int i = 1; i < PIPELINE_STAGES; i++) begin
      data_pipe[i]  <= data_pipe[i-1];
      valid_pipe[i] <= valid_pipe[i-1];
    end
  end

  // Outputs come from the last stage
  assign data_out = data_pipe[PIPELINE_STAGES-1];
  assign valid_out = valid_pipe[PIPELINE_STAGES-1];

endmodule

流程集成

这项技能与以下流程集成:

流程 集成点
synthesis-optimization.js 主要综合优化
timing-closure.js 综合时序
place-and-route.js 后综合优化
power-analysis-optimization.js 功耗感知综合

输出模式

{
  "synthesisAnalysis": {
    "resourceUtilization": {
      "luts": { "used": 45000, "available": 203800, "percentage": 22.1 },
      "registers": { "used": 52000, "available": 407600, "percentage": 12.8 },
      "bram": { "used": 120, "available": 445, "percentage": 27.0 },
      "dsp": { "used": 48, "available": 740, "percentage": 6.5 }
    },
    "hierarchy": [
      { "module": "processor", "luts": 25000, "regs": 30000 },
      { "module": "memory_ctrl", "luts": 10000, "regs": 12000 }
    ],
    "criticalPaths": [
      { "from": "reg_a", "to": "reg_b", "slack": -0.5, "levels": 12 }
    ]
  },
  "optimizations": [
    { "type": "attribute", "target": "sync_reg", "attribute": "ASYNC_REG" },
    { "type": "encoding", "target": "state_fsm", "encoding": "one_hot" },
    { "type": "ramStyle", "target": "data_mem", "style": "block" }
  ],
  "recommendations": [
    "Consider pipelining multiplier at line 125",
    "High fanout on enable signal (fan=500) - add MAX_FANOUT",
    "Consider distributed RAM for small_mem (depth=16)"
  ],
  "artifacts": [
    "src/optimized_module.sv",
    "reports/utilization.rpt",
    "reports/timing_summary.rpt"
  ]
}

最佳实践

属性应用

  • 在所有CDC同步器上使用ASYNC_REG
  • 对高扇出信号应用MAX_FANOUT
  • 使用RAM_STYLE明确指定存储器实现方式,使推断结果可预测
  • 谨慎使用USE_DSP以平衡资源

资源推断

  • 对RAM读输出打一拍寄存器(同步读),以便推断为BRAM
  • 匹配DSP48模式以自动推断
  • 深移位寄存器使用SRL实现
  • 避免BRAM的异步复位

时序优化

  • 为长组合路径添加流水线阶段
  • 在适当时使用重定时属性
  • 在模块间平衡流水线阶段
  • 考虑FSM编码对时序的影响

参考资料

  • Xilinx UG901: Vivado综合指南
  • Xilinx UG949: UltraFast设计方法论
  • Intel Quartus Prime综合指南
  • Yosys Open SYnthesis Suite(开源综合套件)手册

另见

  • synthesis-optimization.js - 综合优化过程
  • timing-closure.js - 时序收敛方法论
  • SK-010: 布局布线技能
  • AG-007: 综合专家代理