name: jit-compiler-builder
description: '构建即时编译系统。用途: (1) 语言实现, (2) 动态优化, (3) 虚拟机设计。'
version: 1.0.0
tags:
- compiler
- jit
- runtime
- optimization
difficulty: advanced
languages:
- rust
- c++
- llvm
dependencies:
- llvm-backend-generator
- ssa-constructor
JIT编译器构建者
为动态语言构建即时编译系统。
何时使用
此技能的作用
- 生成机器码 - 从中间表示
- 处理重新编译 - 在热点路径上
- 动态优化 - 运行时分析
- 管理内存 - 代码分配
JIT策略
| 策略 |
何时使用 |
权衡 |
| 急切编译 |
立即 |
首次运行快 |
| 延迟编译 |
调用时 |
首次运行慢 |
| 自适应编译 |
热点代码 |
整体最佳 |
| 多级编译 |
分层 |
最佳性能 |
实现
import struct
import mmap
from typing import Dict, List, Callable, Optional
from dataclasses import dataclass
# Machine-code buffer
class CodeBufferFull(Exception):
    """Raised when an emit would run past the end of a CodeBuffer."""


class CodeBuffer:
    """Fixed-size anonymous memory mapping that machine code is appended to.

    `cursor` is the offset of the next byte to be written; every `emit*`
    method returns the offset at which its data was placed.
    """

    def __init__(self, size: int = 64 * 1024):
        self.size = size
        self.buffer = self._map_anonymous(size)
        self.cursor = 0
        self.labels: Dict[str, int] = {}

    @staticmethod
    def _map_anonymous(size: int) -> "mmap.mmap":
        """Map `size` anonymous bytes, asking for RWX where the platform has it."""
        if hasattr(mmap, "PROT_EXEC"):
            try:
                return mmap.mmap(
                    -1,
                    size,
                    prot=mmap.PROT_READ | mmap.PROT_WRITE | mmap.PROT_EXEC,
                )
            except OSError:
                # Some hardened systems refuse writable+executable mappings;
                # fall back to read/write and leave it to make_executable().
                pass
        return mmap.mmap(-1, size)

    def _base_address(self) -> int:
        """Return the address of the first byte of the mapping."""
        import ctypes

        # The temporary c_char only borrows the buffer; it is released as
        # soon as addressof() has returned.
        return ctypes.addressof(ctypes.c_char.from_buffer(self.buffer))

    def emit(self, bytes_data: bytes) -> int:
        """Append raw machine code and return the offset it was written at.

        Raises:
            CodeBufferFull: if the data does not fit; the cursor is unchanged.
        """
        start = self.cursor
        end = start + len(bytes_data)
        if end > self.size:
            raise CodeBufferFull(
                f"need {len(bytes_data)} bytes at offset {start}, "
                f"buffer size is {self.size}"
            )
        self.buffer[start:end] = bytes_data
        self.cursor = end
        return start

    def emit_byte(self, b: int) -> int:
        """Append a single byte (0-255)."""
        return self.emit(bytes([b]))

    def emit_word(self, w: int) -> int:
        """Append an unsigned 32-bit little-endian value."""
        return self.emit(struct.pack('<I', w))

    def emit_qword(self, q: int) -> int:
        """Append an unsigned 64-bit little-endian value."""
        return self.emit(struct.pack('<Q', q))

    def get_address(self, offset: int = 0) -> int:
        """Return the address of the current cursor position plus `offset`."""
        return self._base_address() + self.cursor + offset

    def make_executable(self):
        """Mark the whole buffer read/write/execute.

        Only acts on Linux, where it calls mprotect(2) through libc; on other
        platforms it does nothing.

        Raises:
            OSError: if mprotect reports failure.
        """
        import platform

        if platform.system() != 'Linux':
            return

        import ctypes
        import os

        libc = ctypes.CDLL('libc.so.6', use_errno=True)
        # Without explicit argtypes ctypes passes Python ints as C int, which
        # would truncate a 64-bit address.
        libc.mprotect.argtypes = (ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int)
        libc.mprotect.restype = ctypes.c_int

        base = self._base_address()
        page_start = base & ~(mmap.PAGESIZE - 1)
        length = (base - page_start) + self.size
        prot = mmap.PROT_READ | mmap.PROT_WRITE | mmap.PROT_EXEC
        if libc.mprotect(page_start, length, prot) != 0:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err))
# x86-64 code generation
class X64Emitter:
    """Encodes a handful of x86-64 instructions into a code buffer.

    Registers are passed as their hardware numbers (0-15). Numbers 8-15 are
    encoded through the REX prefix extension bits.
    """

    def __init__(self, code: 'CodeBuffer'):
        self.code = code

    # Register numbers
    RAX, RBX, RCX, RDX = 0, 3, 1, 2
    RSI, RDI = 6, 7
    R8, R9, R10, R11 = 8, 9, 10, 11
    # NOTE(review): RIP is not a general-purpose register and has no register
    # number; the value is kept only so existing references keep working.
    RBP, RSP, RIP = 5, 4, 4  # special

    @staticmethod
    def _check_reg(reg: int) -> int:
        """Return `reg` unchanged, or raise ValueError if it is not 0-15."""
        if not 0 <= reg <= 15:
            raise ValueError(f"invalid x86-64 register number: {reg!r}")
        return reg

    def _emit_rex_w_rr(self, opcode: int, reg_field: int, rm_field: int):
        """Emit `REX.W opcode /r` with a register-direct ModRM byte."""
        self._check_reg(reg_field)
        self._check_reg(rm_field)
        # REX = 0100WRXB: R extends ModRM.reg, B extends ModRM.rm.
        rex = 0x48 | ((reg_field >> 3) << 2) | (rm_field >> 3)
        modrm = 0xC0 | ((reg_field & 7) << 3) | (rm_field & 7)
        self.code.emit_byte(rex)
        self.code.emit_byte(opcode)
        self.code.emit_byte(modrm)

    def mov_r64_imm64(self, reg: int, imm: int):
        """mov r64, imm64"""
        self._check_reg(reg)
        # Accept negative values by encoding them as 64-bit two's complement.
        if -(1 << 63) <= imm < 0:
            imm += 1 << 64
        # REX.W + B8+rd, followed by the 64-bit immediate
        self.code.emit_byte(0x48 | (reg >> 3))
        self.code.emit_byte(0xB8 | (reg & 7))
        self.code.emit_qword(imm)

    def mov_r64_r64(self, dst: int, src: int):
        """mov r64, r64"""
        # REX.W + 89 /r: the source goes in ModRM.reg, the destination in ModRM.rm
        self._emit_rex_w_rr(0x89, src, dst)

    def add_r64_r64(self, dst: int, src: int):
        """add r64, r64"""
        # REX.W + 01 /r: the source goes in ModRM.reg, the destination in ModRM.rm
        self._emit_rex_w_rr(0x01, src, dst)

    def ret(self):
        """ret"""
        self.code.emit_byte(0xC3)

    def call_r64(self, reg: int):
        """call r64"""
        self._check_reg(reg)
        if reg >> 3:
            # REX.B selects r8-r15 in the ModRM.rm field
            self.code.emit_byte(0x41)
        # FF /2 with a register-direct ModRM byte
        self.code.emit_byte(0xFF)
        self.code.emit_byte(0xD0 | (reg & 7))
自适应优化
class JITCompiler:
"""自适应JIT编译器"""
def __init__(self):
self.code_buffer = CodeBuffer()
self.emitter = X64Emitter(self.code_buffer)
self.compiled: Dict[str, Callable] = {}
# 分析
self.call_counts: Dict[str, int] = {}
self.hot_threshold = 1000
# 编译后的代码
self.machine_code: Dict[str, int] = {}
def compile_function(self, ir: 'IRFunction') -> Callable:
"""将IR函数编译为机器码"""
# 重置代码缓冲区
self.code_buffer.cursor = 0
# 生成代码
self.emit_prologue()
for block in ir.blocks:
self.emit_block(block)
self.emit_epilogue()
# 使可执行
self.code_buffer.make_executable()
# 获取函数指针
addr = self.code_buffer.get_address()
func = self._create_function_pointer(addr, ir.signature)
self.compiled[ir.name] = func
return func
def emit_prologue(self):
"""生成函数序言"""
# push rbp
self.emitter.code.emit_byte(0x55)
# mov rbp, rsp
self.emitter.mov_r64_r64(self.emitter.RBP, self.emitter.RSP)
# sub rsp, frame_size
self.emitter.code.emit_byte(0x48)
self.emitter.code.emit_byte(0x81)
self.emitter.code.emit_byte(0xEC)
self.emitter.code.emit_word(256) # 帧大小
def emit_epilogue(self):
"""生成函数尾声"""
# add rsp, frame_size
self.emitter.code.emit_byte(0x48)
self.emitter.code.emit_byte(0x81)
self.emitter.code.emit_byte(0xC4)
self.emitter.code.emit_word(256)
# pop rbp
self.emitter.code.emit_byte(0x5D)
# ret
self.emitter.ret()
def emit_block(self, block: 'IRBlock'):
"""生成单个块"""
for instr in block.instructions:
self.emit_instruction(instr)
def emit_instruction(self, instr: 'IRInstruction'):
"""生成单个指令"""
match instr.op:
case 'add':
# add dst, src
self.emitter.add_r64_r64(instr.dst, instr.src)
case 'mov':
self.emitter.mov_r64_r64(instr.dst, instr.src)
case 'call':
self._emit_call(instr.target)
case 'ret':
self._emit_return()
def _create_function_pointer(self, addr: int, sig):
"""创建可调用的函数指针"""
import ctypes
# 映射返回类型
restype = {
'int': ctypes.c_int64,
'float': ctypes.c_double,
'void': None
}.get(sig.return_type, ctypes.c_int64)
# 映射参数类型
argtypes = [
ctypes.c_int64 for _ in sig.params
]
return ctypes.CFUNCTYPE(restype, *argtypes)(addr)
内联缓存
class InlineCache:
    """Inline cache for polymorphic call sites.

    Holds at most `slot_size` (key, value) pairs, most recently updated first.
    When the cache is full, the entry at the back is dropped.
    """

    def __init__(self, slot_size: int = 3):
        self.cache: List[Optional[tuple]] = [None] * slot_size
        self.slot_size = slot_size

    def lookup(self, key) -> Optional[Callable]:
        """Return the value cached for `key`, or None on a miss."""
        for entry in self.cache:
            if entry and entry[0] == key:
                return entry[1]
        return None

    def update(self, key, value):
        """Put (key, value) at the front of the cache.

        An existing entry for `key` is replaced rather than duplicated, so a
        repeated update cannot push other live entries out. A cache with no
        slots ignores updates.
        """
        if self.slot_size <= 0:
            return
        survivors = [
            entry for entry in self.cache
            if entry is not None and entry[0] != key
        ]
        entries = ([(key, value)] + survivors)[:self.slot_size]
        # Pad with None so the list always has exactly slot_size slots.
        self.cache[:] = entries + [None] * (self.slot_size - len(entries))

    def generate_guard(self, emitter: 'X64Emitter', key):
        """Emit guard code for `key`. Not implemented yet; emits nothing."""
        # Intended sequence - compare the key against the cached value:
        #   mov rax, [key_location]
        #   cmp rax, cached_key
        #   jne miss_label
        pass

    def generate_miss_handler(self, emitter, key):
        """Emit the cache-miss handler. Not implemented yet; emits nothing."""
        # Intended sequence:
        #   update the cache
        #   jump to the compiled code
        pass
跟踪编译
class TraceRecorder:
    """Records hot traces and hands them to the compilation hooks.

    A trace is the run of instructions seen between two loop headers. The
    hooks optimize_trace / generate_trace_code / link_trace have minimal
    default implementations and are meant to be overridden.
    """

    def __init__(self, min_trace_length: int = 10):
        self.traces: Dict[str, List] = {}
        self.current_trace = []
        # A trace is compiled only when it is longer than this many instructions.
        self.min_trace_length = min_trace_length

    def record(self, instr: 'IRInstruction'):
        """Record one instruction in the current trace.

        A loop header closes the current trace: it is compiled if it is
        longer than `min_trace_length`, and a new trace starts with the
        header instruction either way.
        """
        # Detect the loop boundary
        if instr.is_loop_header:
            if len(self.current_trace) > self.min_trace_length:
                # Compile the trace
                self.compile_trace()
            self.current_trace = []

        self.current_trace.append(instr)

    def compile_trace(self):
        """Compile the recorded trace; does nothing if the trace is empty."""
        if not self.current_trace:
            return

        # Optimize the trace
        optimized = self.optimize_trace(self.current_trace)

        # Generate code
        code = self.generate_trace_code(optimized)

        # Link to the parent
        self.link_trace(code)

    def optimize_trace(self, trace: List) -> List:
        """Hook: return an optimized copy of `trace` (default: plain copy)."""
        return list(trace)

    def generate_trace_code(self, trace: List):
        """Hook: lower `trace` to code (default: return the trace unchanged)."""
        return trace

    def link_trace(self, code):
        """Hook: register compiled `code` (default: store it in `traces`)."""
        self.traces[f"trace_{len(self.traces)}"] = code
关键概念
| 概念 |
描述 |
| JIT |
运行时编译 |
| 热点路径 |
频繁执行的代码 |
| 内联缓存 |
多态调用点 |
| 跟踪 |
线性执行路径 |
| 分层 |
多个编译级别 |
| 推测优化 |
基于类型假设进行优化,假设不成立时去优化 |
优化级别
| 层级 |
何时使用 |
优化 |
| 解释器 |
开始 |
无 |
| 基础JIT |
1000次调用 |
基本 |
| 优化JIT |
10000次调用 |
激进 |
提示
- 优化前先分析
- 使用保护进行类型检查
- 实现内联缓存
- 考虑跟踪JIT
- 处理去优化
相关技能
garbage-collector-implementer - 内存管理
ssa-constructor - JIT IR的SSA形式
llvm-backend-generator - LLVM ORC JIT
经典参考
| 参考 |
重要性 |
| Hölzle, Chambers & Ungar, “Optimizing Dynamically-Typed Object-Oriented Languages With Polymorphic Inline Caches” (ECOOP 1991) |
来自SELF的基础内联缓存技术 |
| Hölzle, “Adaptive Optimization for SELF” (PhD Thesis, Stanford 1994) |
全面的自适应优化技术 |
| Gal et al., “Trace-based Just-in-Time Type Specialization for Dynamic Languages” (PLDI 2009) |
跟踪JIT方法论 |
研究工具与工件
JIT实现:
| 工具 |
学习内容 |
| LuaJIT |
跟踪编译器设计,非常快的解释器 |
| GraalVM/Truffle |
通过部分求值实现的语言无关JIT |
| V8 |
分层编译,推测优化 |
| HotSpot JVM |
生产服务器JIT,C1/C2层级 |
研究前沿
1. 推测优化
实现陷阱
| 陷阱 |
实际后果 |
解决方案 |
| 去优化缺陷 |
执行错误的代码 |
谨慎设置保护(guard) |