Python数据类Skill python-data-classes

此技能专注于使用Python的dataclasses、attrs和Pydantic库进行数据建模,帮助开发者创建类型安全、可验证和可序列化的数据结构。适用于API请求/响应模型、配置管理、数据转换等场景。关键词:Python, 数据类, 数据建模, 验证, 序列化, dataclasses, attrs, Pydantic。

后端开发 0 次安装 0 次浏览 更新于 3/25/2026

名称: python-data-classes 用户可调用: false 描述: 在Python数据建模时使用dataclasses、attrs和Pydantic。在创建数据结构和模型时使用。 允许工具:

  • Bash
  • Read

Python数据类

掌握使用dataclasses、attrs和Pydantic进行Python数据建模,以创建干净、类型安全的数据结构,并实现验证和序列化。

dataclasses模块

基本dataclass用法:

from dataclasses import dataclass

@dataclass
class User:
    """Plain user record; @dataclass generates __init__, __repr__ and __eq__."""

    id: int
    name: str
    email: str
    is_active: bool = True  # default value, so callers may omit it

# Create an instance; is_active falls back to its default
user = User(
    id=1,
    name="Alice",
    email="alice@example.com"
)

print(user)
# User(id=1, name='Alice', email='alice@example.com', is_active=True)

print(user.name)  # Alice

带方法的dataclass:

from dataclasses import dataclass

@dataclass
class Point:
    """A 2-D point with helpers for distance and translation."""

    x: float
    y: float

    def distance_from_origin(self) -> float:
        """Return the Euclidean distance between this point and (0, 0)."""
        sum_of_squares = self.x ** 2 + self.y ** 2
        return sum_of_squares ** 0.5

    def move(self, dx: float, dy: float) -> "Point":
        """Return a new Point shifted by (dx, dy); the receiver is not modified."""
        shifted_x = self.x + dx
        shifted_y = self.y + dy
        return Point(shifted_x, shifted_y)

point = Point(3.0, 4.0)
print(point.distance_from_origin())  # 5.0
new_point = point.move(1.0, 1.0)
print(new_point)  # Point(x=4.0, y=5.0)

dataclass参数

控制dataclass行为:

from dataclasses import dataclass, field

# frozen=True makes instances immutable
@dataclass(frozen=True)
class ImmutableUser:
    """User record whose fields cannot be reassigned after construction."""

    id: int
    name: str

# order=True generates the ordering operators (<, <=, >, >=)
@dataclass(order=True)
class Person:
    """Person compared field by field in declaration order: age, then name."""

    age: int
    name: str

p1 = Person(30, "Alice")
p2 = Person(25, "Bob")
print(p1 > p2)  # True (age is compared first)

# slots=True generates __slots__ for better memory efficiency.
# NOTE: the slots parameter requires Python 3.10+.
@dataclass(slots=True)
class Coordinate:
    """Coordinate pair stored in __slots__ rather than a per-instance __dict__."""

    x: float
    y: float

# kw_only=True makes every field a keyword-only argument of __init__.
# NOTE: the kw_only parameter requires Python 3.10+.
@dataclass(kw_only=True)
class Config:
    """Connection settings that must be passed by keyword."""

    host: str
    port: int

config = Config(host="localhost", port=8080)

字段配置

使用field()进行高级配置:

from dataclasses import dataclass, field
from typing import List

@dataclass
class Product:
    """Product record demonstrating the per-field options of field()."""

    name: str
    price: float

    # Excluded from __init__; assigned in __post_init__ instead
    id: int = field(init=False)

    # Excluded from __repr__
    secret: str = field(repr=False, default="")

    # default_factory builds a fresh list per instance (safe mutable default)
    tags: List[str] = field(default_factory=list)

    # Excluded from comparisons
    created_at: float = field(compare=False, default=0.0)

    def __post_init__(self) -> None:
        """Set ``id`` from ``name`` after the generated __init__ has run."""
        # NOTE: str hashes are salted per interpreter process unless
        # PYTHONHASHSEED is fixed, so this id is not stable across runs.
        self.id = hash(self.name)

product = Product(name="Widget", price=9.99)
print(product.id)  # the hash-derived id

计算字段:

from dataclasses import dataclass, field

@dataclass
class Rectangle:
    """Rectangle whose ``area`` is derived from ``width`` and ``height``.

    ``area`` is excluded from ``__init__`` and computed once in
    ``__post_init__``; it is not refreshed if ``width`` or ``height`` are
    reassigned afterwards.
    """

    width: float
    height: float
    area: float = field(init=False)

    def __post_init__(self) -> None:
        """Compute ``area`` once the generated ``__init__`` has set both sides."""
        self.area = self.width * self.height

# Pass floats: dataclasses do not coerce arguments, so int inputs would
# produce an int area (200) and contradict the float annotations.
rect = Rectangle(10.0, 20.0)
print(rect.area)  # 200.0

继承

Dataclass继承:

from dataclasses import dataclass

@dataclass
class Animal:
    """Base record; its fields come first in a subclass's generated __init__."""

    name: str
    age: int

@dataclass
class Dog(Animal):
    """Animal subclass: inherits name/age and appends breed/is_good_boy."""

    breed: str
    is_good_boy: bool = True

dog = Dog(name="Rex", age=5, breed="Labrador")
print(dog)
# Dog(name='Rex', age=5, breed='Labrador', is_good_boy=True)

转换方法

转换为字典/元组,以及从字典创建实例:

from dataclasses import dataclass, asdict, astuple

@dataclass
class User:
    """Minimal user record used to demonstrate asdict() and astuple()."""

    id: int
    name: str
    email: str

user = User(1, "Alice", "alice@example.com")

# Convert to a dict keyed by field name
user_dict = asdict(user)
print(user_dict)
# {'id': 1, 'name': 'Alice', 'email': 'alice@example.com'}

# Convert to a tuple in field declaration order
user_tuple = astuple(user)
print(user_tuple)
# (1, 'Alice', 'alice@example.com')

# Create from a dict; the keys must match the field names
data = {"id": 2, "name": "Bob", "email": "bob@example.com"}
bob = User(**data)

attrs库

使用attrs获取增强功能:

pip install attrs

基本attrs用法:

import attrs

@attrs.define
class User:
    """User record declared with attrs' modern ``define`` API."""

    id: int
    name: str
    email: str
    is_active: bool = True  # default value

user = User(1, "Alice", "alice@example.com")
print(user)

attrs验证器:

import attrs
from attrs import validators

@attrs.define
class User:
    """User whose fields are checked by attrs validators."""

    # Must be an int
    id: int = attrs.field(validator=validators.instance_of(int))
    # A list of validators: each one is applied to the value
    name: str = attrs.field(
        validator=[
            validators.instance_of(str),
            validators.min_len(1)
        ]
    )
    # Simple pattern check (something@something.tld), not full RFC validation
    email: str = attrs.field(
        validator=validators.matches_re(r"^[\w\.-]+@[\w\.-]+\.\w+$")
    )
    # and_() composes several validators into one; age must be an int in [0, 150]
    age: int = attrs.field(
        validator=validators.and_(
            validators.instance_of(int),
            validators.ge(0),
            validators.le(150)
        )
    )

# Validators run during initialization
user = User(
    id=1,
    name="Alice",
    email="alice@example.com",
    age=30
)

attrs转换器:

import attrs

@attrs.define
class User:
    """User whose constructor arguments are normalised by attrs converters."""

    name: str = attrs.field(converter=str.strip)  # trims surrounding whitespace
    age: int = attrs.field(converter=int)  # e.g. "30" -> 30
    # Lower-cases every tag; factory=list gives each instance its own default list
    tags: list[str] = attrs.field(
        factory=list,
        converter=lambda x: [tag.lower() for tag in x]
    )

user = User(
    name="  Alice  ",
    age="30",
    tags=["ADMIN", "User"]
)

print(user.name)  # "Alice"
print(user.age)   # 30 (int)
print(user.tags)  # ["admin", "user"]

Pydantic模型

安装Pydantic(下文示例中的EmailStr还需要可选依赖email-validator,可通过 pip install "pydantic[email]" 一并安装):

pip install pydantic

基本Pydantic模型:

from pydantic import BaseModel

class User(BaseModel):
    """User model; Pydantic validates and coerces the inputs on construction."""

    id: int
    name: str
    email: str
    is_active: bool = True  # default value

# Automatic validation and coercion
user = User(
    id="1",           # coerced to int
    name="Alice",
    email="alice@example.com"
)

print(user.id)  # 1 (int)
print(user.model_dump())  # dict representation
print(user.model_dump_json())  # JSON string

Pydantic验证器:

from pydantic import BaseModel, EmailStr, Field, field_validator
from typing import Annotated

class User(BaseModel):
    """User model combining Field constraints with custom field validators."""

    id: int = Field(gt=0)
    name: str = Field(min_length=1, max_length=100)
    # EmailStr relies on the optional ``email-validator`` package
    email: EmailStr
    # Constraints can also be attached through Annotated metadata
    age: Annotated[int, Field(ge=0, le=150)]
    username: str

    @field_validator("username")
    @classmethod
    def validate_username(cls, v: str) -> str:
        """Reject non-alphanumeric usernames and return the lower-cased value."""
        if not v.isalnum():
            raise ValueError("用户名必须为字母数字")
        return v.lower()

    @field_validator("name")
    @classmethod
    def validate_name(cls, v: str) -> str:
        """Return the name stripped of surrounding whitespace and title-cased."""
        return v.strip().title()

user = User(
    id=1,
    name="  alice  ",
    email="alice@example.com",
    age=30,
    username="ALICE123"
)

print(user.name)      # "Alice"
print(user.username)  # "alice123"

Pydantic模型配置:

from pydantic import BaseModel, ConfigDict

class User(BaseModel):
    """User model whose behaviour is tuned through ``model_config``."""

    model_config = ConfigDict(
        str_strip_whitespace=True,  # strip leading/trailing whitespace from str values
        validate_assignment=True,  # validate again when an attribute is assigned
        frozen=False,  # instances stay mutable
        extra="forbid"  # unknown fields are rejected
    )

    id: int
    name: str
    email: str

# Whitespace is stripped automatically
user = User(id=1, name="  Alice  ", email="alice@example.com")
print(user.name)  # "Alice"

# Validation (including stripping) also runs on assignment
user.name = "  Bob  "
print(user.name)  # "Bob"

Pydantic高级功能

计算字段:

from pydantic import BaseModel, computed_field

class User(BaseModel):
    """User model exposing a derived ``full_name`` in its serialized output."""

    first_name: str
    last_name: str

    @computed_field
    @property
    def full_name(self) -> str:
        """Return the first and last name joined by a single space."""
        return f"{self.first_name} {self.last_name}"

user = User(first_name="Alice", last_name="Smith")
print(user.full_name)  # "Alice Smith"
print(user.model_dump())
# {'first_name': 'Alice', 'last_name': 'Smith', 'full_name': 'Alice Smith'}

模型验证器:

from pydantic import BaseModel, model_validator

class DateRange(BaseModel):
    """Pair of date strings validated together after field validation."""

    start_date: str
    end_date: str

    @model_validator(mode="after")
    def validate_date_range(self) -> "DateRange":
        """Raise ValueError when start_date sorts after end_date; else return self."""
        # NOTE(review): both fields are str, so this is a lexicographic
        # comparison. It matches chronological order only for zero-padded
        # ISO-8601 dates (YYYY-MM-DD). Equal dates are accepted.
        if self.start_date > self.end_date:
            raise ValueError("开始日期必须在结束日期之前")
        return self

range_obj = DateRange(
    start_date="2024-01-01",
    end_date="2024-12-31"
)

嵌套模型:

from pydantic import BaseModel

class Address(BaseModel):
    """Postal address used as a nested model."""

    street: str
    city: str
    country: str

class User(BaseModel):
    """User model that embeds an Address model as a field."""

    name: str
    email: str
    address: Address

# The nested dict is validated into an Address instance
user = User(
    name="Alice",
    email="alice@example.com",
    address={
        "street": "123 Main St",
        "city": "New York",
        "country": "USA"
    }
)

print(user.address.city)  # "New York"

泛型模型:

from pydantic import BaseModel
from typing import Generic, TypeVar

# Type parameter for the payload carried by Response
T = TypeVar("T")

class Response(BaseModel, Generic[T]):
    """Generic response envelope; ``data`` has the parametrized type T."""

    data: T
    message: str
    success: bool

class User(BaseModel):
    """Payload model used to parametrize Response."""

    id: int
    name: str

# Create a typed response: Response[User] fixes the type of ``data`` to User
response = Response[User](
    data=User(id=1, name="Alice"),
    message="用户已获取",
    success=True
)

print(response.data.name)  # "Alice"

序列化和反序列化

Pydantic JSON处理:

from pydantic import BaseModel
from datetime import datetime

class Event(BaseModel):
    """Event model used to demonstrate JSON round-tripping."""

    name: str
    timestamp: datetime  # the ISO-8601 string in the JSON below is parsed into a datetime
    metadata: dict[str, str]

# From JSON
json_data = '''
{
    "name": "用户登录",
    "timestamp": "2024-01-15T10:30:00",
    "metadata": {"ip": "192.168.1.1"}
}
'''

# Parse and validate the JSON text in one step
event = Event.model_validate_json(json_data)
print(event.timestamp)

# To JSON
json_output = event.model_dump_json(indent=2)
print(json_output)

自定义序列化:

from pydantic import BaseModel, field_serializer
from datetime import datetime

class Event(BaseModel):
    """Event model with a custom serializer for its timestamp field."""

    name: str
    timestamp: datetime

    @field_serializer("timestamp")
    def serialize_timestamp(self, value: datetime) -> str:
        """Return the timestamp formatted as 'YYYY-MM-DD HH:MM:SS'."""
        return value.strftime("%Y-%m-%d %H:%M:%S")

# datetime.now() returns a naive local time, so the printed value varies per run
event = Event(name="测试", timestamp=datetime.now())
print(event.model_dump())
# e.g. {'name': '测试', 'timestamp': '2024-01-15 10:30:00'}

比较:dataclasses vs attrs vs Pydantic

何时使用dataclasses:

  • 带有类型提示的简单数据容器
  • 标准库的一部分(无依赖)
  • 不需要内置的数据验证
  • 需要兼容Python 3.7+(注意:上文的slots=True和kw_only=True需要Python 3.10+)
  • 使用frozen=True实现不可变性

何时使用attrs:

  • 比dataclasses更多功能(验证器、转换器)
  • 比dataclasses更好的性能
  • 需要高级字段配置
  • 长期维护、向后兼容性好(旧版本曾支持Python 2.7;attrs.define等现代API仅支持Python 3)
  • 自定义初始化逻辑

何时使用Pydantic:

  • 需要自动数据验证
  • JSON/字典序列化/反序列化
  • API请求/响应模型
  • 配置管理
  • 需要类型强制转换
  • OpenAPI/JSON模式生成

最佳实践

  • 为所有字段使用类型提示
  • 为可选字段提供默认值
  • 使用default_factory处理可变默认值
  • 在边界(API、数据库)验证数据
  • 保持dataclasses专注和内聚
  • 使用frozen=True处理不可变数据
  • 利用验证器处理业务规则
  • 使用计算字段处理派生数据
  • 记录复杂字段需求
  • 根据用例选择合适工具

常见模式

使用dataclass的构建器模式:

from dataclasses import dataclass, field
from typing import Optional

@dataclass
class QueryBuilder:
    """Fluent builder that assembles a SQL-like SELECT string.

    Each mutator updates the builder in place and returns ``self`` so that
    calls can be chained; ``build()`` renders the accumulated parts.

    NOTE: column names and conditions are interpolated verbatim. Never pass
    untrusted input here; use the database driver's parameter binding.
    """

    _select: list[str] = field(default_factory=list)
    _where: list[str] = field(default_factory=list)
    _limit: Optional[int] = None

    def select(self, *columns: str) -> "QueryBuilder":
        """Append ``columns`` to the SELECT list and return the builder."""
        self._select.extend(columns)
        return self

    def where(self, condition: str) -> "QueryBuilder":
        """Add ``condition``; all conditions are joined with AND by build()."""
        self._where.append(condition)
        return self

    def limit(self, n: int) -> "QueryBuilder":
        """Set the LIMIT value (replacing any previous one) and return the builder."""
        self._limit = n
        return self

    def build(self) -> str:
        """Render the query string from the collected columns, conditions and limit."""
        query = f"SELECT {', '.join(self._select)}"
        if self._where:
            query += f" WHERE {' AND '.join(self._where)}"
        # Compare against None rather than truthiness: limit(0) is an
        # explicit request and must not be dropped silently.
        if self._limit is not None:
            query += f" LIMIT {self._limit}"
        return query

query = (
    QueryBuilder()
    .select("id", "name")
    .where("active = true")
    .limit(10)
    .build()
)

使用Pydantic的配置:

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    """Application settings loaded from environment variables and ``.env``.

    Uses the pydantic-settings v2 API: configuration lives in
    ``model_config`` (the inner ``class Config`` is the deprecated v1 style).
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
    )

    app_name: str = "我的应用"
    # Required. Read from the DATABASE_URL environment variable: names are
    # matched case-insensitively by default, so the v1-only Field(env=...)
    # argument (ignored and deprecated in v2) is unnecessary.
    database_url: str
    debug: bool = False
    max_connections: int = Field(10, ge=1, le=100)

# Raises a validation error if DATABASE_URL is set neither in the
# environment nor in .env
settings = Settings()

何时使用此技能

在以下情况使用python-data-classes:

  • 创建数据传输对象(DTOs)
  • 建模API请求/响应载荷
  • 定义配置结构
  • 在领域模型中实现值对象
  • 构建类型安全的数据容器
  • 处理JSON序列化/反序列化
  • 验证用户输入或外部数据
  • 创建不可变数据结构
  • 实现构建器或工厂模式
  • 建模数据库模式或ORM实体

常见陷阱

  • 不使用default_factory处理可变默认值(列表、字典)
  • 不验证来自外部源的数据
  • 过度复杂化简单数据结构
  • 将业务逻辑与数据模型混合
  • 不使用frozen处理不可变数据
  • 忘记正确处理None值
  • 不有效利用类型提示
  • 使用错误工具(dataclass vs attrs vs Pydantic)
  • 不记录字段约束
  • 在热点路径忽略验证性能

资源