使用 dacite.from_dict 动态添加数据类字段

发布于 2025-01-11 17:05:12 字数 1092 浏览 3 评论 0原文

我正在使用 dacite 将 Python 字典转换为数据类。有没有办法动态地向数据类添加字段?如下面的示例所示,数据类“Parameters”只定义了一个时间序列“timeseriesA”,但(通过字典提供的)数据中可能还包含其他无法预先声明的时间序列。

from dataclasses import asdict, dataclass
from typing import Dict, List, Optional

from dacite import from_dict

@dataclass(frozen=True)
class TimeSeries:
  """Immutable record describing one named series of samples."""

  name: str  # series label, e.g. 'nameA'
  unit: str  # unit string, e.g. 'USD'
  data: Optional[List[float]]  # sample values; None is permitted by the annotation
  
@dataclass(frozen=True)
class Parameters:
  """Immutable container with a single, statically declared time series."""

  timeseriesA: TimeSeries  # the only series this class declares as a field
  
@dataclass(frozen=True)
class Data:
  """Top-level immutable payload wrapping a Parameters instance."""

  parameters: Parameters

  @classmethod
  def fromDict(cls, data: Dict) -> 'Data':
    """Build an instance from a plain dictionary via dacite.from_dict."""
    return from_dict(cls, data)

  def toDict(self) -> Dict:
    """Return this instance converted to a nested dictionary.

    This is an instance method: dataclasses.asdict() only accepts dataclass
    instances and raises TypeError when it is handed the class itself.
    """
    return asdict(self)

  
def main() -> None:
  """Convert a sample payload holding two time series into a Data instance."""
  series_a: Dict = {'name': 'nameA', 'unit': 'USD', 'data': [10, 20, 30, 40]}
  # 'timeseriesB' has no matching field declared on Parameters.
  series_b: Dict = {'name': 'nameB', 'unit': 'EUR', 'data': [60, 30, 40, 50]}
  payload: Dict = {
    'parameters': {
      'timeseriesA': series_a,
      'timeseriesB': series_b,
    },
  }

  data: Data = Data.fromDict(payload)


# Run the example only when this file is executed as a script.
if __name__ == '__main__':
  main()

在此示例中,“timeseriesB”会被 dacite 忽略,但它应当作为字段添加到“Parameters”数据类中。

I am using dacite to transform a Python dictionary into a dataclass. Is there a way to dynamically add fields to a dataclass? Like in the example below, where the dataclass "Parameters" has defined only one timeseries "timeseriesA", but there might be additional ones (provided through the dictionary) that cannot be declared.

from dataclasses import asdict, dataclass
from typing import Dict, List, Optional

from dacite import from_dict

@dataclass(frozen=True)
class TimeSeries:
  """Immutable record describing one named series of samples."""

  name: str  # series label, e.g. 'nameA'
  unit: str  # unit string, e.g. 'USD'
  data: Optional[List[float]]  # sample values; None is permitted by the annotation
  
@dataclass(frozen=True)
class Parameters:
  """Immutable container with a single, statically declared time series."""

  timeseriesA: TimeSeries  # the only series this class declares as a field
  
@dataclass(frozen=True)
class Data:
  """Top-level immutable payload wrapping a Parameters instance."""

  parameters: Parameters

  @classmethod
  def fromDict(cls, data: Dict) -> 'Data':
    """Build an instance from a plain dictionary via dacite.from_dict."""
    return from_dict(cls, data)

  def toDict(self) -> Dict:
    """Return this instance converted to a nested dictionary.

    This is an instance method: dataclasses.asdict() only accepts dataclass
    instances and raises TypeError when it is handed the class itself.
    """
    return asdict(self)

  
def main() -> None:
  """Convert a sample payload holding two time series into a Data instance."""
  series_a: Dict = {'name': 'nameA', 'unit': 'USD', 'data': [10, 20, 30, 40]}
  # 'timeseriesB' has no matching field declared on Parameters.
  series_b: Dict = {'name': 'nameB', 'unit': 'EUR', 'data': [60, 30, 40, 50]}
  payload: Dict = {
    'parameters': {
      'timeseriesA': series_a,
      'timeseriesB': series_b,
    },
  }

  data: Data = Data.fromDict(payload)


# Run the example only when this file is executed as a script.
if __name__ == '__main__':
  main()

In this example, "timeseriesB" will be ignored by dacite, but should be added as a field of the "Parameters" dataclass.

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

南薇 2025-01-18 17:05:12

一般来说,在定义类之后向数据类动态添加字段并不是一个好的做法。然而,由于源 dict 对象中字段的动态性质,这确实为在数据类中使用 dict 提供了一个很好的用例。

下面是一个简单示例:使用 dict 字段来处理源对象中键的动态映射。示例用到了 dataclass-wizard,它同样是一个类似的 JSON 序列化库。下面概述的方法可以处理 dict 对象中额外的数据,例如 timeseriesB。

from __future__ import annotations

from dataclasses import dataclass
from dataclass_wizard import JSONWizard


@dataclass(frozen=True)
class Data(JSONWizard):
    """Frozen payload whose time series are keyed by name instead of declared as fields."""

    # Every key under 'parameters' (e.g. 'timeseriesB') becomes an entry of this dict.
    parameters: dict[str, TimeSeries]


@dataclass(frozen=True)
class TimeSeries:
    """Immutable record describing one named series of samples."""

    name: str  # series label, e.g. 'nameA'
    unit: str  # unit string, e.g. 'USD'
    data: list[float] | None  # sample values; None is permitted by the annotation


# Sample input: 'parameters' maps each series key to its name/unit/data mapping.
data: dict = {
    'parameters': {
        'timeseriesA': {
            'name': 'nameA',
            'unit': 'USD',
            'data': [10, 20, 30, 40]
        },
        'timeseriesB': {
            'name': 'nameB',
            'unit': 'EUR',
            'data': [60, 30, 40, 50]
        }
    }
}


def main():
    """Deserialize the sample mapping and print the 'timeseriesB' unit and the full repr."""
    parsed = Data.from_dict(data)

    series_b = parsed.parameters['timeseriesB']
    print(series_b.unit)  # EUR

    print(f'{parsed!r}')
    # Data(parameters={'timeseriesA': TimeSeries(name='nameA', unit='USD', data=[10.0, 20.0, 30.0, 40.0]),
    #                  'timeseriesB': TimeSeries(name='nameB', unit='EUR', data=[60.0, 30.0, 40.0, 50.0])})


# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    main()

诚然,dataclass-wizard 不像 dacite 那样执行严格的类型检查,而是在可能的情况下执行隐式类型转换,例如把 str 转换为注解所声明的 int。也许正因如此,它总体上要快得多;另一个好处是,它的序列化甚至比内置的 dataclasses.asdict 还要略快一些 :-)

以下是一些快速测试:

from dataclasses import asdict, dataclass
from typing import Dict, List, Optional

from dacite import from_dict
from dataclass_wizard import JSONWizard
from timeit import timeit


@dataclass(frozen=True)
class TimeSeries:
    """Immutable record describing one named series of samples."""

    name: str  # series label, e.g. 'nameA'
    unit: str  # unit string, e.g. 'USD'
    data: Optional[List[float]]  # sample values; None is permitted by the annotation


@dataclass(frozen=True)
class Parameters:
    """Immutable container with a single, statically declared time series."""

    timeseriesA: TimeSeries  # the only series this class declares as a field


@dataclass(frozen=True)
class Data:
    """Frozen top-level payload loaded with dacite and dumped with dataclasses.asdict."""

    parameters: Parameters

    @classmethod
    def fromDict(cls, data: Dict) -> 'Data':
        """Build an instance from a plain dict by delegating to dacite.from_dict."""
        return from_dict(cls, data)

    def toDict(self) -> Dict:
        """Return this instance converted to a dict via dataclasses.asdict."""
        return asdict(self)


@dataclass(frozen=True)
class ParametersWizard:
    """Counterpart of Parameters for dataclass-wizard, using a snake_case field name."""

    # renamed because default key transform is `camelCase` -> `snake_case`
    timeseries_a: TimeSeries


@dataclass(frozen=True)
class DataWizard(JSONWizard):
    """Counterpart of Data built on JSONWizard, used for the timing comparison."""

    # enable debug mode in case of incorrect types etc.
    class _(JSONWizard.Meta):
        debug_enabled = True

    parameters: ParametersWizard


# Sample input used by every timing below; 'timeseriesB' has no matching
# field on Parameters or ParametersWizard.
data: Dict = {
    'parameters': {
        'timeseriesA': {
            'name': 'nameA',
            'unit': 'USD',
            'data': [10, 20, 30, 40]
        },
        'timeseriesB': {
            'name': 'nameB',
            'unit': 'EUR',
            'data': [60, 30, 40, 50]
        }
    }
}


def main():
    """Time dacite against dataclass-wizard for loading and dumping the sample data.

    Each statement is executed ``n`` times with ``timeit`` and the total
    elapsed seconds are printed, one line per measurement.
    """
    n = 10_000

    print(f"From Dict:        {timeit('Data.fromDict(data)', globals=globals(), number=n):.3f}")
    print(f"From Dict (Wiz):  {timeit('DataWizard.from_dict(data)', globals=globals(), number=n):.3f}")

    data_1: Data = Data.fromDict(data)
    # Annotated as DataWizard (not Data): that is the class from_dict is called on.
    data_wiz: DataWizard = DataWizard.from_dict(data)

    # timeit resolves names through a single namespace, so expose the locals
    # above alongside the module globals.
    g = {**globals(), **locals()}

    print(f"To Dict:        {timeit('data_1.toDict()', globals=g, number=n):.3f}")
    print(f"To Dict (Wiz):  {timeit('data_wiz.to_dict()', globals=g, number=n):.3f}")


# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()

结果,在我的 PC (Windows) 上:

From Dict:        1.663
From Dict (Wiz):  0.059
To Dict:        0.105
To Dict (Wiz):  0.057

In general, dynamically adding fields to a dataclass, after the class is defined, is not good practice. However, this does present a good use case for using a dict within a dataclass, due to the dynamic nature of fields in the source dict object.

Here is a straightforward example of using a dict field to handle a dynamic mapping of keys in the source object, using dataclass-wizard, which is a similar JSON serialization library. The approach outlined below handles extraneous data in the dict object, such as timeseriesB.

from __future__ import annotations

from dataclasses import dataclass
from dataclass_wizard import JSONWizard


@dataclass(frozen=True)
class Data(JSONWizard):
    """Frozen payload whose time series are keyed by name instead of declared as fields."""

    # Every key under 'parameters' (e.g. 'timeseriesB') becomes an entry of this dict.
    parameters: dict[str, TimeSeries]


@dataclass(frozen=True)
class TimeSeries:
    """Immutable record describing one named series of samples."""

    name: str  # series label, e.g. 'nameA'
    unit: str  # unit string, e.g. 'USD'
    data: list[float] | None  # sample values; None is permitted by the annotation


# Sample input: 'parameters' maps each series key to its name/unit/data mapping.
data: dict = {
    'parameters': {
        'timeseriesA': {
            'name': 'nameA',
            'unit': 'USD',
            'data': [10, 20, 30, 40]
        },
        'timeseriesB': {
            'name': 'nameB',
            'unit': 'EUR',
            'data': [60, 30, 40, 50]
        }
    }
}


def main():
    """Deserialize the sample mapping and print the 'timeseriesB' unit and the full repr."""
    parsed = Data.from_dict(data)

    series_b = parsed.parameters['timeseriesB']
    print(series_b.unit)  # EUR

    print(f'{parsed!r}')
    # Data(parameters={'timeseriesA': TimeSeries(name='nameA', unit='USD', data=[10.0, 20.0, 30.0, 40.0]),
    #                  'timeseriesB': TimeSeries(name='nameB', unit='EUR', data=[60.0, 30.0, 40.0, 50.0])})


# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    main()

The dataclass-wizard admittedly doesn't perform strict type checking like dacite, but instead performs implicit type coercion, like str to annotated int, where possible. Perhaps as a result, it's overall much faster; the other nice thing is serialization is even slightly faster than builtin dataclasses.asdict too :-)

Here are some quick tests:

from dataclasses import asdict, dataclass
from typing import Dict, List, Optional

from dacite import from_dict
from dataclass_wizard import JSONWizard
from timeit import timeit


@dataclass(frozen=True)
class TimeSeries:
    """Immutable record describing one named series of samples."""

    name: str  # series label, e.g. 'nameA'
    unit: str  # unit string, e.g. 'USD'
    data: Optional[List[float]]  # sample values; None is permitted by the annotation


@dataclass(frozen=True)
class Parameters:
    """Immutable container with a single, statically declared time series."""

    timeseriesA: TimeSeries  # the only series this class declares as a field


@dataclass(frozen=True)
class Data:
    """Frozen top-level payload loaded with dacite and dumped with dataclasses.asdict."""

    parameters: Parameters

    @classmethod
    def fromDict(cls, data: Dict) -> 'Data':
        """Build an instance from a plain dict by delegating to dacite.from_dict."""
        return from_dict(cls, data)

    def toDict(self) -> Dict:
        """Return this instance converted to a dict via dataclasses.asdict."""
        return asdict(self)


@dataclass(frozen=True)
class ParametersWizard:
    """Counterpart of Parameters for dataclass-wizard, using a snake_case field name."""

    # renamed because default key transform is `camelCase` -> `snake_case`
    timeseries_a: TimeSeries


@dataclass(frozen=True)
class DataWizard(JSONWizard):
    """Counterpart of Data built on JSONWizard, used for the timing comparison."""

    # enable debug mode in case of incorrect types etc.
    class _(JSONWizard.Meta):
        debug_enabled = True

    parameters: ParametersWizard


# Sample input used by every timing below; 'timeseriesB' has no matching
# field on Parameters or ParametersWizard.
data: Dict = {
    'parameters': {
        'timeseriesA': {
            'name': 'nameA',
            'unit': 'USD',
            'data': [10, 20, 30, 40]
        },
        'timeseriesB': {
            'name': 'nameB',
            'unit': 'EUR',
            'data': [60, 30, 40, 50]
        }
    }
}


def main():
    """Time dacite against dataclass-wizard for loading and dumping the sample data.

    Each statement is executed ``n`` times with ``timeit`` and the total
    elapsed seconds are printed, one line per measurement.
    """
    n = 10_000

    print(f"From Dict:        {timeit('Data.fromDict(data)', globals=globals(), number=n):.3f}")
    print(f"From Dict (Wiz):  {timeit('DataWizard.from_dict(data)', globals=globals(), number=n):.3f}")

    data_1: Data = Data.fromDict(data)
    # Annotated as DataWizard (not Data): that is the class from_dict is called on.
    data_wiz: DataWizard = DataWizard.from_dict(data)

    # timeit resolves names through a single namespace, so expose the locals
    # above alongside the module globals.
    g = {**globals(), **locals()}

    print(f"To Dict:        {timeit('data_1.toDict()', globals=g, number=n):.3f}")
    print(f"To Dict (Wiz):  {timeit('data_wiz.to_dict()', globals=g, number=n):.3f}")


# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()

Results, on my PC (Windows):

From Dict:        1.663
From Dict (Wiz):  0.059
To Dict:        0.105
To Dict (Wiz):  0.057
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文