Factory Fields¶
The factory API is designed to be as semantic and simple as possible, and by default it requires no customization to mock data. Nonetheless, when you do need to customize and control the data being generated, polyfactory has you covered. Let's look at a few examples:
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from typing import Any, Dict, List, Union
from uuid import UUID
from polyfactory.factories import DataclassFactory
class Species(str, Enum):
    """Kinds of pet; each member is also a plain ``str`` value."""

    CAT = "Cat"
    DOG = "Dog"


@dataclass
class Pet:
    """A pet with a name, a species and the sound it makes."""

    name: str
    species: Species
    sound: str


@dataclass
class Person:
    """A person record mixing scalar, union-typed and nested collection fields."""

    id: UUID
    name: str
    hobbies: List[str]
    age: Union[float, int]
    birthday: Union[datetime, date]
    pets: List[Pet]
    assets: List[Dict[str, Dict[str, Any]]]
# A fixed Pet that every generated Person will own.
pet_instance = Pet(name="Roxy", sound="woof woof", species=Species.DOG)


class PersonFactory(DataclassFactory[Person]):
    """Factory for ``Person`` whose ``pets`` field is pinned to a constant list."""

    pets = [pet_instance]
def test_is_pet_instance() -> None:
    """The hardcoded ``pets`` list is used as-is on the built ``Person``."""
    person_instance = PersonFactory.build()

    assert len(person_instance.pets) == 1
    assert person_instance.pets[0] == pet_instance
In the example above, the call to PersonFactory.build()
results in a Person
where all values are randomly
generated, except the pets
list, which will be the hardcoded value we defined.
The Use
Field¶
This, though, is often not desirable. We could instead define a factory for Pet where we restrict the choices to a range we like. For example:
Use
field with a custom PetFactory to control the generation of a Person’s pets list¶
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from typing import Any, Dict, List, Union
from uuid import UUID
from polyfactory import Use
from polyfactory.factories import DataclassFactory
class Species(str, Enum):
    """Kinds of pet; each member is also a plain ``str`` value."""

    CAT = "Cat"
    DOG = "Dog"


@dataclass
class Pet:
    """A pet with a name, a species and the sound it makes."""

    name: str
    species: Species
    sound: str


@dataclass
class Person:
    """A person record mixing scalar, union-typed and nested collection fields."""

    id: UUID
    name: str
    hobbies: List[str]
    age: Union[float, int]
    birthday: Union[datetime, date]
    pets: List[Pet]
    assets: List[Dict[str, Dict[str, Any]]]
class PetFactory(DataclassFactory[Pet]):
    """Factory for ``Pet`` that draws ``name`` and ``species`` from fixed choices."""

    # ``Use`` wraps a callable plus its arguments; the call happens at build time.
    # The factory's own ``__random__`` is used rather than the global ``random`` module.
    name = Use(DataclassFactory.__random__.choice, ["Ralph", "Roxy"])
    species = Use(DataclassFactory.__random__.choice, list(Species))
class PersonFactory(DataclassFactory[Person]):
    """Factory for ``Person`` whose ``pets`` come from ``PetFactory.batch(size=2)``."""

    pets = Use(PetFactory.batch, size=2)
def test_pet_choices() -> None:
    """Each built ``Person`` owns two pets whose names come from the allowed list."""
    person_instance = PersonFactory.build()

    assert len(person_instance.pets) == 2
    assert all(pet.name in ["Ralph", "Roxy"] for pet in person_instance.pets)
The Use
class is merely a semantic abstraction that makes the factory cleaner and simpler
to understand; you can in fact use any callable (including classes) as the value of a factory’s attribute directly, and
these will be invoked at build-time. Thus, you could for example re-write the above PetFactory like so:
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from typing import Any, Dict, List, Union
from uuid import UUID
from polyfactory import Use
from polyfactory.factories import DataclassFactory
class Species(str, Enum):
    """Kinds of pet; each member is also a plain ``str`` value."""

    CAT = "Cat"
    DOG = "Dog"


@dataclass
class Pet:
    """A pet with a name, a species and the sound it makes."""

    name: str
    species: Species
    sound: str


@dataclass
class Person:
    """A person record mixing scalar, union-typed and nested collection fields."""

    id: UUID
    name: str
    hobbies: List[str]
    age: Union[float, int]
    birthday: Union[datetime, date]
    pets: List[Pet]
    assets: List[Dict[str, Dict[str, Any]]]
class PetFactory(DataclassFactory[Pet]):
    """Factory for ``Pet`` using plain callables instead of ``Use`` wrappers."""

    # Callables assigned as factory attributes are invoked at build time.
    name = lambda: DataclassFactory.__random__.choice(["Ralph", "Roxy"])
    species = lambda: DataclassFactory.__random__.choice(list(Species))
class PersonFactory(DataclassFactory[Person]):
    """Factory for ``Person`` whose ``pets`` come from ``PetFactory.batch(size=2)``."""

    pets = Use(PetFactory.batch, size=2)
def test_pet_choices() -> None:
    """Each built ``Person`` owns two pets whose names come from the allowed list."""
    person_instance = PersonFactory.build()

    assert len(person_instance.pets) == 2
    assert all(pet.name in ["Ralph", "Roxy"] for pet in person_instance.pets)
Or you can use a class method, which will give you easy and nice access to the factory itself:
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from typing import Any, Dict, List, Union
from uuid import UUID
from polyfactory import Use
from polyfactory.factories import DataclassFactory
class Species(str, Enum):
    """Kinds of pet; each member is also a plain ``str`` value."""

    CAT = "Cat"
    DOG = "Dog"


@dataclass
class Pet:
    """A pet with a name, a species and the sound it makes."""

    name: str
    species: Species
    sound: str


@dataclass
class Person:
    """A person record mixing scalar, union-typed and nested collection fields."""

    id: UUID
    name: str
    hobbies: List[str]
    age: Union[float, int]
    birthday: Union[datetime, date]
    pets: List[Pet]
    assets: List[Dict[str, Dict[str, Any]]]
class PetFactory(DataclassFactory[Pet]):
    """Factory for ``Pet`` whose fields are produced by class methods.

    Class methods receive the factory as ``cls``, giving direct access to the
    factory's own ``__random__`` instance.
    """

    @classmethod
    def name(cls) -> str:
        """Return one of the allowed pet names."""
        return cls.__random__.choice(["Ralph", "Roxy"])

    @classmethod
    def species(cls) -> Species:
        """Return a randomly chosen ``Species`` member."""
        return cls.__random__.choice(list(Species))
class PersonFactory(DataclassFactory[Person]):
    """Factory for ``Person`` whose ``pets`` come from ``PetFactory.batch(size=2)``."""

    pets = Use(PetFactory.batch, size=2)
def test_pet_choices() -> None:
    """Each built ``Person`` owns two pets whose names come from the allowed list."""
    person_instance = PersonFactory.build()

    assert len(person_instance.pets) == 2
    assert all(pet.name in ["Ralph", "Roxy"] for pet in person_instance.pets)
Note
All the above examples used DataclassFactory.__random__.choice
, and this is intentional. While you can use
random.choice
or any other function exported from the stdlib random library, the factory class has its own instance
of random.Random
attached under cls.__random__
. This instance can be affected by random seeding in several ways, e.g.
calling the factory seeding method, which will be scoped only to this instance. Thus, for consistent results when seeding
randomness, it's important to use the factory's random.Random
instance rather than the global one from the stdlib.
The Ignore
Field¶
Ignore
is used to designate an attribute as ignored, which means it will be completely
ignored by the factory:
Ignore
field¶
from typing import TypedDict
from polyfactory import Ignore
from polyfactory.factories import TypedDictFactory
class Person(TypedDict):
    """Dictionary shape for a person record."""

    id: int
    name: str
class PersonFactory(TypedDictFactory[Person]):
    """Factory for ``Person`` that leaves ``id`` out of the generated data."""

    id = Ignore()
def test_id_is_ignored() -> None:
    """``name`` is generated while the ignored ``id`` key is absent."""
    person_instance = PersonFactory.build()

    assert person_instance.get("name")
    assert person_instance.get("id") is None
The Require
Field¶
The Require
class is used to designate a given attribute as a required kwarg. This means that the
factory will require passing a value for this attribute as a kwarg to the build method, or an exception will be raised:
Require
field¶
from typing import TypedDict
import pytest
from polyfactory import Require
from polyfactory.exceptions import MissingBuildKwargException
from polyfactory.factories import TypedDictFactory
class Person(TypedDict):
    """Dictionary shape for a person record."""

    id: int
    name: str
class PersonFactory(TypedDictFactory[Person]):
    """Factory for ``Person`` that demands ``id`` as a ``build`` kwarg."""

    id = Require()
def test_id_is_required() -> None:
    """``build`` succeeds when ``id`` is supplied and raises when it is omitted."""
    # this will not raise an exception
    person_instance = PersonFactory.build(id=1)
    assert person_instance.get("name")
    assert person_instance.get("id") == 1

    # but when no kwarg is passed, an exception will be raised:
    with pytest.raises(MissingBuildKwargException):
        PersonFactory.build()
The PostGenerated
Field¶
The PostGenerated
class allows for post generating fields based on already generated
values of other (non post generated) fields. In most cases this pattern is best avoided, but for the few valid cases
the PostGenerated helper is provided. For example:
PostGenerated
field¶
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Any, Dict
from polyfactory import PostGenerated
from polyfactory.factories import DataclassFactory
def add_timedelta(name: str, values: Dict[str, datetime], *args: Any, **kwargs: Any) -> datetime:
    """Return ``values["from_dt"]`` moved forward by one day.

    Args:
        name: Name of the field being generated (unused here).
        values: Already generated field values, keyed by field name.
        *args: Extra positional arguments (unused).
        **kwargs: Extra keyword arguments (unused).

    Raises:
        KeyError: If ``values`` has no ``"from_dt"`` entry.
    """
    return values["from_dt"] + timedelta(days=1)
@dataclass
class DatetimeRange:
    """A pair of datetimes; ``from_dt`` defaults to the current time."""

    to_dt: datetime
    from_dt: datetime = field(default_factory=datetime.now)
class DatetimeRangeFactory(DataclassFactory[DatetimeRange]):
    """Factory that derives ``to_dt`` from the already generated ``from_dt``."""

    to_dt = PostGenerated(add_timedelta)
def test_post_generated() -> None:
    """``to_dt`` is exactly one day after ``from_dt`` on a built instance."""
    date_range_instance = DatetimeRangeFactory.build()

    assert date_range_instance.to_dt == date_range_instance.from_dt + timedelta(days=1)
The signature for using PostGenerated is: cb: Callable, *args, **defaults.
It can receive any sync callable. The signature for the
callable should be: name: str, values: dict[str, Any], *args, **defaults
. The already generated values are mapped by
name in the values dictionary.
The Param
Field¶
The Param
class denotes a parameter that can be referenced by other fields at build time but whose value is not set on the final object. This is useful for passing values needed by other factory fields but that are not part of the object being built.
A Param type can be either a constant or a callable. If a callable is used, it will be executed at the beginning of build and its return value will be used as the value for the field. Optional keyword arguments may be passed to the callable as part of the field definition on the factory. Any additional keyword arguments passed to the Param constructor will also not be mapped into the final object.
The Param type allows for flexibility in that it can either accept a value at the definition of the factory, or its value can be set at build time. If a value is provided at build time, it will take precedence over the value provided at the definition of the factory (if any).
If neither a value is provided at the definition of the factory nor at build time, an exception will be raised. Likewise, a Param cannot have the same name as any other model field.
Param
field with a constant¶
from dataclasses import dataclass
from typing import List
from polyfactory.decorators import post_generated
from polyfactory.factories import DataclassFactory
from polyfactory.fields import Param
@dataclass
class Pet:
    """A pet with a name and the sound it makes."""

    name: str
    sound: str
class PetFactoryWithParamValueSetAtBuild(DataclassFactory[Pet]):
    """In this factory, the name_choices must be passed at build time."""

    # Declared without a value, so callers have to supply it to ``build``.
    name_choices = Param[List[str]]()

    @post_generated
    @classmethod
    def name(cls, name_choices: List[str]) -> str:
        """Pick the pet's name from the ``name_choices`` parameter."""
        return cls.__random__.choice(name_choices)
def test_factory__build_time() -> None:
    """A ``Param`` given at build time drives ``name`` but is not set on the ``Pet``."""
    names = ["Ralph", "Roxy"]
    pet = PetFactoryWithParamValueSetAtBuild.build(name_choices=names)

    assert isinstance(pet, Pet)
    assert not hasattr(pet, "name_choices")
    assert pet.name in names
class PetFactoryWithParamSpecififiedInFactory(DataclassFactory[Pet]):
    """In this factory, the name_choices are specified in the
    factory and do not need to be passed at build time."""

    name_choices = Param[List[str]](["Ralph", "Roxy"])

    @post_generated
    @classmethod
    def name(cls, name_choices: List[str]) -> str:
        """Pick the pet's name from the ``name_choices`` parameter."""
        return cls.__random__.choice(name_choices)
def test_factory__in_factory() -> None:
    """A ``Param`` defined on the factory drives ``name`` with no build kwargs."""
    pet = PetFactoryWithParamSpecififiedInFactory.build()

    assert isinstance(pet, Pet)
    assert not hasattr(pet, "name_choices")
    assert pet.name in ["Ralph", "Roxy"]
Param
field with a callable¶
from dataclasses import dataclass
from polyfactory.decorators import post_generated
from polyfactory.factories import DataclassFactory
from polyfactory.fields import Param
@dataclass
class Person:
    """A person with a name and the age they will be next year."""

    name: str
    age_next_year: int
class PersonFactoryWithParamValueSpecifiedInFactory(DataclassFactory[Person]):
    """In this factory, the next_years_age_from_calculator callable and its ``age``
    argument are specified in the factory and do not need to be passed at build time."""

    # Callable Param: the lambda is called with age=20 and its result is the param's value.
    next_years_age_from_calculator = Param[int](lambda age: age + 1, is_callable=True, age=20)

    @post_generated
    @classmethod
    def age_next_year(cls, next_years_age_from_calculator: int) -> int:
        """Return the value produced by the ``next_years_age_from_calculator`` param."""
        return next_years_age_from_calculator
def test_factory__in_factory() -> None:
    """The factory-defined callable param yields 21 and is not set on the ``Person``."""
    person = PersonFactoryWithParamValueSpecifiedInFactory.build()

    assert isinstance(person, Person)
    assert not hasattr(person, "next_years_age_from_calculator")
    assert person.age_next_year == 21
class PersonFactoryWithParamValueSetAtBuild(DataclassFactory[Person]):
    """In this factory, the next_years_age_from_calculator must be passed at build time."""

    # Only the callable's keyword argument is fixed here; the callable itself is
    # expected to be supplied to ``build``.
    next_years_age_from_calculator = Param[int](is_callable=True, age=20)

    @post_generated
    @classmethod
    def age_next_year(cls, next_years_age_from_calculator: int) -> int:
        """Return the value produced by the ``next_years_age_from_calculator`` param."""
        return next_years_age_from_calculator
def test_factory__build_time() -> None:
    """A callable param supplied at build time yields 21 and is not set on the ``Person``."""
    # Use the factory that declares no callable of its own, so the callable
    # passed here is the one actually exercised.
    person = PersonFactoryWithParamValueSetAtBuild.build(next_years_age_from_calculator=lambda age: age + 1)

    assert isinstance(person, Person)
    assert not hasattr(person, "next_years_age_from_calculator")
    assert person.age_next_year == 21
Factories as Fields¶
Factories themselves can be used as fields. In this usage, build parameters will be passed to the declared factory.
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from typing import Any, Dict, List, Union
from uuid import UUID
from polyfactory.factories import DataclassFactory
class Species(str, Enum):
    """Kinds of pet; each member is also a plain ``str`` value."""

    CAT = "Cat"
    DOG = "Dog"


@dataclass
class Pet:
    """A pet with a name, a species and the sound it makes."""

    name: str
    species: Species
    sound: str


@dataclass
class Person:
    """A person record that owns exactly one ``Pet``."""

    id: UUID
    name: str
    hobbies: List[str]
    age: Union[float, int]
    birthday: Union[datetime, date]
    pet: Pet
    assets: List[Dict[str, Dict[str, Any]]]
class PetFactory(DataclassFactory[Pet]):
    """Factory for ``Pet`` whose ``name`` is drawn from a fixed list."""

    name = lambda: DataclassFactory.__random__.choice(["Ralph", "Roxy"])
class PersonFactory(DataclassFactory[Person]):
    """Factory for ``Person`` that uses ``PetFactory`` itself as the ``pet`` field."""

    pet = PetFactory
def test_subfactory() -> None:
    """The sub-factory builds ``pet``, and nested build kwargs override its fields."""
    person_instance = PersonFactory.build()

    assert isinstance(person_instance.pet, Pet)
    assert person_instance.pet.name in ["Ralph", "Roxy"]

    # Values given under ``pet`` are passed on to the declared factory.
    person_instance_with_pet_name = PersonFactory.build(pet={"name": "Winston"})
    assert person_instance_with_pet_name.pet.name == "Winston"
Handling Asynchronous Data in Factory Fields¶
If you need to populate a factory field with data pre-fetched asynchronously (e.g., from a database using an ORM like SQLAlchemy or Beanie), the recommended approach is to handle the asynchronous call outside the factory and pass the resolved value as a regular argument.
from __future__ import annotations
from sqlalchemy import ForeignKey, select
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
from polyfactory.factories.sqlalchemy_factory import SQLAlchemyFactory
# Engine for an in-memory SQLite database, reached through the aiosqlite driver.
async_engine = create_async_engine("sqlite+aiosqlite:///:memory:")


class Base(DeclarativeBase):
    """Declarative base shared by the ORM models in this example."""
class User(Base):
    """ORM model for the ``users`` table."""

    __tablename__ = "users"

    id: Mapped[int] = mapped_column(primary_key=True)
class Department(Base):
    """ORM model for the ``departments`` table; each row points at a director user."""

    __tablename__ = "departments"

    id: Mapped[int] = mapped_column(primary_key=True)
    # Foreign key to the integer ``users.id`` column, so it is typed as ``int`` too.
    director_id: Mapped[int] = mapped_column(ForeignKey("users.id"))
class UserFactory(SQLAlchemyFactory[User]):
    """Factory for ``User`` rows with default generation for every column."""


class DepartmentFactory(SQLAlchemyFactory[Department]):
    """Factory for ``Department`` rows with default generation for every column."""
async def get_director_ids() -> int:
    """Return the id of one existing user, picked with the factory's RNG.

    Despite the plural name, a single id is returned. All ``User.id`` values
    are loaded first, then one is chosen via ``UserFactory.__random__``.
    """
    async with AsyncSession(async_engine) as session:
        result = (await session.scalars(select(User.id))).all()
        return UserFactory.__random__.choice(result)
async def test_factory_with_pre_fetched_async_data() -> None:
    """A value fetched asynchronously beforehand can be passed as a plain build kwarg."""
    # Start from a clean schema.
    async with async_engine.begin() as conn:
        await conn.run_sync(Base.metadata.drop_all)
        await conn.run_sync(Base.metadata.create_all)

    # Seed some users so there is a director id to pick from.
    async with AsyncSession(async_engine) as session:
        UserFactory.__async_session__ = session
        await UserFactory.create_batch_async(3)

    async with AsyncSession(async_engine) as session:
        DepartmentFactory.__async_session__ = session
        # The async lookup is awaited here, outside the factory; the factory only
        # receives the resolved value.
        department = await DepartmentFactory.create_async(director_id=await get_director_ids())
        user = await session.scalar(select(User).where(User.id == department.director_id))
        assert isinstance(user, User)