Factory Fields

The factory API is designed to be as semantic and simple as possible, and by default it requires no customization to mock data. Nonetheless, when you do need to customize and control the data being generated, polyfactory has you covered. Let's look at a few examples:

Declaring a PersonFactory with hardcoded pets
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from typing import Any, Dict, List, Union
from uuid import UUID

from polyfactory.factories import DataclassFactory


class Species(str, Enum):
    """Pet species; the ``str`` mixin makes every member a string as well."""

    CAT = "Cat"
    DOG = "Dog"


@dataclass
class Pet:
    """Pet model; the element type of ``Person.pets``."""

    name: str
    species: Species
    sound: str


@dataclass
class Person:
    """Model built by ``PersonFactory``; mixes scalar, union, list and nested-dict fields."""

    id: UUID
    name: str
    hobbies: List[str]
    age: Union[float, int]
    birthday: Union[datetime, date]
    pets: List[Pet]  # hardcoded on the factory in this example
    assets: List[Dict[str, Dict[str, Any]]]


# Fixed pet that the factory below places in every generated Person.
pet_instance = Pet(name="Roxy", species=Species.DOG, sound="woof woof")


class PersonFactory(DataclassFactory[Person]):
    """Factory for ``Person`` with a hardcoded ``pets`` value."""

    # A plain value is used as-is; all other fields are still randomly generated.
    pets = [pet_instance]


def test_is_pet_instance() -> None:
    """Check that a built Person carries exactly the hardcoded pet."""
    built_pets = PersonFactory.build().pets
    assert len(built_pets) == 1
    assert built_pets[0] == pet_instance

In the example above, the call to PersonFactory.build() results in a Person where all values are randomly generated, except the pets list, which will be the hardcoded value we defined.

The Use Field

This, though, is often not desirable. We could instead define a factory for Pet where we restrict the choices to a range we like. For example:

Using the Use field with a custom PetFactory to control the generation of a Person’s pets list
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from typing import Any, Dict, List, Union
from uuid import UUID

from polyfactory import Use
from polyfactory.factories import DataclassFactory


class Species(str, Enum):
    """Pet species; the ``str`` mixin makes every member a string as well."""

    CAT = "Cat"
    DOG = "Dog"


@dataclass
class Pet:
    """Pet model built by ``PetFactory``."""

    name: str
    species: Species
    sound: str


@dataclass
class Person:
    """Model built by ``PersonFactory``; mixes scalar, union, list and nested-dict fields."""

    id: UUID
    name: str
    hobbies: List[str]
    age: Union[float, int]
    birthday: Union[datetime, date]
    pets: List[Pet]  # produced by PetFactory.batch in this example
    assets: List[Dict[str, Dict[str, Any]]]


class PetFactory(DataclassFactory[Pet]):
    """Factory for ``Pet`` that restricts ``name`` and ``species`` to fixed choices."""

    # Use wraps a callable plus its arguments; the callable is invoked at build time.
    # The factory's own random instance is used so that factory seeding applies.
    name = Use(DataclassFactory.__random__.choice, ["Ralph", "Roxy"])
    species = Use(DataclassFactory.__random__.choice, list(Species))


class PersonFactory(DataclassFactory[Person]):
    """Factory for ``Person`` whose pets come from ``PetFactory``."""

    # Calls PetFactory.batch(size=2) at build time to fill the pets list.
    pets = Use(PetFactory.batch, size=2)


def test_pet_choices() -> None:
    """Check that two pets are built and every name is one of the allowed choices."""
    allowed_names = ["Ralph", "Roxy"]
    built_pets = PersonFactory.build().pets

    assert len(built_pets) == 2
    for pet in built_pets:
        assert pet.name in allowed_names

The Use class is merely a semantic abstraction that makes the factory cleaner and simpler to understand. You can in fact use any callable (including classes) as the value of a factory’s attribute directly, and these will be invoked at build-time. Thus, you could for example rewrite the above PetFactory like so:

Using simple lambda functions to declare custom fields
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from typing import Any, Dict, List, Union
from uuid import UUID

from polyfactory import Use
from polyfactory.factories import DataclassFactory


class Species(str, Enum):
    """Pet species; the ``str`` mixin makes every member a string as well."""

    CAT = "Cat"
    DOG = "Dog"


@dataclass
class Pet:
    """Pet model built by ``PetFactory``."""

    name: str
    species: Species
    sound: str


@dataclass
class Person:
    """Model built by ``PersonFactory``; mixes scalar, union, list and nested-dict fields."""

    id: UUID
    name: str
    hobbies: List[str]
    age: Union[float, int]
    birthday: Union[datetime, date]
    pets: List[Pet]  # produced by PetFactory.batch in this example
    assets: List[Dict[str, Dict[str, Any]]]


class PetFactory(DataclassFactory[Pet]):
    """Factory for ``Pet`` that declares custom fields as plain callables instead of ``Use``."""

    # Each lambda is invoked at build time; its return value becomes the field value.
    name = lambda: DataclassFactory.__random__.choice(["Ralph", "Roxy"])
    species = lambda: DataclassFactory.__random__.choice(list(Species))


class PersonFactory(DataclassFactory[Person]):
    """Factory for ``Person`` whose pets come from ``PetFactory``."""

    # Calls PetFactory.batch(size=2) at build time to fill the pets list.
    pets = Use(PetFactory.batch, size=2)


def test_pet_choices() -> None:
    """Check that the lambda-based fields still yield two pets with allowed names."""
    allowed_names = ["Ralph", "Roxy"]
    built_pets = PersonFactory.build().pets

    assert len(built_pets) == 2
    for pet in built_pets:
        assert pet.name in allowed_names

Or you can use a class method, which will give you easy and nice access to the factory itself:

Using class methods to declare custom fields
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from typing import Any, Dict, List, Union
from uuid import UUID

from polyfactory import Use
from polyfactory.factories import DataclassFactory


class Species(str, Enum):
    """Pet species; the ``str`` mixin makes every member a string as well."""

    CAT = "Cat"
    DOG = "Dog"


@dataclass
class Pet:
    """Pet model built by ``PetFactory``."""

    name: str
    species: Species
    sound: str


@dataclass
class Person:
    """Model built by ``PersonFactory``; mixes scalar, union, list and nested-dict fields."""

    id: UUID
    name: str
    hobbies: List[str]
    age: Union[float, int]
    birthday: Union[datetime, date]
    pets: List[Pet]  # produced by PetFactory.batch in this example
    assets: List[Dict[str, Dict[str, Any]]]


class PetFactory(DataclassFactory[Pet]):
    """Factory for ``Pet`` whose custom fields are class methods with access to the factory."""

    @classmethod
    def name(cls) -> str:
        """Return one of the allowed pet names, chosen with the factory's random instance."""
        return cls.__random__.choice(["Ralph", "Roxy"])

    @classmethod
    def species(cls) -> Species:
        """Return a ``Species`` member chosen with the factory's random instance."""
        return cls.__random__.choice(list(Species))


class PersonFactory(DataclassFactory[Person]):
    """Factory for ``Person`` whose pets come from ``PetFactory``."""

    # Calls PetFactory.batch(size=2) at build time to fill the pets list.
    pets = Use(PetFactory.batch, size=2)


def test_pet_choices() -> None:
    """Check that the classmethod-based fields yield two pets with allowed names."""
    allowed_names = ["Ralph", "Roxy"]
    built_pets = PersonFactory.build().pets

    assert len(built_pets) == 2
    for pet in built_pets:
        assert pet.name in allowed_names

Note

All the above examples used DataclassFactory.__random__.choice, and this is intentional. While you can use random.choice or any other function exported from the stdlib random module, the factory class has its own instance of random.Random attached under cls.__random__. This instance can be affected by random seeding in several ways, e.g. calling the factory seeding method, which will be scoped only to this instance. Thus, for consistent results when seeding randomness, it's important to use the factory's random.Random instance rather than the global one from the stdlib.

The Ignore Field

Ignore is used to designate an attribute as ignored, which means it will be completely ignored by the factory:

Using the Ignore field
from typing import TypedDict

from polyfactory import Ignore
from polyfactory.factories import TypedDictFactory


class Person(TypedDict):
    """Typed dictionary model built by ``PersonFactory``."""

    id: int
    name: str


class PersonFactory(TypedDictFactory[Person]):
    """Factory for ``Person`` that leaves ``id`` out of the generated data."""

    # Ignore marks the attribute as completely ignored by the factory.
    id = Ignore()


def test_id_is_ignored() -> None:
    """Check that ``name`` is generated while the ignored ``id`` is absent."""
    built = PersonFactory.build()
    generated_name = built.get("name")
    generated_id = built.get("id")

    assert generated_name
    assert generated_id is None

The Require Field

The Require class is used to designate a given attribute as a required kwarg. This means that the factory will require passing a value for this attribute as a kwarg to the build method, or an exception will be raised:

Using the Require field
from typing import TypedDict

import pytest

from polyfactory import Require
from polyfactory.exceptions import MissingBuildKwargException
from polyfactory.factories import TypedDictFactory


class Person(TypedDict):
    """Typed dictionary model built by ``PersonFactory``."""

    id: int
    name: str


class PersonFactory(TypedDictFactory[Person]):
    """Factory for ``Person`` that demands ``id`` as a build kwarg."""

    # Require makes build() raise unless a value for this attribute is passed as a kwarg.
    id = Require()


def test_id_is_required() -> None:
    """Check that build succeeds with ``id`` supplied and raises without it."""
    # Passing the required kwarg: no exception, and the value is used verbatim.
    built = PersonFactory.build(id=1)
    generated_name = built.get("name")
    supplied_id = built.get("id")

    assert generated_name
    assert supplied_id == 1

    # Omitting the required kwarg must raise.
    with pytest.raises(MissingBuildKwargException):
        PersonFactory.build()

The PostGenerated Field

The PostGenerated class allows for post generating fields based on already generated values of other (non post generated) fields. In most cases this pattern is best avoided, but for the few valid cases the PostGenerated helper is provided. For example:

Using the PostGenerated field
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Any, Dict

from polyfactory import PostGenerated
from polyfactory.factories import DataclassFactory


def add_timedelta(name: str, values: Dict[str, datetime], *args: Any, **kwargs: Any) -> datetime:
    """Return the already generated ``from_dt`` value shifted forward by one day.

    Args:
        name: Name of the field being post-generated (unused here).
        values: Already generated field values, keyed by field name.
        *args: Extra positional arguments (unused here).
        **kwargs: Extra keyword arguments (unused here).

    Returns:
        ``values["from_dt"]`` plus one day.
    """
    return values["from_dt"] + timedelta(days=1)


@dataclass
class DatetimeRange:
    """Pair of datetimes; ``from_dt`` defaults to the current time when omitted."""

    to_dt: datetime
    # default_factory calls datetime.now for each new instance that omits from_dt
    from_dt: datetime = field(default_factory=datetime.now)


class DatetimeRangeFactory(DataclassFactory[DatetimeRange]):
    """Factory for ``DatetimeRange`` that derives ``to_dt`` from the generated ``from_dt``."""

    # PostGenerated runs add_timedelta with the already generated values of the other fields.
    to_dt = PostGenerated(add_timedelta)


def test_post_generated() -> None:
    """Check that ``to_dt`` is exactly one day after ``from_dt``."""
    built_range = DatetimeRangeFactory.build()
    expected_to_dt = built_range.from_dt + timedelta(days=1)
    assert built_range.to_dt == expected_to_dt

The signature for use is: cb: Callable, *args, **defaults; it can receive any sync callable. The signature for the callable should be: name: str, values: dict[str, Any], *args, **defaults. The already generated values are mapped by name in the values dictionary.

The Param Field

The Param class denotes a parameter that can be referenced by other fields at build time but whose value is not set on the final object. This is useful for passing values that are needed by other factory fields but are not part of the object being built.

A Param type can be either a constant or a callable. If a callable is used, it will be executed at the beginning of build and its return value will be used as the value for the field. Optional keyword arguments may be passed to the callable as part of the field definition on the factory. Any additional keyword arguments passed to the Param constructor will also not be mapped into the final object.

The Param type allows for flexibility in that it can either accept a value at the definition of the factory, or its value can be set at build time. If a value is provided at build time, it will take precedence over the value provided at the definition of the factory (if any).

If a value is provided neither at the definition of the factory nor at build time, an exception will be raised. Likewise, a Param cannot have the same name as any other model field.

Using the Param field with a constant
from dataclasses import dataclass
from typing import List

from polyfactory.decorators import post_generated
from polyfactory.factories import DataclassFactory
from polyfactory.fields import Param


@dataclass
class Pet:
    """Pet model built by the Param-based factories in this example."""

    name: str
    sound: str


class PetFactoryWithParamValueSetAtBuild(DataclassFactory[Pet]):
    """In this factory, the name_choices must be passed at build time."""

    # No value is given here, so build() has to receive name_choices.
    # A Param is not set on the built Pet.
    name_choices = Param[List[str]]()

    @post_generated
    @classmethod
    def name(cls, name_choices: List[str]) -> str:
        """Pick the pet's name from ``name_choices`` with the factory's random instance."""
        return cls.__random__.choice(name_choices)


def test_factory__build_time() -> None:
    """Check that a Param supplied at build time drives ``name`` but is not set on the Pet."""
    candidate_names = ["Ralph", "Roxy"]
    built_pet = PetFactoryWithParamValueSetAtBuild.build(name_choices=candidate_names)

    assert isinstance(built_pet, Pet)
    assert not hasattr(built_pet, "name_choices")
    assert built_pet.name in candidate_names


class PetFactoryWithParamSpecififiedInFactory(DataclassFactory[Pet]):
    """In this factory, the name_choices are specified in the
    factory and do not need to be passed at build time."""

    # Constant Param value; a value passed to build() would take precedence over it.
    name_choices = Param[List[str]](["Ralph", "Roxy"])

    @post_generated
    @classmethod
    def name(cls, name_choices: List[str]) -> str:
        """Pick the pet's name from ``name_choices`` with the factory's random instance."""
        return cls.__random__.choice(name_choices)


def test_factory__in_factory() -> None:
    """Check that the factory-level Param drives ``name`` but is not set on the Pet."""
    built_pet = PetFactoryWithParamSpecififiedInFactory.build()
    allowed_names = ["Ralph", "Roxy"]

    assert isinstance(built_pet, Pet)
    assert not hasattr(built_pet, "name_choices")
    assert built_pet.name in allowed_names
Using the Param field with a callable
from dataclasses import dataclass

from polyfactory.decorators import post_generated
from polyfactory.factories import DataclassFactory
from polyfactory.fields import Param


@dataclass
class Person:
    """Person model built by the callable-Param factories in this example."""

    name: str
    age_next_year: int


class PersonFactoryWithParamValueSpecifiedInFactory(DataclassFactory[Person]):
    """In this factory, the next_years_age_from_calculator callable is specified in the
    factory and does not need to be passed at build time."""

    # Callable Param: the lambda is called with age=20 and its result is the Param value.
    next_years_age_from_calculator = Param[int](lambda age: age + 1, is_callable=True, age=20)

    @post_generated
    @classmethod
    def age_next_year(cls, next_years_age_from_calculator: int) -> int:
        """Return the Param value unchanged as ``age_next_year``."""
        return next_years_age_from_calculator


def test_factory__in_factory() -> None:
    """Check that the factory-level callable Param yields ``age_next_year == 21``."""
    built_person = PersonFactoryWithParamValueSpecifiedInFactory.build()

    assert isinstance(built_person, Person)
    assert not hasattr(built_person, "next_years_age_from_calculator")
    assert built_person.age_next_year == 21


class PersonFactoryWithParamValueSetAtBuild(DataclassFactory[Person]):
    """In this factory, the next_years_age_from_calculator must be passed at build time."""

    # No callable is given here; age=20 is the keyword argument for the callable
    # that has to be supplied to build().
    next_years_age_from_calculator = Param[int](is_callable=True, age=20)

    @post_generated
    @classmethod
    def age_next_year(cls, next_years_age_from_calculator: int) -> int:
        """Return the Param value unchanged as ``age_next_year``."""
        return next_years_age_from_calculator


def test_factory__build_time() -> None:
    """Check that a callable Param supplied at build time yields ``age_next_year == 21``.

    The build goes through ``PersonFactoryWithParamValueSetAtBuild``, the factory
    that declares no callable of its own, so the test exercises the build-time
    path rather than the factory-level default.
    """
    person = PersonFactoryWithParamValueSetAtBuild.build(next_years_age_from_calculator=lambda age: age + 1)

    assert isinstance(person, Person)
    assert not hasattr(person, "next_years_age_from_calculator")
    assert person.age_next_year == 21

Factories as Fields

Factories themselves can be used as fields. In this usage, build parameters will be passed to the declared factory.

Using a factory as a field
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from typing import Any, Dict, List, Union
from uuid import UUID

from polyfactory.factories import DataclassFactory


class Species(str, Enum):
    """Pet species; the ``str`` mixin makes every member a string as well."""

    CAT = "Cat"
    DOG = "Dog"


@dataclass
class Pet:
    """Pet model built by ``PetFactory``."""

    name: str
    species: Species
    sound: str


@dataclass
class Person:
    """Model built by ``PersonFactory``; holds a single nested ``Pet``."""

    id: UUID
    name: str
    hobbies: List[str]
    age: Union[float, int]
    birthday: Union[datetime, date]
    pet: Pet  # built through PetFactory in this example
    assets: List[Dict[str, Dict[str, Any]]]


class PetFactory(DataclassFactory[Pet]):
    """Factory for ``Pet`` that restricts ``name`` to two choices."""

    # Plain callable, invoked at build time; uses the factory's random instance.
    name = lambda: DataclassFactory.__random__.choice(["Ralph", "Roxy"])


class PersonFactory(DataclassFactory[Person]):
    """Factory for ``Person`` that uses another factory as a field."""

    # The factory class itself is the field value; build parameters given for
    # ``pet`` are passed on to PetFactory.
    pet = PetFactory


def test_subfactory() -> None:
    """Check that ``pet`` is built by PetFactory and accepts nested build overrides."""
    default_pet = PersonFactory.build().pet

    assert isinstance(default_pet, Pet)
    assert default_pet.name in ["Ralph", "Roxy"]

    # A dict passed for the sub-factory field overrides that factory's values.
    overridden_person = PersonFactory.build(pet={"name": "Winston"})
    assert overridden_person.pet.name == "Winston"

Handling Asynchronous Data in Factory Fields

If you need to populate a factory field with data pre-fetched asynchronously (e.g., from a database using an ORM like SQLAlchemy or Beanie), the recommended approach is to handle the asynchronous call outside the factory and pass the resolved value as a regular argument.

SQLAlchemy example
from __future__ import annotations

from sqlalchemy import ForeignKey, select
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

from polyfactory.factories.sqlalchemy_factory import SQLAlchemyFactory

# Async engine for an in-memory SQLite database, using the aiosqlite driver.
async_engine = create_async_engine("sqlite+aiosqlite:///:memory:")


# Declarative base class; its metadata is used to create and drop the tables below.
class Base(DeclarativeBase): ...


class User(Base):
    """ORM model for the ``users`` table; only has an integer primary key."""

    __tablename__ = "users"

    id: Mapped[int] = mapped_column(primary_key=True)


class Department(Base):
    """ORM model for the ``departments`` table, linked to a director in ``users``."""

    __tablename__ = "departments"

    id: Mapped[int] = mapped_column(primary_key=True)
    # Foreign key to users.id, which is an integer column, so this is typed as int too.
    director_id: Mapped[int] = mapped_column(ForeignKey("users.id"))


# Factory for User with no customization; all fields use default generation.
class UserFactory(SQLAlchemyFactory[User]): ...


# Factory for Department with no customization; director_id is supplied by the caller in the test.
class DepartmentFactory(SQLAlchemyFactory[Department]): ...


async def get_director_ids() -> int:
    """Fetch all user ids and return one of them, chosen with the factory's random instance.

    Despite the plural name, a single id is returned.
    """
    async with AsyncSession(async_engine) as session:
        id_rows = await session.scalars(select(User.id))
        user_ids = id_rows.all()
        return UserFactory.__random__.choice(user_ids)


async def test_factory_with_pre_fetched_async_data() -> None:
    """Check that an asynchronously fetched id can be passed to a factory as a plain kwarg."""
    # Recreate the schema so the test starts from empty tables.
    async with async_engine.begin() as connection:
        await connection.run_sync(Base.metadata.drop_all)
        await connection.run_sync(Base.metadata.create_all)

    # Persist three users that can serve as directors.
    async with AsyncSession(async_engine) as user_session:
        UserFactory.__async_session__ = user_session
        await UserFactory.create_batch_async(3)

    async with AsyncSession(async_engine) as department_session:
        DepartmentFactory.__async_session__ = department_session
        # Resolve the async value first, then hand it to the factory as a regular argument.
        director_id = await get_director_ids()
        department = await DepartmentFactory.create_async(director_id=director_id)
        director_query = select(User).where(User.id == department.director_id)
        director = await department_session.scalar(director_query)
        assert isinstance(director, User)