Factory Fields

The factory API is designed to be as semantic and simple as possible, and by default it requires no customization to mock data. Nonetheless, when you do need to customize and control the data being generated, polyfactory has you covered. Let's look at a few examples:

Declaring a PersonFactory with hardcoded pets
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from typing import Any, Dict, List, Union
from uuid import UUID

from polyfactory.factories import DataclassFactory


class Species(str, Enum):
    """Kinds of pet; the ``str`` mixin makes every member a string as well."""

    CAT = "Cat"
    DOG = "Dog"


@dataclass
class Pet:
    """A pet described by its name, its species and the sound it makes."""

    name: str
    species: Species
    sound: str


@dataclass
class Person:
    """A person record that owns a list of pets alongside assorted other fields."""

    id: UUID
    name: str
    hobbies: List[str]
    age: Union[float, int]
    birthday: Union[datetime, date]
    pets: List[Pet]  # the field this example's factory pins to a fixed value
    assets: List[Dict[str, Dict[str, Any]]]


# Fixed pet used as the hardcoded value for the person's ``pets`` field.
pet_instance = Pet(name="Roxy", species=Species.DOG, sound="woof woof")


class PersonFactory(DataclassFactory[Person]):
    """Factory for ``Person`` whose ``pets`` field is pinned to a fixed list."""

    # A plain value is used as-is; every other field is still randomly generated.
    pets = [pet_instance]


def test_is_pet_instance() -> None:
    """The built person carries exactly the one hardcoded pet."""
    built = PersonFactory.build()
    pets = built.pets
    assert len(pets) == 1
    assert pets[0] == pet_instance

In the example above, the call to PersonFactory.build() results in a Person where all values are randomly generated, except the pets list, which will be the hardcoded value we defined.

The Use Field

This, though, is often not desirable. We could instead define a factory for Pet where we restrict the choices to a range we like. For example:

Using the Use field with a custom PetFactory to control the generation of a Person’s pets list
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from typing import Any, Dict, List, Union
from uuid import UUID

from polyfactory import Use
from polyfactory.factories import DataclassFactory


class Species(str, Enum):
    """Kinds of pet; the ``str`` mixin makes every member a string as well."""

    CAT = "Cat"
    DOG = "Dog"


@dataclass
class Pet:
    """A pet described by its name, its species and the sound it makes."""

    name: str
    species: Species
    sound: str


@dataclass
class Person:
    """A person record that owns a list of pets alongside assorted other fields."""

    id: UUID
    name: str
    hobbies: List[str]
    age: Union[float, int]
    birthday: Union[datetime, date]
    pets: List[Pet]  # the field this example's factory fills from PetFactory
    assets: List[Dict[str, Dict[str, Any]]]


class PetFactory(DataclassFactory[Pet]):
    """Factory for ``Pet`` that restricts ``name`` and ``species`` to fixed choices."""

    # Use wraps a callable together with its arguments; the call happens at build time.
    name = Use(DataclassFactory.__random__.choice, ["Ralph", "Roxy"])
    species = Use(DataclassFactory.__random__.choice, list(Species))


class PersonFactory(DataclassFactory[Person]):
    """Factory for ``Person`` whose pets are generated by ``PetFactory``."""

    # Invokes PetFactory.batch(size=2) at build time, so each person gets two pets.
    pets = Use(PetFactory.batch, size=2)


def test_pet_choices() -> None:
    """Two pets are generated and each one is named from the allowed list."""
    allowed_names = ["Ralph", "Roxy"]
    pets = PersonFactory.build().pets

    assert len(pets) == 2
    for pet in pets:
        assert pet.name in allowed_names

The Use class is merely a semantic abstraction that makes the factory cleaner and simpler to understand. You can in fact use any callable (including classes) as the value of a factory’s attribute directly, and it will be invoked at build-time. Thus, you could for example rewrite the above PetFactory like so:

Using simple lambda functions to declare custom fields
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from typing import Any, Dict, List, Union
from uuid import UUID

from polyfactory import Use
from polyfactory.factories import DataclassFactory


class Species(str, Enum):
    """Kinds of pet; the ``str`` mixin makes every member a string as well."""

    CAT = "Cat"
    DOG = "Dog"


@dataclass
class Pet:
    """A pet described by its name, its species and the sound it makes."""

    name: str
    species: Species
    sound: str


@dataclass
class Person:
    """A person record that owns a list of pets alongside assorted other fields."""

    id: UUID
    name: str
    hobbies: List[str]
    age: Union[float, int]
    birthday: Union[datetime, date]
    pets: List[Pet]  # the field this example's factory fills from PetFactory
    assets: List[Dict[str, Dict[str, Any]]]


class PetFactory(DataclassFactory[Pet]):
    """Factory for ``Pet`` that declares its custom fields as plain lambdas."""

    # Any callable can be a field value; it is invoked at build time.
    name = lambda: DataclassFactory.__random__.choice(["Ralph", "Roxy"])
    species = lambda: DataclassFactory.__random__.choice(list(Species))


class PersonFactory(DataclassFactory[Person]):
    """Factory for ``Person`` whose pets are generated by ``PetFactory``."""

    # Invokes PetFactory.batch(size=2) at build time, so each person gets two pets.
    pets = Use(PetFactory.batch, size=2)


def test_pet_choices() -> None:
    """The lambda-based fields still limit pet names to the allowed list."""
    built = PersonFactory.build()
    generated_names = [pet.name for pet in built.pets]

    assert len(built.pets) == 2
    assert all(pet_name in ["Ralph", "Roxy"] for pet_name in generated_names)

Or you can use a class method, which will give you easy and nice access to the factory itself:

Using class methods to declare custom fields
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from typing import Any, Dict, List, Union
from uuid import UUID

from polyfactory import Use
from polyfactory.factories import DataclassFactory


class Species(str, Enum):
    """Kinds of pet; the ``str`` mixin makes every member a string as well."""

    CAT = "Cat"
    DOG = "Dog"


@dataclass
class Pet:
    """A pet described by its name, its species and the sound it makes."""

    name: str
    species: Species
    sound: str


@dataclass
class Person:
    """A person record that owns a list of pets alongside assorted other fields."""

    id: UUID
    name: str
    hobbies: List[str]
    age: Union[float, int]
    birthday: Union[datetime, date]
    pets: List[Pet]  # the field this example's factory fills from PetFactory
    assets: List[Dict[str, Dict[str, Any]]]


class PetFactory(DataclassFactory[Pet]):
    """Factory for ``Pet`` whose custom fields are produced by class methods."""

    @classmethod
    def name(cls) -> str:
        """Pick a pet name using the factory's own random instance."""
        return cls.__random__.choice(["Ralph", "Roxy"])

    @classmethod
    def species(cls) -> Species:
        """Pick a species using the factory's own random instance."""
        return cls.__random__.choice(list(Species))


class PersonFactory(DataclassFactory[Person]):
    """Factory for ``Person`` whose pets are generated by ``PetFactory``."""

    # Invokes PetFactory.batch(size=2) at build time, so each person gets two pets.
    pets = Use(PetFactory.batch, size=2)


def test_pet_choices() -> None:
    """The class-method fields still limit pet names to the allowed list."""
    pets = PersonFactory.build().pets
    pet_count = len(pets)

    assert pet_count == 2
    for pet in pets:
        assert pet.name in ["Ralph", "Roxy"]

Note

All the above examples used DataclassFactory.__random__.choice, and this is intentional. While you can use random.choice or any other function exported from the stdlib random library, the factory class has its own instance of random.Random attached under cls.__random__. This instance can be affected by random seeding in several ways, e.g. calling the factory seeding method, which will be scoped only to this instance. Thus, for consistent results when seeding randomness, it's important to use the factory's random.Random instance rather than the global one from the stdlib.

The Ignore Field

Ignore is used to designate an attribute as ignored, which means it will be completely ignored by the factory:

Using the Ignore field
from typing import TypedDict

from polyfactory import Ignore
from polyfactory.factories import TypedDictFactory


class Person(TypedDict):
    """Dictionary shape of a person: an integer ``id`` and a string ``name``."""

    id: int
    name: str


class PersonFactory(TypedDictFactory[Person]):
    """Factory for ``Person`` that generates nothing for ``id``."""

    # Ignore marks the attribute as ignored by the factory.
    id = Ignore()


def test_id_is_ignored() -> None:
    """``name`` is still generated while no value is produced for ``id``."""
    built = PersonFactory.build()
    generated_name = built.get("name")
    generated_id = built.get("id")

    assert generated_name
    assert generated_id is None

The Require Field

The Require class is used to designate a given attribute as a required kwarg. This means that the factory will require passing a value for this attribute as a kwarg to the build method, or an exception will be raised:

Using the Require field
from typing import TypedDict

import pytest

from polyfactory import Require
from polyfactory.exceptions import MissingBuildKwargException
from polyfactory.factories import TypedDictFactory


class Person(TypedDict):
    """Dictionary shape of a person: an integer ``id`` and a string ``name``."""

    id: int
    name: str


class PersonFactory(TypedDictFactory[Person]):
    """Factory for ``Person`` that insists on an explicit ``id``."""

    # Require makes ``id`` a mandatory kwarg of build(); omitting it raises.
    id = Require()


def test_id_is_required() -> None:
    """Building succeeds when ``id`` is supplied and raises when it is not."""
    # Supplying the required kwarg builds normally.
    built = PersonFactory.build(id=1)
    generated_name = built.get("name")
    supplied_id = built.get("id")

    assert generated_name
    assert supplied_id == 1

    # Leaving the kwarg out must raise.
    with pytest.raises(MissingBuildKwargException):
        PersonFactory.build()

The PostGenerated Field

The PostGenerated class allows for post generating fields based on already generated values of other (non post generated) fields. In most cases this pattern is best avoided, but for the few valid cases the PostGenerated helper is provided. For example:

Using the PostGenerated field
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Any, Dict

from polyfactory import PostGenerated
from polyfactory.factories import DataclassFactory


def add_timedelta(name: str, values: Dict[str, datetime], *args: Any, **kwargs: Any) -> datetime:
    """Return the already generated ``from_dt`` value shifted forward by one day.

    ``name`` and any extra positional or keyword arguments are accepted but unused.
    """
    return values["from_dt"] + timedelta(days=1)


@dataclass
class DatetimeRange:
    """A pair of datetimes; ``from_dt`` defaults to the current time."""

    to_dt: datetime
    # default_factory calls datetime.now for each new instance that omits from_dt.
    from_dt: datetime = field(default_factory=datetime.now)


class DatetimeRangeFactory(DataclassFactory[DatetimeRange]):
    """Factory for ``DatetimeRange`` that derives ``to_dt`` from ``from_dt``."""

    # PostGenerated runs add_timedelta with the values already generated for other fields.
    to_dt = PostGenerated(add_timedelta)


def test_post_generated() -> None:
    """``to_dt`` lands exactly one day after the generated ``from_dt``."""
    built = DatetimeRangeFactory.build()
    expected_to_dt = built.from_dt + timedelta(days=1)
    assert built.to_dt == expected_to_dt

The signature for use is: cb: Callable, *args, **defaults. It can receive any sync callable. The signature for the callable should be: name: str, values: dict[str, Any], *args, **defaults. The already generated values are mapped by name in the values dictionary.

Factories as Fields

Factories themselves can be used as fields. In this usage, build parameters will be passed to the declared factory.

Using a factory as a field
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
from typing import Any, Dict, List, Union
from uuid import UUID

from polyfactory.factories import DataclassFactory


class Species(str, Enum):
    """Kinds of pet; the ``str`` mixin makes every member a string as well."""

    CAT = "Cat"
    DOG = "Dog"


@dataclass
class Pet:
    """A pet described by its name, its species and the sound it makes."""

    name: str
    species: Species
    sound: str


@dataclass
class Person:
    """A person record that owns a single pet alongside assorted other fields."""

    id: UUID
    name: str
    hobbies: List[str]
    age: Union[float, int]
    birthday: Union[datetime, date]
    pet: Pet  # a single nested dataclass, filled by a sub-factory in this example
    assets: List[Dict[str, Dict[str, Any]]]


class PetFactory(DataclassFactory[Pet]):
    """Factory for ``Pet`` that limits ``name`` to two choices."""

    # Any callable can be a field value; it is invoked at build time.
    name = lambda: DataclassFactory.__random__.choice(["Ralph", "Roxy"])


class PersonFactory(DataclassFactory[Person]):
    """Factory for ``Person`` that delegates the ``pet`` field to ``PetFactory``."""

    # A factory class used as a field; build parameters for ``pet`` are passed on to it.
    pet = PetFactory


def test_subfactory() -> None:
    """The nested factory builds the pet, and build kwargs can override its fields."""
    default_person = PersonFactory.build()
    generated_pet = default_person.pet

    assert isinstance(generated_pet, Pet)
    assert generated_pet.name in ["Ralph", "Roxy"]

    # A dict given for ``pet`` is handed to PetFactory as its build parameters.
    overridden_person = PersonFactory.build(pet={"name": "Winston"})
    assert overridden_person.pet.name == "Winston"