Skip to content

Commit

Permalink
Make fields with defaults not required in the serialization schema by…
Browse files Browse the repository at this point in the history
… default (#7275)

Co-authored-by: Marcelo Trylesinski <marcelotryle@gmail.com>
  • Loading branch information
dmontagu and Kludex committed Sep 18, 2023
1 parent 97350c4 commit 3f6f847
Show file tree
Hide file tree
Showing 7 changed files with 197 additions and 30 deletions.
100 changes: 99 additions & 1 deletion docs/usage/model_config.md
Expand Up @@ -658,7 +658,7 @@ except NameError as e:

## Hide Input in Errors

_Pydantic_ shows the input value and type when it raises `ValidationError` during the validation.
Pydantic shows the input value and type when it raises `ValidationError` during the validation.

```py
from pydantic import BaseModel, ValidationError
Expand Down Expand Up @@ -701,3 +701,101 @@ except ValidationError as e:
Input should be a valid string [type=string_type]
"""
```

## JSON schema customization

#### Mark fields with default values as required in the serialization schema

By default, the JSON schema generated for serialization marks fields that have a default value as
**not required**, even if the default value would always be included during serialization. The benefit is that
most typical types have the same JSON schema for both validation and serialization. The downside is that the
serialization schema does not reflect a guarantee you can often make: that a field will be present when dumping
a model, even though it does not need to be provided when initializing the model.

If you want to opt into having the serialization schema mark fields as required even if they have a default value,
you can set the config setting `json_schema_serialization_defaults_required=True`:

```py
from pydantic import BaseModel, ConfigDict


class Model(BaseModel):
a: str = 'a'

model_config = ConfigDict(json_schema_serialization_defaults_required=True)


print(Model.model_json_schema(mode='validation'))
"""
{
'properties': {'a': {'default': 'a', 'title': 'A', 'type': 'string'}},
'title': 'Model',
'type': 'object',
}
"""
print(Model.model_json_schema(mode='serialization'))
"""
{
'properties': {'a': {'default': 'a', 'title': 'A', 'type': 'string'}},
'required': ['a'],
'title': 'Model',
'type': 'object',
}
"""
```

#### Override `mode` on JSON schema generation

If you want to be able to force a model to always use a specific mode when generating a JSON schema (even if the
mode is explicitly specified as a different value in the JSON schema generation calls), this can be done by setting
the config setting `json_schema_mode_override='serialization'` or `json_schema_mode_override='validation'`:

```py
from pydantic import BaseModel, ConfigDict, Json


class Model(BaseModel):
a: Json[int] # requires a string to validate, but will dump an int


print(Model.model_json_schema(mode='serialization'))
"""
{
'properties': {'a': {'title': 'A', 'type': 'integer'}},
'required': ['a'],
'title': 'Model',
'type': 'object',
}
"""


class ForceInputModel(Model):
# the following ensures that even with mode='serialization', we
# will get the schema that would be generated for validation.
model_config = ConfigDict(json_schema_mode_override='validation')


print(ForceInputModel.model_json_schema(mode='serialization'))
"""
{
'properties': {
'a': {
'contentMediaType': 'application/json',
'contentSchema': {'type': 'integer'},
'title': 'A',
'type': 'string',
}
},
'required': ['a'],
'title': 'ForceInputModel',
'type': 'object',
}
"""
```

This can be useful when using frameworks (such as FastAPI) that may generate different schemas for validation
and serialization that must both be referenced from the same schema; when this happens, we automatically append
`-Input` to the definition reference for the validation schema and `-Output` to the definition reference for the
serialization schema. Specifying a `json_schema_mode_override` prevents this conflict between
the validation and serialization schemas (since both will use the schema for the specified mode), and so prevents
the suffixes from being added to the definition references.
4 changes: 4 additions & 0 deletions pydantic/_internal/_config.py
Expand Up @@ -76,6 +76,8 @@ class ConfigWrapper:
hide_input_in_errors: bool
defer_build: bool
schema_generator: type[GenerateSchema] | None
json_schema_serialization_defaults_required: bool
json_schema_mode_override: Literal['validation', 'serialization', None]

def __init__(self, config: ConfigDict | dict[str, Any] | type[Any] | None, *, check: bool = True):
if check:
Expand Down Expand Up @@ -239,6 +241,8 @@ def _context_manager() -> Iterator[None]:
json_encoders=None,
defer_build=False,
schema_generator=None,
json_schema_serialization_defaults_required=False,
json_schema_mode_override=None,
)


Expand Down
26 changes: 26 additions & 0 deletions pydantic/config.py
Expand Up @@ -207,5 +207,31 @@ class without an annotation and has a type that is not in this tuple (or otherwi
Defaults to `None`.
"""

json_schema_serialization_defaults_required: bool
"""
Whether fields with default values should be marked as required in the serialization schema.
Setting this to `True` ensures that the serialization schema reflects the fact that a field with a default will always be present
when serializing the model, even though it is not required for validation.
However, there are scenarios where this may be undesirable — in particular, if you want to share the schema
between validation and serialization, and don't mind fields with defaults being marked as not required during
serialization. See [#7209](https://github.com/pydantic/pydantic/issues/7209) for more details.
Defaults to `False`.
"""

json_schema_mode_override: Literal['validation', 'serialization', None]
"""
If not `None`, the specified mode will be used to generate the JSON schema regardless of what `mode` was passed to
the function call.
This provides a way to force the JSON schema generation to reflect a specific mode, e.g., to always use the
validation schema, even if a framework (like FastAPI) might be indicating to use the serialization schema in some
places.
Defaults to `None`.
"""


__getattr__ = getattr_migration(__name__)
21 changes: 13 additions & 8 deletions pydantic/json_schema.py
Expand Up @@ -276,7 +276,7 @@ def __init__(self, by_alias: bool = True, ref_template: str = DEFAULT_REF_TEMPLA
self.definitions: dict[DefsRef, JsonSchemaValue] = {}
self._config_wrapper_stack = _config.ConfigWrapperStack(_config.ConfigWrapper({}))

self.mode: JsonSchemaMode = 'validation'
self._mode: JsonSchemaMode = 'validation'

# The following includes a mapping of a fully-unique defs ref choice to a list of preferred
# alternatives, which are generally simpler, such as only including the class name.
Expand Down Expand Up @@ -304,6 +304,13 @@ def __init__(self, by_alias: bool = True, ref_template: str = DEFAULT_REF_TEMPLA
def _config(self) -> _config.ConfigWrapper:
return self._config_wrapper_stack.tail

@property
def mode(self) -> JsonSchemaMode:
    """Return the JSON schema mode currently in effect.

    A non-`None` `json_schema_mode_override` on the active config takes
    precedence; otherwise the value stored in `self._mode` is returned.
    """
    override = self._config.json_schema_mode_override
    return self._mode if override is None else override

def build_schema_type_to_method(
self,
) -> dict[CoreSchemaOrFieldType, Callable[[CoreSchemaOrField], JsonSchemaValue]]:
Expand Down Expand Up @@ -363,14 +370,14 @@ def generate_definitions(
)

for key, mode, schema in inputs:
self.mode = mode
self._mode = mode
self.generate_inner(schema)

definitions_remapping = self._build_definitions_remapping()

json_schemas_map: dict[tuple[JsonSchemaKeyT, JsonSchemaMode], DefsRef] = {}
for key, mode, schema in inputs:
self.mode = mode
self._mode = mode
json_schema = self.generate_inner(schema)
json_schemas_map[(key, mode)] = definitions_remapping.remap_json_schema(json_schema)

Expand All @@ -392,7 +399,7 @@ def generate(self, schema: CoreSchema, mode: JsonSchemaMode = 'validation') -> J
Raises:
PydanticUserError: If the JSON schema generator has already been used to generate a JSON schema.
"""
self.mode = mode
self._mode = mode
if self._used:
raise PydanticUserError(
'This JSON schema generator has already been used to generate a JSON schema. '
Expand Down Expand Up @@ -1442,15 +1449,13 @@ def field_is_required(
Returns:
`True` if the field should be marked as required in the generated JSON schema, `False` otherwise.
"""
if self.mode == 'serialization':
if self.mode == 'serialization' and self._config.json_schema_serialization_defaults_required:
return not field.get('serialization_exclude')
elif self.mode == 'validation':
else:
if field['type'] == 'typed-dict-field':
return field.get('required', total)
else:
return field['schema']['type'] != 'default'
else:
assert_never(self.mode)

def dataclass_args_schema(self, schema: core_schema.DataclassArgsSchema) -> JsonSchemaValue:
"""Generates a JSON schema that matches a schema that defines a dataclass's constructor arguments.
Expand Down
2 changes: 1 addition & 1 deletion pydantic/types.py
Expand Up @@ -713,7 +713,7 @@ class Model(BaseModel):
path.unlink()
path = Path('directory')
path.mkdir()
path.mkdir(exist_ok=True)
try:
Model(f='directory') # directory
except ValidationError as e:
Expand Down
24 changes: 13 additions & 11 deletions tests/test_fastapi.sh
Expand Up @@ -18,14 +18,16 @@ cd .. && pip install . && cd fastapi
# To skip a specific test, add '--deselect path/to/test.py::test_name' to the end of this command
#
# To update the list of deselected tests, remove all deselections, run the tests, and re-add any remaining failures
./scripts/test.sh \
--deselect tests/test_tutorial/test_body_updates/test_tutorial001.py \
--deselect tests/test_tutorial/test_body_updates/test_tutorial001_py310.py \
--deselect tests/test_tutorial/test_body_updates/test_tutorial001_py39.py \
--deselect tests/test_tutorial/test_dataclasses/test_tutorial003.py \
--deselect tests/test_tutorial/test_path_operation_advanced_configurations/test_tutorial004.py \
--deselect tests/test_tutorial/test_path_operation_configurations/test_tutorial005.py \
--deselect tests/test_tutorial/test_path_operation_configurations/test_tutorial005_py310.py \
--deselect tests/test_tutorial/test_path_operation_configurations/test_tutorial005_py39.py \
--deselect tests/test_multi_body_errors.py::test_jsonable_encoder_requiring_error \
--deselect tests/test_multi_body_errors.py::test_put_incorrect_body_multiple \
./scripts/test.sh -vv \
--deselect tests/test_openapi_separate_input_output_schemas.py::test_openapi_schema \
--deselect tests/test_tutorial/test_body_updates/test_tutorial001.py::test_openapi_schema \
--deselect tests/test_tutorial/test_body_updates/test_tutorial001_py310.py::test_openapi_schema \
--deselect tests/test_tutorial/test_body_updates/test_tutorial001_py39.py::test_openapi_schema \
--deselect tests/test_tutorial/test_dataclasses/test_tutorial003.py::test_openapi_schema \
--deselect tests/test_tutorial/test_path_operation_advanced_configurations/test_tutorial004.py::test_openapi_schema \
--deselect tests/test_tutorial/test_path_operation_configurations/test_tutorial005.py::test_openapi_schema \
--deselect tests/test_tutorial/test_path_operation_configurations/test_tutorial005_py310.py::test_openapi_schema \
--deselect tests/test_tutorial/test_path_operation_configurations/test_tutorial005_py39.py::test_openapi_schema \
--deselect tests/test_tutorial/test_separate_openapi_schemas/test_tutorial001.py::test_openapi_schema \
--deselect tests/test_tutorial/test_separate_openapi_schemas/test_tutorial001_py310.py::test_openapi_schema \
--deselect tests/test_tutorial/test_separate_openapi_schemas/test_tutorial001_py39.py::test_openapi_schema \
50 changes: 41 additions & 9 deletions tests/test_json_schema.py
Expand Up @@ -521,7 +521,6 @@ class Model(BaseModel):
'a': {'default': 'foobar', 'format': 'binary', 'title': 'A', 'type': 'string'},
'b': {'default': '12.34', 'title': 'B', 'type': 'string'},
},
'required': ['a', 'b'],
'title': 'Model',
'type': 'object',
}
Expand Down Expand Up @@ -1695,7 +1694,6 @@ class Model(BaseModel):
# insert_assert(Model.model_json_schema(mode='serialization'))
assert Model.model_json_schema(mode='serialization') == {
'properties': properties,
'required': ['duration'],
'title': 'Model',
'type': 'object',
}
Expand All @@ -1717,7 +1715,6 @@ class Model(BaseModel):
# insert_assert(Model.model_json_schema(mode='serialization'))
assert Model.model_json_schema(mode='serialization') == {
'properties': properties,
'required': ['data'],
'title': 'Model',
'type': 'object',
}
Expand All @@ -1740,7 +1737,6 @@ class Dataclass:
# insert_assert(TypeAdapter(Dataclass).json_schema(mode='serialization'))
assert TypeAdapter(Dataclass).json_schema(mode='serialization') == {
'properties': properties,
'required': ['duration'],
'title': 'Dataclass',
'type': 'object',
}
Expand All @@ -1761,7 +1757,6 @@ class Dataclass:
# insert_assert(TypeAdapter(Dataclass).json_schema(mode='serialization'))
assert TypeAdapter(Dataclass).json_schema(mode='serialization') == {
'properties': properties,
'required': ['data'],
'title': 'Dataclass',
'type': 'object',
}
Expand All @@ -1785,7 +1780,6 @@ class MyTypedDict(TypedDict):
# insert_assert(TypeAdapter(MyTypedDict).json_schema(mode='serialization'))
assert TypeAdapter(MyTypedDict).json_schema(mode='serialization') == {
'properties': properties,
'required': ['duration'],
'title': 'MyTypedDict',
'type': 'object',
}
Expand All @@ -1807,7 +1801,6 @@ class MyTypedDict(TypedDict):
# insert_assert(TypeAdapter(MyTypedDict).json_schema(mode='serialization'))
assert TypeAdapter(MyTypedDict).json_schema(mode='serialization') == {
'properties': properties,
'required': ['data'],
'title': 'MyTypedDict',
'type': 'object',
}
Expand Down Expand Up @@ -1928,7 +1921,6 @@ class Foo(BaseModel):
'title': 'Foo',
'type': 'object',
'properties': {'a': {'title': 'A title', 'description': 'A description', 'default': 'foo'}},
'required': ['a'],
}

expected_schema['properties']['a'].update(expected_extra)
Expand Down Expand Up @@ -4755,7 +4747,7 @@ class Model(BaseModel):
}
assert Model.model_json_schema(mode='serialization', schema_generator=MyGenerateJsonSchema) == {
'properties': {'x': {'title': 'X', 'type': 'integer'}, 'y': {'title': 'Y', 'type': 'integer'}},
'required': ['x'],
'required': ['x', 'y'],
'title': 'Model',
'type': 'object',
}
Expand Down Expand Up @@ -5581,3 +5573,43 @@ class MyEnum(Enum):

# insert_assert(ta.json_schema())
assert ta.json_schema() == {'enum': [[1, 2], [2, 3]], 'title': 'MyEnum', 'type': 'array'}


def test_json_schema_serialization_defaults_required():
    """A field with a default is listed as required in the serialization schema only when opted in."""

    class Model(BaseModel):
        a: str = 'a'

    class SerializationDefaultsRequiredModel(Model):
        model_config = ConfigDict(json_schema_serialization_defaults_required=True)

    default_schema = Model.model_json_schema(mode='serialization')
    opted_in_schema = SerializationDefaultsRequiredModel.model_json_schema(mode='serialization')

    # Without the config flag there is no 'required' key at all.
    assert 'required' not in default_schema
    # With the flag, the defaulted field is reported as required.
    assert opted_in_schema['required'] == ['a']


def test_json_schema_mode_override():
    """`json_schema_mode_override` fixes the schema mode regardless of the `mode` argument."""

    class Model(BaseModel):
        a: Json[int]  # requires a string to validate, but will dump an int

    class ValidationModel(Model):
        model_config = ConfigDict(json_schema_mode_override='validation', title='Model')

    class SerializationModel(Model):
        model_config = ConfigDict(json_schema_mode_override='serialization', title='Model')

    # With an override in place, the requested mode must not change the generated schema.
    for overridden_model in (ValidationModel, SerializationModel):
        requested_validation = overridden_model.model_json_schema(mode='validation')
        requested_serialization = overridden_model.model_json_schema(mode='serialization')
        assert requested_validation == requested_serialization

    # Ensure the two submodels have different JSON schemas
    assert ValidationModel.model_json_schema() != SerializationModel.model_json_schema()

    # Each submodel's schema must match the base model's schema for the overridden mode,
    # even when the opposite mode is requested.
    base_validation_schema = Model.model_json_schema(mode='validation')
    base_serialization_schema = Model.model_json_schema(mode='serialization')
    assert ValidationModel.model_json_schema(mode='serialization') == base_validation_schema
    assert SerializationModel.model_json_schema(mode='validation') == base_serialization_schema

0 comments on commit 3f6f847

Please sign in to comment.