Skip to content

Commit

Permalink
Add fast-path for common-case unequal objects
Browse files Browse the repository at this point in the history
  • Loading branch information
QuentinSoubeyranAqemia committed Oct 17, 2023
1 parent 0e11b10 commit 90b2d23
Showing 1 changed file with 40 additions and 25 deletions.
65 changes: 40 additions & 25 deletions pydantic/main.py
Expand Up @@ -30,7 +30,13 @@
from .config import ConfigDict
from .errors import PydanticUndefinedAnnotation, PydanticUserError
from .fields import ComputedFieldInfo, FieldInfo, ModelPrivateAttr
from .json_schema import DEFAULT_REF_TEMPLATE, GenerateJsonSchema, JsonSchemaMode, JsonSchemaValue, model_json_schema
from .json_schema import (
DEFAULT_REF_TEMPLATE,
GenerateJsonSchema,
JsonSchemaMode,
JsonSchemaValue,
model_json_schema,
)
from .warnings import PydanticDeprecatedSince20

if typing.TYPE_CHECKING:
Expand Down Expand Up @@ -912,33 +918,42 @@ def __eq__(self, other: Any) -> bool:
):
return False

# Fix GH-7444 by comparing only pydantic fields
# We provide a fast-path for performance: __dict__ comparison is *much* faster
# See tests/benchmarks/test_basemodel_eq_performances.py and GH-7825 for benchmarks
# We only want to compare pydantic fields but ignoring fields is costly.
# We'll perform a fast check first, and fall back to the costly path only when needed
# See GH-7444 and GH-7825 for rationale and a performance benchmark

# First, do the fast (and sometimes faulty) __dict__ comparison
if self.__dict__ == other.__dict__:
# If the check above passes, then pydantic fields are equal, we can return early
return True
else:
# Else, we need to perform a more detailed, costlier comparison
# We use operator.itemgetter because it is much faster than dict comprehensions
# NOTE: Contrary to standard Python classes and instances, when the Model class has
# attribute default values and the model instance doesn't have a corresponding
# attribute, accessing the missing attribute raises an error in
# __getattr__ instead of returning the class attribute
# Thus, using operator.itemgetter() instead of operator.attrgetter() is valid
model_fields = type(self).model_fields.keys()
getter = operator.itemgetter(*model_fields) if model_fields else lambda _: _SENTINEL
try:
return getter(self.__dict__) == getter(other.__dict__)
except KeyError:
# In rare cases (such as when using the deprecated BaseModel.copy() method),
# the __dict__ may not contain all model fields, which is how we can get here.
# getter(self.__dict__) is much faster than any 'safe' method that accounts
# for missing keys, and wrapping it in a `try` doesn't slow things down much
# in the common case.
self_fields_proxy = _SafeGetItemProxy(self.__dict__)
other_fields_proxy = _SafeGetItemProxy(other.__dict__)
return getter(self_fields_proxy) == getter(other_fields_proxy)

# We don't want to trigger unnecessary costly filtering of __dict__ on all unequal objects, so we return
# early if there are no keys to ignore (we would just return False later on anyway)
model_fields = type(self).model_fields.keys()
if self.__dict__.keys() <= model_fields and other.__dict__.keys() <= model_fields:
return False

# If we reach here, there are non-pydantic-fields keys, mapped to unequal values, that we need to ignore
# Resort to costly filtering of the __dict__ objects
# We use operator.itemgetter because it is much faster than dict comprehensions
# NOTE: Contrary to standard Python classes and instances, when the Model class has a default value for an
# attribute and the model instance doesn't have a corresponding attribute, accessing the missing attribute
# raises an error in BaseModel.__getattr__ instead of returning the class attribute
# So we can use operator.itemgetter() instead of operator.attrgetter()
getter = operator.itemgetter(*model_fields) if model_fields else lambda _: _SENTINEL
try:
return getter(self.__dict__) == getter(other.__dict__)
except KeyError:
# In rare cases (such as when using the deprecated BaseModel.copy() method),
# the __dict__ may not contain all model fields, which is how we can get here.
# getter(self.__dict__) is much faster than any 'safe' method that accounts
# for missing keys, and wrapping it in a `try` doesn't slow things down much
# in the common case.
self_fields_proxy = _SafeGetItemProxy(self.__dict__)
other_fields_proxy = _SafeGetItemProxy(other.__dict__)
return getter(self_fields_proxy) == getter(other_fields_proxy)

# other instance is not a BaseModel
else:
return NotImplemented # delegate to the other item in the comparison

Expand Down

0 comments on commit 90b2d23

Please sign in to comment.