Skip to content

Commit

Permalink
Adopt jiter 0.2.0 (#1250)
Browse files Browse the repository at this point in the history
Co-authored-by: David Hewitt <mail@davidhewitt.dev>
  • Loading branch information
samuelcolvin and davidhewitt committed Apr 2, 2024
1 parent 1c4baac commit e73b2d1
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 45 deletions.
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Expand Up @@ -44,7 +44,7 @@ base64 = "0.21.7"
num-bigint = "0.4.4"
python3-dll-a = "0.2.7"
uuid = "1.7.0"
jiter = { version = "0.1.1", features = ["python"] }
jiter = { version = "0.2.1", features = ["python"] }

[lib]
name = "_pydantic_core"
Expand Down
10 changes: 2 additions & 8 deletions src/input/return_enums.rs
Expand Up @@ -19,7 +19,7 @@ use serde::{ser::Error, Serialize, Serializer};
use crate::errors::{
py_err_string, ErrorType, ErrorTypeDefaults, InputValue, ToErrorValue, ValError, ValLineError, ValResult,
};
use crate::tools::{extract_i64, py_err};
use crate::tools::{extract_i64, new_py_string, py_err};
use crate::validators::{CombinedValidator, Exactness, ValidationState, Validator};

use super::{py_error_on_minusone, BorrowInput, Input};
Expand Down Expand Up @@ -437,13 +437,7 @@ impl<'a> EitherString<'a> {

pub fn as_py_string(&'a self, py: Python<'a>, cache_str: StringCacheMode) -> Bound<'a, PyString> {
match self {
Self::Cow(cow) => {
if matches!(cache_str, StringCacheMode::All) {
jiter::cached_py_string(py, cow.as_ref())
} else {
PyString::new_bound(py, cow.as_ref())
}
}
Self::Cow(cow) => new_py_string(py, cow.as_ref(), cache_str),
Self::Py(py_string) => py_string.clone(),
}
}
Expand Down
55 changes: 26 additions & 29 deletions src/input/shared.rs
Expand Up @@ -2,7 +2,7 @@ use pyo3::prelude::*;
use pyo3::sync::GILOnceCell;
use pyo3::{intern, Py, PyAny, Python};

use num_bigint::BigInt;
use jiter::{JsonErrorType, NumberInt};

use crate::errors::{ErrorTypeDefaults, ValError, ValResult};

Expand Down Expand Up @@ -68,29 +68,24 @@ fn strip_underscores(s: &str) -> Option<String> {
}

/// parse a string as an int
///
/// max length of the input is 4300, see
/// https://docs.python.org/3/whatsnew/3.11.html#other-cpython-implementation-changes and
/// https://github.com/python/cpython/issues/95778 for more info on that length bound
pub fn str_as_int<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValResult<EitherInt<'py>> {
let str = str.trim();
let len = str.len();
if len > 4300 {
Err(ValError::new(ErrorTypeDefaults::IntParsingSize, input))
} else if let Some(int) = _parse_str(input, str, len) {
Ok(int)
} else if let Some(str_stripped) = strip_decimal_zeros(str) {
if let Some(int) = _parse_str(input, str_stripped, len) {
Ok(int)
} else {
Err(ValError::new(ErrorTypeDefaults::IntParsing, input))

// we have to call `NumberInt::try_from` directly first so we fail fast if the string is too long
match NumberInt::try_from(str.as_bytes()) {
Ok(NumberInt::Int(i)) => return Ok(EitherInt::I64(i)),
Ok(NumberInt::BigInt(i)) => return Ok(EitherInt::BigInt(i)),
Err(e) => {
if e.error_type == JsonErrorType::NumberOutOfRange {
return Err(ValError::new(ErrorTypeDefaults::IntParsingSize, input));
}
}
}

if let Some(str_stripped) = strip_decimal_zeros(str) {
_parse_str(input, str_stripped)
} else if let Some(str_stripped) = strip_underscores(str) {
if let Some(int) = _parse_str(input, &str_stripped, len) {
Ok(int)
} else {
Err(ValError::new(ErrorTypeDefaults::IntParsing, input))
}
_parse_str(input, &str_stripped)
} else {
Err(ValError::new(ErrorTypeDefaults::IntParsing, input))
}
Expand All @@ -108,16 +103,18 @@ pub fn str_as_float<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValRe
}

/// parse a string as an int, `input` is required here to get lifetimes to match up
///
fn _parse_str<'py>(_input: &(impl Input<'py> + ?Sized), str: &str, len: usize) -> Option<EitherInt<'py>> {
if len < 19 {
if let Ok(i) = str.parse::<i64>() {
return Some(EitherInt::I64(i));
}
} else if let Ok(i) = str.parse::<BigInt>() {
return Some(EitherInt::BigInt(i));
/// max length of the input is 4300 which is checked by jiter, see
/// https://docs.python.org/3/whatsnew/3.11.html#other-cpython-implementation-changes and
/// https://github.com/python/cpython/issues/95778 for more info on that length bound
fn _parse_str<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValResult<EitherInt<'py>> {
match NumberInt::try_from(str.as_bytes()) {
Ok(jiter::NumberInt::Int(i)) => Ok(EitherInt::I64(i)),
Ok(jiter::NumberInt::BigInt(i)) => Ok(EitherInt::BigInt(i)),
Err(e) => match e.error_type {
JsonErrorType::NumberOutOfRange => Err(ValError::new(ErrorTypeDefaults::IntParsingSize, input)),
_ => Err(ValError::new(ErrorTypeDefaults::IntParsing, input)),
},
}
None
}

/// we don't want to parse as f64 then call `float_as_int` as it can lose precision for large ints, therefore
Expand Down
12 changes: 12 additions & 0 deletions src/tools.rs
Expand Up @@ -5,6 +5,8 @@ use pyo3::prelude::*;
use pyo3::types::{PyDict, PyString};
use pyo3::{ffi, intern, FromPyObject};

use jiter::{cached_py_string, pystring_fast_new, StringCacheMode};

pub trait SchemaDict<'py> {
fn get_as<T>(&self, key: &Bound<'_, PyString>) -> PyResult<Option<T>>
where
Expand Down Expand Up @@ -143,3 +145,13 @@ pub fn extract_i64(v: &Bound<'_, PyAny>) -> Option<i64> {
None
}
}

/// Create a Python string from `s`, honouring the requested string-cache mode.
///
/// With `StringCacheMode::All` the string is obtained through jiter's `cached_py_string`;
/// for every other mode it is built with `pystring_fast_new`.
pub(crate) fn new_py_string<'py>(py: Python<'py>, s: &str, cache_str: StringCacheMode) -> Bound<'py, PyString> {
    // we could use `bytecount::num_chars(s.as_bytes()) == s.len()` as orjson does, but it doesn't appear to be faster,
    // so the ASCII-only flag is always passed as `false`
    let assume_ascii = false;
    match cache_str {
        StringCacheMode::All => cached_py_string(py, s, assume_ascii),
        _ => pystring_fast_new(py, s, assume_ascii),
    }
}
7 changes: 2 additions & 5 deletions src/validators/validation_state.rs
Expand Up @@ -4,6 +4,7 @@ use pyo3::types::PyString;
use jiter::StringCacheMode;

use crate::recursion_guard::{ContainsRecursionState, RecursionState};
use crate::tools::new_py_string;

use super::Extra;

Expand Down Expand Up @@ -72,11 +73,7 @@ impl<'a, 'py> ValidationState<'a, 'py> {
}

pub fn maybe_cached_str(&self, py: Python<'py>, s: &str) -> Bound<'py, PyString> {
if matches!(self.extra.cache_str, StringCacheMode::All) {
jiter::cached_py_string(py, s)
} else {
PyString::new_bound(py, s)
}
new_py_string(py, s, self.extra.cache_str)
}
}

Expand Down

0 comments on commit e73b2d1

Please sign in to comment.