Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace peak with more efficient peek #48

Merged
merged 6 commits into from Dec 4, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
69 changes: 41 additions & 28 deletions src/jiter.rs
@@ -1,4 +1,4 @@
use crate::errors::{JiterError, JsonType, LinePosition, DEFAULT_RECURSION_LIMIT};
use crate::errors::{json_error, JiterError, JsonType, LinePosition, DEFAULT_RECURSION_LIMIT};
use crate::number_decoder::{NumberAny, NumberFloat, NumberInt, NumberRange};
use crate::parse::{Parser, Peak};
use crate::string_decoder::{StringDecoder, StringDecoderRange, Tape};
Expand Down Expand Up @@ -110,13 +110,15 @@ impl<'j> Jiter<'j> {

/// Knowing the next value is a number, parse it.
pub fn known_number(&mut self, peak: Peak) -> JiterResult<NumberAny> {
match peak {
Peak::Num(first) => self
.parser
.consume_number::<NumberAny>(first, self.allow_inf_nan)
.map_err(Into::into),
_ => Err(self.wrong_type(JsonType::Int, peak)),
}
self.parser
.consume_number::<NumberAny>(peak.into_inner(), self.allow_inf_nan)
.map_err(|e| {
if !peak.is_num() {
self.wrong_type(JsonType::Int, peak)
} else {
e.into()
}
})
}

/// Assuming the next value is an integer, consume it. Error if it is not an integer, or is invalid JSON.
Expand All @@ -127,13 +129,15 @@ impl<'j> Jiter<'j> {

/// Knowing the next value is an integer, parse it.
pub fn known_int(&mut self, peak: Peak) -> JiterResult<NumberInt> {
match peak {
Peak::Num(first) => self
.parser
.consume_number::<NumberInt>(first, self.allow_inf_nan)
.map_err(Into::into),
_ => Err(self.wrong_type(JsonType::Int, peak)),
}
self.parser
.consume_number::<NumberInt>(peak.into_inner(), self.allow_inf_nan)
.map_err(|e| {
if !peak.is_num() {
self.wrong_type(JsonType::Int, peak)
} else {
e.into()
}
})
}

/// Assuming the next value is a float, consume it. Error if it is not a float, or is invalid JSON.
Expand All @@ -144,13 +148,15 @@ impl<'j> Jiter<'j> {

/// Knowing the next value is a float, parse it.
pub fn known_float(&mut self, peak: Peak) -> JiterResult<f64> {
match peak {
Peak::Num(first) => self
.parser
.consume_number::<NumberFloat>(first, self.allow_inf_nan)
.map_err(Into::into),
_ => Err(self.wrong_type(JsonType::Int, peak)),
}
self.parser
.consume_number::<NumberFloat>(peak.into_inner(), self.allow_inf_nan)
.map_err(|e| {
if !peak.is_num() {
self.wrong_type(JsonType::Float, peak)
} else {
e.into()
}
})
}

/// Assuming the next value is a number, consume it and return bytes from the original JSON data.
Expand All @@ -161,12 +167,18 @@ impl<'j> Jiter<'j> {

/// Knowing the next value is a number, parse it and return bytes from the original JSON data.
fn known_number_bytes(&mut self, peak: Peak) -> JiterResult<&[u8]> {
match peak {
Peak::Num(first) => {
let range = self.parser.consume_number::<NumberRange>(first, self.allow_inf_nan)?;
Ok(&self.data[range])
match self
.parser
.consume_number::<NumberRange>(peak.into_inner(), self.allow_inf_nan)
{
Ok(range) => Ok(&self.data[range]),
Err(e) => {
if !peak.is_num() {
Err(self.wrong_type(JsonType::Float, peak))
} else {
Err(e.into())
}
}
_ => Err(self.wrong_type(JsonType::Float, peak)),
}
}

Expand Down Expand Up @@ -299,9 +311,10 @@ impl<'j> Jiter<'j> {
Peak::True | Peak::False => JiterError::wrong_type(expected, JsonType::Bool, self.parser.index),
Peak::Null => JiterError::wrong_type(expected, JsonType::Null, self.parser.index),
Peak::String => JiterError::wrong_type(expected, JsonType::String, self.parser.index),
Peak::Num(first) => self.wrong_num(first, expected),
Peak::Array => JiterError::wrong_type(expected, JsonType::Array, self.parser.index),
Peak::Object => JiterError::wrong_type(expected, JsonType::Object, self.parser.index),
_ if peak.is_num() => self.wrong_num(peak.into_inner(), expected),
_ => json_error!(ExpectedSomeValue, self.parser.index).into(),
}
}

Expand Down
102 changes: 62 additions & 40 deletions src/parse.rs
Expand Up @@ -2,33 +2,60 @@ use crate::errors::{json_err, JsonResult, LinePosition};
use crate::number_decoder::AbstractNumberDecoder;
use crate::string_decoder::{AbstractStringDecoder, Tape};

/// Enum used to describe the next expected value in JSON.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Peak {
Null,
True,
False,
// we keep the first character of the number as we'll need it when decoding
Num(u8),
String,
Array,
Object,
/// Describes the next expected value in JSON, stored as a single raw byte.
///
/// The associated constants below name the bytes that introduce each kind of value;
/// any other byte can still be wrapped, so consumers decide what counts as valid.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Peak(u8);

#[allow(non_upper_case_globals)] // while testing
impl Peak {
// Keyword values, identified by their first letter.
pub const Null: Self = Self(b'n');
pub const True: Self = Self(b't');
pub const False: Self = Self(b'f');
// Leading digit of a number.
pub const Zero: Self = Self(b'0');
pub const One: Self = Self(b'1');
pub const Two: Self = Self(b'2');
pub const Three: Self = Self(b'3');
pub const Four: Self = Self(b'4');
pub const Five: Self = Self(b'5');
pub const Six: Self = Self(b'6');
pub const Seven: Self = Self(b'7');
pub const Eight: Self = Self(b'8');
pub const Nine: Self = Self(b'9');
// Leading sign of a number.
pub const Minus: Self = Self(b'-');
pub const Plus: Self = Self(b'+');
// `I` for Infinity and `N` for NaN.
pub const Infinity: Self = Self(b'I');
pub const NaN: Self = Self(b'N');
// Opening delimiter of a string, array and object respectively.
pub const String: Self = Self(b'"');
pub const Array: Self = Self(b'[');
pub const Object: Self = Self(b'{');
}

impl Peak {
fn new(next: u8) -> Option<Self> {
match next {
b'[' => Some(Self::Array),
b'{' => Some(Self::Object),
b'"' => Some(Self::String),
b't' => Some(Self::True),
b'f' => Some(Self::False),
b'n' => Some(Self::Null),
b'0'..=b'9' => Some(Self::Num(next)),
// `-` negative, `I` Infinity, `N` NaN
b'-' | b'I' | b'N' => Some(Self::Num(next)),
_ => None,
}
const fn new(next: u8) -> Self {
Self(next)
}

pub const fn is_num(self) -> bool {
matches!(
self,
Self::Zero
| Self::One
| Self::Two
| Self::Three
| Self::Four
| Self::Five
| Self::Six
| Self::Seven
| Self::Eight
| Self::Nine
| Self::Minus
| Self::Plus
| Self::Infinity
| Self::NaN
)
}

pub const fn into_inner(self) -> u8 {
self.0
}
}

Expand Down Expand Up @@ -57,10 +84,7 @@ impl<'j> Parser<'j> {

pub fn peak(&mut self) -> JsonResult<Peak> {
if let Some(next) = self.eat_whitespace() {
match Peak::new(next) {
Some(p) => Ok(p),
None => json_err!(ExpectedSomeValue, self.index),
}
Ok(Peak::new(next))
} else {
json_err!(EofWhileParsingValue, self.index)
}
Expand All @@ -73,7 +97,7 @@ impl<'j> Parser<'j> {
self.index += 1;
Ok(None)
} else {
self.array_peak()
Ok(Some(Peak::new(next)))
}
} else {
json_err!(EofWhileParsingList, self.index)
Expand All @@ -85,7 +109,12 @@ impl<'j> Parser<'j> {
match next {
b',' => {
self.index += 1;
self.array_peak()
let next = self.array_peak()?;
if next.is_none() {
json_err!(TrailingComma, self.index)
} else {
Ok(next)
}
}
b']' => {
self.index += 1;
Expand Down Expand Up @@ -216,16 +245,9 @@ impl<'j> Parser<'j> {

fn array_peak(&mut self) -> JsonResult<Option<Peak>> {
if let Some(next) = self.eat_whitespace() {
match Peak::new(next) {
Some(p) => Ok(Some(p)),
None => {
// if next is a `]`, we have a "trailing comma" error
if next == b']' {
json_err!(TrailingComma, self.index)
} else {
json_err!(ExpectedSomeValue, self.index)
}
}
match next {
b']' => Ok(None),
_ => Ok(Some(Peak::new(next))),
}
} else {
json_err!(EofWhileParsingValue, self.index)
Expand Down
18 changes: 10 additions & 8 deletions src/python.rs
Expand Up @@ -75,14 +75,6 @@ impl<'j> PythonParser<'j> {
let s = self.parser.consume_string::<StringDecoder>(&mut self.tape)?;
Ok(StringCache::get(py, s.as_str()))
}
Peak::Num(first) => {
let n = self.parser.consume_number::<NumberAny>(first, self.allow_inf_nan)?;
match n {
NumberAny::Int(NumberInt::Int(int)) => Ok(int.to_object(py)),
NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(big_int.to_object(py)),
NumberAny::Float(float) => Ok(float.to_object(py)),
}
}
Peak::Array => {
let list = if let Some(peak_first) = self.parser.array_first()? {
let mut vec: SmallVec<[PyObject; 8]> = SmallVec::with_capacity(8);
Expand Down Expand Up @@ -125,6 +117,16 @@ impl<'j> PythonParser<'j> {
}
Ok(dict.to_object(py))
}
_ => {
let n = self
.parser
.consume_number::<NumberAny>(peak.into_inner(), self.allow_inf_nan)?;
match n {
NumberAny::Int(NumberInt::Int(int)) => Ok(int.to_object(py)),
NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(big_int.to_object(py)),
NumberAny::Float(float) => Ok(float.to_object(py)),
}
}
}
}

Expand Down
25 changes: 16 additions & 9 deletions src/value.rs
Expand Up @@ -3,7 +3,7 @@ use std::sync::Arc;
use num_bigint::BigInt;
use smallvec::SmallVec;

use crate::errors::{JsonError, JsonResult, DEFAULT_RECURSION_LIMIT};
use crate::errors::{json_error, JsonError, JsonResult, DEFAULT_RECURSION_LIMIT};
use crate::lazy_index_map::LazyIndexMap;
use crate::number_decoder::{NumberAny, NumberInt};
use crate::parse::{Parser, Peak};
Expand Down Expand Up @@ -97,14 +97,6 @@ pub(crate) fn take_value(
let s = parser.consume_string::<StringDecoder>(tape)?;
Ok(JsonValue::Str(s.into()))
}
Peak::Num(first) => {
let n = parser.consume_number::<NumberAny>(first, allow_inf_nan)?;
match n {
NumberAny::Int(NumberInt::Int(int)) => Ok(JsonValue::Int(int)),
NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(JsonValue::BigInt(big_int)),
NumberAny::Float(float) => Ok(JsonValue::Float(float)),
}
}
Peak::Array => {
// we could do something clever about guessing the size of the array
let mut array: SmallVec<[JsonValue; 8]> = SmallVec::new();
Expand Down Expand Up @@ -144,5 +136,20 @@ pub(crate) fn take_value(

Ok(JsonValue::Object(Arc::new(object)))
}
_ => {
let n = parser.consume_number::<NumberAny>(peak.into_inner(), allow_inf_nan);
match n {
Ok(NumberAny::Int(NumberInt::Int(int))) => Ok(JsonValue::Int(int)),
Ok(NumberAny::Int(NumberInt::BigInt(big_int))) => Ok(JsonValue::BigInt(big_int)),
Ok(NumberAny::Float(float)) => Ok(JsonValue::Float(float)),
Err(e) => {
if !peak.is_num() {
Err(json_error!(ExpectedSomeValue, self.parser.index).into())
davidhewitt marked this conversation as resolved.
Show resolved Hide resolved
} else {
Err(e.into())
}
}
}
}
}
}