diff --git a/Cargo.lock b/Cargo.lock index 42b1220ea..96b2313b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -297,8 +297,7 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jiter" version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8243cf2d026264056bfacf305e54f5bee8866fd46b4c1873adcaebf614a0d306" +source = "git+https://github.com/pydantic/jiter?branch=dh/simpler-value#a3c9ea312b2c37ca664bf06bbb600284d30c91b8" dependencies = [ "ahash", "bitvec", @@ -312,9 +311,9 @@ dependencies = [ [[package]] name = "lexical-parse-float" -version = "0.8.5" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" dependencies = [ "lexical-parse-integer", "lexical-util", @@ -323,9 +322,9 @@ dependencies = [ [[package]] name = "lexical-parse-integer" -version = "0.8.6" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" dependencies = [ "lexical-util", "static_assertions", @@ -333,9 +332,9 @@ dependencies = [ [[package]] name = "lexical-util" -version = "0.8.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" dependencies = [ "static_assertions", ] diff --git a/Cargo.toml b/Cargo.toml index 6f8842bea..44d65569e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,7 @@ idna = "1.0.3" base64 = "0.22.1" num-bigint = "0.4.6" uuid = "1.12.1" -jiter = { version = "0.8.2", features = ["python"] } +jiter = { git = "https://github.com/pydantic/jiter", branch = "dh/simpler-value", features = ["python"] } hex = "0.4.3" [lib] diff --git a/src/input/input_json.rs b/src/input/input_json.rs index 139c71a25..6479357cc 100644 --- a/src/input/input_json.rs +++ b/src/input/input_json.rs @@ -1,9 +1,8 @@ use std::borrow::Cow; -use jiter::{JsonArray, JsonObject, JsonValue, LazyIndexMap}; +use jiter::{JsonArray, JsonObject, JsonValue}; use pyo3::prelude::*; use pyo3::types::{PyDict, PyList, PyString}; -use smallvec::SmallVec; use speedate::MicrosecondsPrecisionOverflowBehavior; use strum::EnumMessage; @@ -62,7 +61,9 @@ impl<'py, 'data> Input<'py> for JsonValue<'data> { match self { JsonValue::Object(object) => { let dict = PyDict::new(py); - for (k, v) in LazyIndexMap::iter(object) { + for (k, v) in object.as_slice() { + // TODO: jiter doesn't deduplicate keys, so we should probably do that here to + // avoid potential wasted work creating Python objects. dict.set_item(k, v).unwrap(); } Some(dict) @@ -253,7 +254,14 @@ impl<'py, 'data> Input<'py> for JsonValue<'data> { JsonValue::Str(s) => Ok(string_to_vec(s).into()), JsonValue::Object(object) => { // return keys iterator to match python's behavior - let keys: JsonArray = JsonArray::new(object.keys().map(|k| JsonValue::Str(k.clone())).collect()); + // FIXME jiter doesn't deduplicate keys, should probably do that here before iteration. + let keys: JsonArray = JsonArray::new( + object + .as_slice() + .iter() + .map(|(k, _)| JsonValue::Str(k.clone())) + .collect(), + ); Ok(GenericIterator::from(keys).into_static()) } _ => Err(ValError::new(ErrorTypeDefaults::IterableType, self)), @@ -543,11 +551,11 @@ impl<'data> ValidatedDict<'_> for &'_ JsonObject<'data> { &'a self, consumer: impl ConsumeIterator, Self::Item<'a>)>, Output = R>, ) -> ValResult { - Ok(consumer.consume_iterator(LazyIndexMap::iter(self).map(|(k, v)| Ok((k.as_ref(), v))))) + Ok(consumer.consume_iterator(self.as_slice().iter().map(|(k, v)| Ok((k.as_ref(), v))))) } fn last_key(&self) -> Option> { - self.keys().last().map(AsRef::as_ref) + self.last().map(|(k, _)| k.as_ref()) } } @@ -555,7 +563,7 @@ impl<'a, 'py, 'data> ValidatedList<'py> for &'a JsonArray<'data> { type Item = &'a JsonValue<'data>; fn len(&self) -> Option { - Some(SmallVec::len(self)) + Some(Vec::len(self)) } fn iterate(self, consumer: impl ConsumeIterator, Output = R>) -> ValResult { Ok(consumer.consume_iterator(self.iter().map(Ok))) @@ -569,7 +577,7 @@ impl<'a, 'data> ValidatedTuple<'_> for &'a JsonArray<'data> { type Item = &'a JsonValue<'data>; fn len(&self) -> Option { - Some(SmallVec::len(self)) + Some(Vec::len(self)) } fn iterate(self, consumer: impl ConsumeIterator, Output = R>) -> ValResult { Ok(consumer.consume_iterator(self.iter().map(Ok))) @@ -637,12 +645,12 @@ impl<'data> KeywordArgs<'_> for JsonObject<'data> { Self: 'a; fn len(&self) -> usize { - LazyIndexMap::len(self) + Vec::len(self) } fn get_item<'k>(&self, key: &'k LookupKey) -> ValResult)>> { key.json_get(self) } fn iter(&self) -> impl Iterator, Self::Item<'_>)>> { - LazyIndexMap::iter(self).map(|(k, v)| Ok((k.as_ref(), v))) + self.as_slice().iter().map(|(k, v)| Ok((k.as_ref(), v))) } } diff --git a/src/lookup_key.rs b/src/lookup_key.rs index c83a00583..295d25dca 100644 --- a/src/lookup_key.rs +++ b/src/lookup_key.rs @@ -262,20 +262,33 @@ impl LookupKey { &'s self, dict: &'a JsonObject<'data>, ) -> ValResult)>> { + // FIXME: use of find_map in here probably leads to quadratic complexity match self { - Self::Simple(path) => match dict.get(path.first_key()) { + Self::Simple(path) => match dict + .iter() + .rev() + .find_map(|(k, v)| (k == path.first_key()).then_some(v)) + { Some(value) => { debug_assert!(path.rest.is_empty()); Ok(Some((path, value))) } None => Ok(None), }, - Self::Choice { path1, path2 } => match dict.get(path1.first_key()) { + Self::Choice { path1, path2 } => match dict + .iter() + .rev() + .find_map(|(k, v)| (k == path1.first_key()).then_some(v)) + { Some(value) => { debug_assert!(path1.rest.is_empty()); Ok(Some((path1, value))) } - None => match dict.get(path2.first_key()) { + None => match dict + .iter() + .rev() + .find_map(|(k, v)| (k == path2.first_key()).then_some(v)) + { Some(value) => { debug_assert!(path2.rest.is_empty()); Ok(Some((path2, value))) @@ -287,7 +300,11 @@ impl LookupKey { for path in path_choices { // first step is different from the rest as we already know dict is JsonObject // because of above checks, we know that path should have at least one element, hence unwrap - let v: &JsonValue = match dict.get(path.first_item.key.as_str()) { + let v: &JsonValue = match dict + .iter() + .rev() + .find_map(|(k, v)| (k == path.first_key()).then_some(v)) + { Some(v) => v, None => continue, }; @@ -527,7 +544,7 @@ impl PathItem { pub fn json_obj_get<'a, 'data>(&self, json_obj: &'a JsonObject<'data>) -> Option<&'a JsonValue<'data>> { match self { - Self::S(PathItemString { key, .. }) => json_obj.get(key.as_str()), + Self::S(PathItemString { key, .. }) => json_obj.iter().rev().find_map(|(k, v)| (k == key).then_some(v)), _ => None, } }