Skip to content

Commit 538f87d

Browse files
committed
Use inline for short strings
Closes #276.
1 parent 4042693 commit 538f87d

File tree

4 files changed

+92
-34
lines changed

4 files changed

+92
-34
lines changed

integration-tests/build.rs

+3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ fn main() {
1717
"html",
1818
"head",
1919
"id",
20+
"❤",
21+
"❤💯",
22+
"❤💯❤💯",
2023
])
2124
.write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("test_atom.rs"))
2225
.unwrap()

integration-tests/src/lib.rs

+11-8
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,10 @@ fn test_types() {
4747
assert!(Atom::from("").is_static());
4848
assert!(Atom::from("defaults").is_static());
4949
assert!(Atom::from("font-weight").is_static());
50-
assert!(Atom::from("id").is_static());
51-
assert!(Atom::from("body").is_static());
52-
assert!(Atom::from("a").is_static());
53-
assert!(Atom::from("address").is_static());
50+
assert!(Atom::from("id").is_inline());
51+
assert!(Atom::from("body").is_inline());
52+
assert!(Atom::from("a").is_inline());
53+
assert!(Atom::from("address").is_inline());
5454
assert!(Atom::from("c").is_inline());
5555
assert!(Atom::from("zz").is_inline());
5656
assert!(Atom::from("zzz").is_inline());
@@ -173,11 +173,11 @@ fn repr() {
173173
// Static atoms
174174
check_static("defaults", test_atom!("defaults"));
175175
check_static("font-weight", test_atom!("font-weight"));
176-
check_static("a", test_atom!("a"));
177-
check_static("address", test_atom!("address"));
178-
check_static("area", test_atom!("area"));
179176

180177
// Inline atoms
178+
check("a", 0x0000_0000_0000_6111);
179+
check("address", 0x7373_6572_6464_6171);
180+
check("area", 0x0000_0061_6572_6141);
181181
check("e", 0x0000_0000_0000_6511);
182182
check("xyzzy", 0x0000_797A_7A79_7851);
183183
check("xyzzy01", 0x3130_797A_7A79_7871);
@@ -201,7 +201,10 @@ fn atom_macro() {
201201
assert_eq!(test_atom!("a"), Atom::from("a"));
202202
assert_eq!(test_atom!("body"), Atom::from("body"));
203203
assert_eq!(test_atom!("address"), Atom::from("address"));
204+
assert_eq!(test_atom!("❤"), Atom::from("❤"));
205+
assert_eq!(test_atom!("❤💯"), Atom::from("❤💯"));
204206
assert_eq!(test_atom!("font-weight"), Atom::from("font-weight"));
207+
assert_eq!(test_atom!("❤💯❤💯"), Atom::from("❤💯❤💯"));
205208
}
206209

207210
#[test]
@@ -300,7 +303,7 @@ fn test_from_string() {
300303
#[test]
301304
fn test_try_static() {
302305
assert!(Atom::try_static("defaults").is_some());
303-
assert!(Atom::try_static("head").is_some());
306+
assert!(Atom::try_static("head").is_none());
304307
assert!(Atom::try_static("not in the static table").is_none());
305308
}
306309

src/atom.rs

+37-16
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,25 @@ impl<Static> Atom<Static> {
9999
}
100100
}
101101

102+
/// For the atom!() macros: builds an inline atom from pre-packed string bytes `n` (bottom 8 bits zero) and the string's byte length `len`.
103+
#[inline(always)]
104+
#[doc(hidden)]
105+
pub const fn pack_inline(mut n: u64, len: u8) -> Self {
106+
if cfg!(target_endian = "big") {
107+
// Reverse order of top 7 bytes: `to_le()` byte-swaps on big-endian, then `<< 8` moves the data back above the bottom byte.
108+
// Bottom 8 bits of `n` are zero, and we need that to remain so.
109+
// String data is stored in top 7 bytes, tag and length in bottom byte.
110+
n = n.to_le() << 8;
111+
}
112+
113+
let data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET) | n;
114+
Self {
115+
// SAFETY: INLINE_TAG ensures this is never zero
116+
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
117+
phantom: PhantomData,
118+
}
119+
}
120+
102121
fn tag(&self) -> u8 {
103122
(self.unsafe_data.get() & TAG_MASK) as u8
104123
}
@@ -186,20 +205,22 @@ impl<Static: StaticAtomSet> Hash for Atom<Static> {
186205

187206
impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
188207
fn from(string_to_add: Cow<'a, str>) -> Self {
189-
Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| {
190-
let len = string_to_add.len();
191-
if len <= MAX_INLINE_LEN {
192-
let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);
193-
{
194-
let dest = inline_atom_slice_mut(&mut data);
195-
dest[..len].copy_from_slice(string_to_add.as_bytes())
196-
}
197-
Atom {
198-
// INLINE_TAG ensures this is never zero
199-
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
200-
phantom: PhantomData,
201-
}
202-
} else {
208+
let len = string_to_add.len();
209+
if len == 0 {
210+
Self::pack_static(Static::empty_string_index())
211+
} else if len <= MAX_INLINE_LEN {
212+
let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);
213+
{
214+
let dest = inline_atom_slice_mut(&mut data);
215+
dest[..len].copy_from_slice(string_to_add.as_bytes());
216+
}
217+
Atom {
218+
// INLINE_TAG ensures this is never zero
219+
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
220+
phantom: PhantomData,
221+
}
222+
} else {
223+
Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| {
203224
let ptr: std::ptr::NonNull<Entry> = DYNAMIC_SET.insert(string_to_add, hash.g);
204225
let data = ptr.as_ptr() as u64;
205226
debug_assert!(0 == data & TAG_MASK);
@@ -208,8 +229,8 @@ impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
208229
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
209230
phantom: PhantomData,
210231
}
211-
}
212-
})
232+
})
233+
}
213234
}
214235
}
215236

string-cache-codegen/lib.rs

+41-10
Original file line numberDiff line numberDiff line change
@@ -187,11 +187,19 @@ impl AtomType {
187187
// which would cause divisions by zero in rust-phf.
188188
self.atoms.insert(String::new());
189189

190-
let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect();
191-
let hash_state = phf_generator::generate_hash(&atoms);
190+
// Strings over 7 bytes + empty string added to static set.
191+
// Otherwise stored inline.
192+
let (static_strs, inline_strs): (Vec<_>, Vec<_>) = self
193+
.atoms
194+
.iter()
195+
.map(String::as_str)
196+
.partition(|s| s.len() > 7 || s.is_empty());
197+
198+
// Static strings
199+
let hash_state = phf_generator::generate_hash(&static_strs);
192200
let phf_generator::HashState { key, disps, map } = hash_state;
193201
let (disps0, disps1): (Vec<_>, Vec<_>) = disps.into_iter().unzip();
194-
let atoms: Vec<&str> = map.iter().map(|&idx| atoms[idx]).collect();
202+
let atoms: Vec<&str> = map.iter().map(|&idx| static_strs[idx]).collect();
195203
let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32;
196204
let indices = 0..atoms.len() as u32;
197205

@@ -228,16 +236,33 @@ impl AtomType {
228236
let macro_name = new_term(&*self.macro_name);
229237
let module = module.parse::<proc_macro2::TokenStream>().unwrap();
230238
let atom_prefix = format!("ATOM_{}_", type_name.to_string().to_uppercase());
231-
let const_names: Vec<_> = atoms
239+
let new_const_name = |atom: &str| {
240+
let mut name = atom_prefix.clone();
241+
for c in atom.chars() {
242+
name.push_str(&format!("_{:02X}", c as u32))
243+
}
244+
new_term(&name)
245+
};
246+
let const_names: Vec<_> = atoms.iter().copied().map(new_const_name).collect();
247+
248+
// Inline strings
249+
let (inline_const_names, inline_values_and_lengths): (Vec<_>, Vec<_>) = inline_strs
232250
.iter()
233-
.map(|atom| {
234-
let mut name = atom_prefix.clone();
235-
for c in atom.chars() {
236-
name.push_str(&format!("_{:02X}", c as u32))
251+
.map(|s| {
252+
let const_name = new_const_name(s);
253+
254+
let mut value = 0u64;
255+
for (index, c) in s.bytes().enumerate() {
256+
value = value | ((c as u64) << (index * 8 + 8));
237257
}
238-
new_term(&name)
258+
259+
let len = s.len() as u8;
260+
261+
(const_name, (value, len))
239262
})
240-
.collect();
263+
.unzip();
264+
let (inline_values, inline_lengths): (Vec<_>, Vec<_>) =
265+
inline_values_and_lengths.into_iter().unzip();
241266

242267
quote! {
243268
#atom_doc
@@ -265,13 +290,19 @@ impl AtomType {
265290
#(
266291
pub const #const_names: #type_name = #type_name::pack_static(#indices);
267292
)*
293+
#(
294+
pub const #inline_const_names: #type_name = #type_name::pack_inline(#inline_values, #inline_lengths);
295+
)*
268296

269297
#macro_doc
270298
#[macro_export]
271299
macro_rules! #macro_name {
272300
#(
273301
(#atoms) => { #module::#const_names };
274302
)*
303+
#(
304+
(#inline_strs) => { #module::#inline_const_names };
305+
)*
275306
}
276307
}
277308
}

0 commit comments

Comments
 (0)