diff --git a/RELEASES.rst b/RELEASES.rst index 62e0f318f6..5aa9a86825 100644 --- a/RELEASES.rst +++ b/RELEASES.rst @@ -2376,6 +2376,10 @@ Planned algorithm based on single linked chaining of duk_hstrings, with the same algorithm serving both default and low memory environments (GH-1277) +* Replace object property table hash algorithm with a faster algorithm + which uses a 2^N size and a bit mask instead of a prime size and a MOD; + use a hash table more eagerly than before (GH-1284) + * Add a "global" property to the global object to provide easy access to the global object itself without needing idioms like "new Function('return this')()"; implemented based on diff --git a/config/config-options/DUK_USE_HOBJECT_ARRAY_ABANDON_LIMIT.yaml b/config/config-options/DUK_USE_HOBJECT_ARRAY_ABANDON_LIMIT.yaml new file mode 100644 index 0000000000..74b83878c0 --- /dev/null +++ b/config/config-options/DUK_USE_HOBJECT_ARRAY_ABANDON_LIMIT.yaml @@ -0,0 +1,14 @@ +define: DUK_USE_HOBJECT_ARRAY_ABANDON_LIMIT +introduced: 2.1.0 +default: 2 +tags: + - performance + - lowmemory +description: > + Abandon array part if its density is below L. The limit L is expressed as + a .3 fixed point value, e.g. 2 means 2/8 = 25%. + + The default limit is quite low: one array entry with packed duk_tval is 8 + bytes whereas one normal entry is 4+1+8 = 13 bytes without a hash entry, + and 17-21 bytes with a hash entry (load factor 0.5-1.0). So the array part + shouldn't be abandoned very easily from a footprint point of view. 
diff --git a/config/config-options/DUK_USE_HOBJECT_ARRAY_FAST_RESIZE_LIMIT.yaml b/config/config-options/DUK_USE_HOBJECT_ARRAY_FAST_RESIZE_LIMIT.yaml new file mode 100644 index 0000000000..6429d4bb41 --- /dev/null +++ b/config/config-options/DUK_USE_HOBJECT_ARRAY_FAST_RESIZE_LIMIT.yaml @@ -0,0 +1,13 @@ +define: DUK_USE_HOBJECT_ARRAY_FAST_RESIZE_LIMIT +introduced: 2.1.0 +default: 9 +tags: + - performance + - lowmemory +description: > + Skip abandon check in object array part resize if new_size < L * old_size. + The limit L is expressed as a .3 fixed point value, e.g. 9 means 9/8 = + 112.5% of current size. + + This is rather technical and you should only change the parameter if you've + looked at the internals. diff --git a/config/config-options/DUK_USE_HOBJECT_ARRAY_MINGROW_ADD.yaml b/config/config-options/DUK_USE_HOBJECT_ARRAY_MINGROW_ADD.yaml new file mode 100644 index 0000000000..82e79f7178 --- /dev/null +++ b/config/config-options/DUK_USE_HOBJECT_ARRAY_MINGROW_ADD.yaml @@ -0,0 +1,8 @@ +define: DUK_USE_HOBJECT_ARRAY_MINGROW_ADD +introduced: 2.1.0 +default: 16 +tags: + - performance +description: > + Technical internal parameter, see sources for details. Only adjust if + you've looked at the internals. diff --git a/config/config-options/DUK_USE_HOBJECT_ARRAY_MINGROW_DIVISOR.yaml b/config/config-options/DUK_USE_HOBJECT_ARRAY_MINGROW_DIVISOR.yaml new file mode 100644 index 0000000000..98fdb29f08 --- /dev/null +++ b/config/config-options/DUK_USE_HOBJECT_ARRAY_MINGROW_DIVISOR.yaml @@ -0,0 +1,8 @@ +define: DUK_USE_HOBJECT_ARRAY_MINGROW_DIVISOR +introduced: 2.1.0 +default: 8 +tags: + - performance +description: > + Technical internal parameter, see sources for details. Only adjust if + you've looked at the internals. 
diff --git a/config/config-options/DUK_USE_HOBJECT_ENTRY_MINGROW_ADD.yaml b/config/config-options/DUK_USE_HOBJECT_ENTRY_MINGROW_ADD.yaml new file mode 100644 index 0000000000..c38fd8f2f8 --- /dev/null +++ b/config/config-options/DUK_USE_HOBJECT_ENTRY_MINGROW_ADD.yaml @@ -0,0 +1,8 @@ +define: DUK_USE_HOBJECT_ENTRY_MINGROW_ADD +introduced: 2.1.0 +default: 16 +tags: + - performance +description: > + Technical internal parameter, see sources for details. Only adjust if + you've looked at the internals. diff --git a/config/config-options/DUK_USE_HOBJECT_ENTRY_MINGROW_DIVISOR.yaml b/config/config-options/DUK_USE_HOBJECT_ENTRY_MINGROW_DIVISOR.yaml new file mode 100644 index 0000000000..c31b111890 --- /dev/null +++ b/config/config-options/DUK_USE_HOBJECT_ENTRY_MINGROW_DIVISOR.yaml @@ -0,0 +1,8 @@ +define: DUK_USE_HOBJECT_ENTRY_MINGROW_DIVISOR +introduced: 2.1.0 +default: 8 +tags: + - performance +description: > + Technical internal parameter, see sources for details. Only adjust if + you've looked at the internals. diff --git a/config/config-options/DUK_USE_HOBJECT_HASH_PART.yaml b/config/config-options/DUK_USE_HOBJECT_HASH_PART.yaml index 16d91de431..4e8c82e997 100644 --- a/config/config-options/DUK_USE_HOBJECT_HASH_PART.yaml +++ b/config/config-options/DUK_USE_HOBJECT_HASH_PART.yaml @@ -9,5 +9,3 @@ description: > enabled unless the target is very low on memory. If DUK_USE_OBJSIZES16 is defined, this option must not be defined. - -# FIXME: expose property limit for hash table as a DUK_USE_xxx flag? 
diff --git a/config/config-options/DUK_USE_HOBJECT_HASH_PROP_LIMIT.yaml b/config/config-options/DUK_USE_HOBJECT_HASH_PROP_LIMIT.yaml new file mode 100644 index 0000000000..008b836180 --- /dev/null +++ b/config/config-options/DUK_USE_HOBJECT_HASH_PROP_LIMIT.yaml @@ -0,0 +1,20 @@ +define: DUK_USE_HOBJECT_HASH_PROP_LIMIT +introduced: 2.1.0 +default: 8 +tags: + - performance + - lowmemory +description: > + Minimum number of properties needed for a hash part to be included in the + object property table. This limit is checked whenever an object is resized. + + A hash part improves property lookup performance even for small objects, + starting from roughly 4 properties. However, this ignores the cost of + setting up and managing the hash part, which pays off only if property + lookups made through the hash part can offset the setup cost. A hash part + is worth it for heavily accessed small objects or large objects (even those + accessed quite infrequently). The limit doesn't take into account property + access frequency, so it is necessarily a compromise. + + A lower value improves performance (a value as low as 4-8 can be useful) + while a higher value conserves memory. diff --git a/config/examples/low_memory.yaml b/config/examples/low_memory.yaml index 649fb41a88..fb724195fd 100644 --- a/config/examples/low_memory.yaml +++ b/config/examples/low_memory.yaml @@ -55,6 +55,10 @@ DUK_USE_STRTAB_RESIZE_CHECK_MASK: 255 # -""- DUK_USE_HSTRING_ARRIDX: false +# Only add a hash table for quite large objects to conserve memory. Even +# lower memory targets usually drop hash part support entirely. +DUK_USE_HOBJECT_HASH_PROP_LIMIT: 64 + # Consider using pointer compression, see doc/low-memory.rst. 
#DUK_USE_REFCOUNT16: true #DUK_USE_STRHASH16: true diff --git a/doc/hobject-design.rst b/doc/hobject-design.rst index 46498e43b8..0887a079e5 100644 --- a/doc/hobject-design.rst +++ b/doc/hobject-design.rst @@ -731,12 +731,12 @@ lookups:: | 0 | = 0xffffffffU | UNUSED | | UNUSED | DELETED = DUK_HOBJECT_HASHIDX_DELETED - +---------+ = 0xfffffffeU - + | UNUSED | = 0xfffffffeU + +---------+ DELETED entries don't terminate hash probe sequences, UNUSED entries do. - Here, e_size = 5, e_next = 3, h_size = 7. + Here, e_size = 5, e_next = 3, h_size = 8. .. FIXME for some unknown reason the illustration breaks with pandoc @@ -815,8 +815,7 @@ Hash part details The hash part maps a key ``K`` to an index ``I`` of the entry part or indicates that ``K`` does not exist. The hash part uses a `closed hash table`__, i.e. the hash table has a fixed size and a certain key has -multiple possible locations in a *probe sequence*. The current probe -sequence uses a variant of *double hashing*. +multiple possible locations in a *probe sequence*. __ http://en.wikipedia.org/wiki/Hash_table#Open_addressing @@ -834,46 +833,18 @@ is either an index to the entry part, or one of two markers: Hash table size (``h_size``) is selected relative to the maximum number of inserted elements ``N`` (equal to ``e_size`` in practice) in two steps: -#. A temporary value ``T`` is selected relative to the number of entries, - as ``c * N`` where ``c`` is currently about 1.2. - -#. ``T`` is rounded upwards to the closest prime from a pre-generated - list of primes with an approximately fixed prime-to-prime ratio. - - + The list of primes generated by ``genhashsizes.py``, and is encoded - in a bit packed format, decoded on the fly. See ``genhashsizes.py`` - for details. - - + The fact that the hash table size is a prime simplifies probe sequence - handling: it is easy to select probe steps which are guaranteed to - cover all entries of the hash table. +#. Find lowest N so that ``2 ** N > e_size``. 
- + The ratio between successive primes is currently about 1.15. - As a result, the hash table size is about 1.2-1.4 times larger than - the maximum number of properties in the entry part. This implies a - maximum hash table load factor of about 72-83%. - - + The current minimum prime used is 17. +#. Use ``2 ** (N + 1)`` as hash size. This guarantees load factor is + lower than 0.5 after resize. The probe sequence for a certain key is guaranteed to walk through every -hash table entry, and is generated as follows: - -#. The initial hash index is computed directly from the string hash, - modulo hash table size as: ``I = string_hash % h_size``. - -#. The probe step is then selected from a pre-generated table of 32 - probe steps as: ``S = probe_steps[string_hash % 32]``. - - + The probe steps are is guaranteed to be non-zero and relatively prime - to all precomputed hash table size primes. See ``genhashsizes.py``. +hash table entry. Currently the probe sequence is simply: - + Currently the precomputed steps are small primes which are not present - in the precomputed hash size primes list. Technically they don't need - to be primes (or small), as long as they are relatively prime to all - possible hash table sizes, i.e. ``gcd(S, h_size)=1``, to guarantee that - the probe sequence walks through all entries of the hash. +* ``(X + i) % h_size`` where ``X = string_hash % h_size`` and i=0,1,...,h_size-1. -#. The probe sequence is: ``(X + i*S) % h_size`` where i=0,1,...h_size-1. +This isn't ideal for avoiding clustering (double hashing would be better) +but is cache friendly and works well enough with low load factors. When looking up an element from the hash table, we walk through the probe sequence looking at the hash table entries. If a UNUSED entry is found, the
If a UNUSED entry is found, the diff --git a/src-input/duk_hobject.h b/src-input/duk_hobject.h index 37b794b55f..091bd730f2 100644 --- a/src-input/duk_hobject.h +++ b/src-input/duk_hobject.h @@ -32,8 +32,8 @@ #if !defined(DUK_HOBJECT_H_INCLUDED) #define DUK_HOBJECT_H_INCLUDED -/* Object flag. There are currently 25 flag bits available. Make sure - * this stays in sync with debugger object inspection code. +/* Object flags. Make sure this stays in sync with debugger object + * inspection code. */ /* XXX: some flags are object subtype specific (e.g. common to all function @@ -651,22 +651,9 @@ #if defined(DUK_USE_OBJSIZES16) #define DUK_HOBJECT_MAX_PROPERTIES 0x0000ffffUL #else -#define DUK_HOBJECT_MAX_PROPERTIES 0x7fffffffUL /* 2**31-1 ~= 2G properties */ +#define DUK_HOBJECT_MAX_PROPERTIES 0x3fffffffUL /* 2**30-1 ~= 1G properties */ #endif -/* higher value conserves memory; also note that linear scan is cache friendly */ -#define DUK_HOBJECT_E_USE_HASH_LIMIT 32 - -/* hash size relative to entries size: for value X, approx. hash_prime(e_size + e_size / X) */ -#define DUK_HOBJECT_H_SIZE_DIVISOR 4 /* hash size approx. 1.25 times entries size */ - -/* if new_size < L * old_size, resize without abandon check; L = 3-bit fixed point, e.g. 9 -> 9/8 = 112.5% */ -#define DUK_HOBJECT_A_FAST_RESIZE_LIMIT 9 /* 112.5%, i.e. new size less than 12.5% higher -> fast resize */ - -/* if density < L, abandon array part, L = 3-bit fixed point, e.g. 2 -> 2/8 = 25% */ -/* limit is quite low: one array entry is 8 bytes, one normal entry is 4+1+8+4 = 17 bytes (with hash entry) */ -#define DUK_HOBJECT_A_ABANDON_LIMIT 2 /* 25%, i.e. 
less than 25% used -> abandon */ - /* internal align target for props allocation, must be 2*n for some n */ #if (DUK_USE_ALIGN_BY == 4) #define DUK_HOBJECT_ALIGN_TARGET 4 @@ -678,18 +665,6 @@ #error invalid DUK_USE_ALIGN_BY #endif -/* controls for minimum entry part growth */ -#define DUK_HOBJECT_E_MIN_GROW_ADD 16 -#define DUK_HOBJECT_E_MIN_GROW_DIVISOR 8 /* 2^3 -> 1/8 = 12.5% min growth */ - -/* controls for minimum array part growth */ -#define DUK_HOBJECT_A_MIN_GROW_ADD 16 -#define DUK_HOBJECT_A_MIN_GROW_DIVISOR 8 /* 2^3 -> 1/8 = 12.5% min growth */ - -/* probe sequence */ -#define DUK_HOBJECT_HASH_INITIAL(hash,h_size) ((hash) % (h_size)) -#define DUK_HOBJECT_HASH_PROBE_STEP(hash) DUK_UTIL_GET_HASH_PROBE_STEP((hash)) - /* * PC-to-line constants */ diff --git a/src-input/duk_hobject_props.c b/src-input/duk_hobject_props.c index ae9cfa3175..8af02d9c96 100644 --- a/src-input/duk_hobject_props.c +++ b/src-input/duk_hobject_props.c @@ -52,10 +52,6 @@ #define DUK__NO_ARRAY_INDEX DUK_HSTRING_NO_ARRAY_INDEX -/* hash probe sequence */ -#define DUK__HASH_INITIAL(hash,h_size) DUK_HOBJECT_HASH_INITIAL((hash),(h_size)) -#define DUK__HASH_PROBE_STEP(hash) DUK_HOBJECT_HASH_PROBE_STEP((hash)) - /* marker values for hash part */ #define DUK__HASH_UNUSED DUK_HOBJECT_HASHIDX_UNUSED #define DUK__HASH_DELETED DUK_HOBJECT_HASHIDX_DELETED @@ -218,14 +214,26 @@ DUK_LOCAL duk_bool_t duk__key_is_plain_buf_ownprop(duk_hthread *thr, duk_hbuffer DUK_LOCAL duk_uint32_t duk__get_default_h_size(duk_uint32_t e_size) { DUK_ASSERT(e_size <= DUK_HOBJECT_MAX_PROPERTIES); - if (e_size >= DUK_HOBJECT_E_USE_HASH_LIMIT) { + if (e_size >= DUK_USE_HOBJECT_HASH_PROP_LIMIT) { duk_uint32_t res; + duk_uint32_t tmp; - /* result: hash_prime(floor(1.2 * e_size)) */ - res = duk_util_get_hash_prime(e_size + e_size / DUK_HOBJECT_H_SIZE_DIVISOR); - - /* if fails, e_size will be zero = not an issue, except performance-wise */ - DUK_ASSERT(res == 0 || res > e_size); + /* Hash size should be 2^N where N is chosen so 
that 2^N is + * larger than e_size. Extra shifting is used to ensure hash + * is relatively sparse. + */ + tmp = e_size; + res = 2; /* Result will be 2 ** (N + 1). */ + while (tmp >= 0x40) { + tmp >>= 6; + res <<= 6; + } + while (tmp != 0) { + tmp >>= 1; + res <<= 1; + } + DUK_ASSERT((DUK_HOBJECT_MAX_PROPERTIES << 2U) > DUK_HOBJECT_MAX_PROPERTIES); /* Won't wrap, even shifted by 2. */ + DUK_ASSERT(res > e_size); return res; } else { return 0; @@ -239,7 +247,7 @@ DUK_LOCAL duk_uint32_t duk__get_min_grow_e(duk_uint32_t e_size) { DUK_ASSERT(e_size <= DUK_HOBJECT_MAX_PROPERTIES); - res = (e_size + DUK_HOBJECT_E_MIN_GROW_ADD) / DUK_HOBJECT_E_MIN_GROW_DIVISOR; + res = (e_size + DUK_USE_HOBJECT_ENTRY_MINGROW_ADD) / DUK_USE_HOBJECT_ENTRY_MINGROW_DIVISOR; DUK_ASSERT(res >= 1); /* important for callers */ return res; } @@ -250,7 +258,7 @@ DUK_LOCAL duk_uint32_t duk__get_min_grow_a(duk_uint32_t a_size) { DUK_ASSERT((duk_size_t) a_size <= DUK_HOBJECT_MAX_PROPERTIES); - res = (a_size + DUK_HOBJECT_A_MIN_GROW_ADD) / DUK_HOBJECT_A_MIN_GROW_DIVISOR; + res = (a_size + DUK_USE_HOBJECT_ARRAY_MINGROW_ADD) / DUK_USE_HOBJECT_ARRAY_MINGROW_DIVISOR; DUK_ASSERT(res >= 1); /* important for callers */ return res; } @@ -325,7 +333,7 @@ DUK_LOCAL duk_bool_t duk__abandon_array_density_check(duk_uint32_t a_used, duk_u * of the check, but may confuse debugging. */ - return (a_used < DUK_HOBJECT_A_ABANDON_LIMIT * (a_size >> 3)); + return (a_used < DUK_USE_HOBJECT_ARRAY_ABANDON_LIMIT * (a_size >> 3)); } /* Fast check for extending array: check whether or not a slow density check is required. 
*/ @@ -351,7 +359,7 @@ DUK_LOCAL duk_bool_t duk__abandon_array_slow_check_required(duk_uint32_t arr_idx * arr_idx > limit'' * ((old_size + 7) / 8) */ - return (arr_idx > DUK_HOBJECT_A_FAST_RESIZE_LIMIT * ((old_size + 7) >> 3)); + return (arr_idx > DUK_USE_HOBJECT_ARRAY_FAST_RESIZE_LIMIT * ((old_size + 7) >> 3)); } /* @@ -851,6 +859,8 @@ DUK_INTERNAL void duk_hobject_realloc_props(duk_hthread *thr, #if defined(DUK_USE_HOBJECT_HASH_PART) if (DUK_UNLIKELY(new_h_size > 0)) { + duk_uint32_t mask; + DUK_ASSERT(new_h != NULL); /* fill new_h with u32 0xff = UNUSED */ @@ -859,13 +869,15 @@ DUK_INTERNAL void duk_hobject_realloc_props(duk_hthread *thr, DUK_MEMSET(new_h, 0xff, sizeof(duk_uint32_t) * new_h_size); DUK_ASSERT(new_e_next <= new_h_size); /* equality not actually possible */ + + mask = new_h_size - 1; for (i = 0; i < new_e_next; i++) { duk_hstring *key = new_e_k[i]; duk_uint32_t j, step; DUK_ASSERT(key != NULL); - j = DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), new_h_size); - step = DUK__HASH_PROBE_STEP(DUK_HSTRING_GET_HASH(key)); + j = DUK_HSTRING_GET_HASH(key) & mask; + step = 1; /* Cache friendly but clustering prone. */ for (;;) { DUK_ASSERT(new_h[j] != DUK__HASH_DELETED); /* should never happen */ @@ -875,10 +887,9 @@ DUK_INTERNAL void duk_hobject_realloc_props(duk_hthread *thr, break; } DUK_DDD(DUK_DDDPRINT("rebuild miss %ld, step %ld", (long) j, (long) step)); - j = (j + step) % new_h_size; + j = (j + step) & mask; - /* guaranteed to finish */ - DUK_ASSERT(j != (duk_uint32_t) DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), new_h_size)); + /* Guaranteed to finish (hash is larger than #props). 
*/ } } } else { @@ -1122,7 +1133,7 @@ DUK_INTERNAL void duk_hobject_compact_props(duk_hthread *thr, duk_hobject *obj) } #if defined(DUK_USE_HOBJECT_HASH_PART) - if (e_size >= DUK_HOBJECT_E_USE_HASH_LIMIT) { + if (e_size >= DUK_USE_HOBJECT_HASH_PROP_LIMIT) { h_size = duk__get_default_h_size(e_size); } else { h_size = 0; @@ -1183,13 +1194,15 @@ DUK_INTERNAL void duk_hobject_find_existing_entry(duk_heap *heap, duk_hobject *o duk_uint32_t n; duk_uint32_t i, step; duk_uint32_t *h_base; + duk_uint32_t mask; DUK_DDD(DUK_DDDPRINT("duk_hobject_find_existing_entry() using hash part for lookup")); h_base = DUK_HOBJECT_H_GET_BASE(heap, obj); n = DUK_HOBJECT_GET_HSIZE(obj); - i = DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), n); - step = DUK__HASH_PROBE_STEP(DUK_HSTRING_GET_HASH(key)); + mask = n - 1; + i = DUK_HSTRING_GET_HASH(key) & mask; + step = 1; /* Cache friendly but clustering prone. */ for (;;) { duk_uint32_t t; @@ -1217,10 +1230,9 @@ DUK_INTERNAL void duk_hobject_find_existing_entry(duk_heap *heap, duk_hobject *o DUK_DDD(DUK_DDDPRINT("lookup miss i=%ld, t=%ld", (long) i, (long) t)); } - i = (i + step) % n; + i = (i + step) & mask; - /* guaranteed to finish, as hash is never full */ - DUK_ASSERT(i != (duk_uint32_t) DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), n)); + /* Guaranteed to finish (hash is larger than #props). */ } } #endif /* DUK_USE_HOBJECT_HASH_PART */ @@ -1325,13 +1337,14 @@ DUK_LOCAL duk_bool_t duk__alloc_entry_checked(duk_hthread *thr, duk_hobject *obj #if defined(DUK_USE_HOBJECT_HASH_PART) if (DUK_UNLIKELY(DUK_HOBJECT_GET_HSIZE(obj) > 0)) { - duk_uint32_t n; + duk_uint32_t n, mask; duk_uint32_t i, step; duk_uint32_t *h_base = DUK_HOBJECT_H_GET_BASE(thr->heap, obj); n = DUK_HOBJECT_GET_HSIZE(obj); - i = DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), n); - step = DUK__HASH_PROBE_STEP(DUK_HSTRING_GET_HASH(key)); + mask = n - 1; + i = DUK_HSTRING_GET_HASH(key) & mask; + step = 1; /* Cache friendly but clustering prone. 
*/ for (;;) { duk_uint32_t t = h_base[i]; @@ -1346,10 +1359,9 @@ DUK_LOCAL duk_bool_t duk__alloc_entry_checked(duk_hthread *thr, duk_hobject *obj break; } DUK_DDD(DUK_DDDPRINT("duk__alloc_entry_checked() miss %ld", (long) i)); - i = (i + step) % n; + i = (i + step) & mask; - /* guaranteed to find an empty slot */ - DUK_ASSERT(i != (duk_uint32_t) DUK__HASH_INITIAL(DUK_HSTRING_GET_HASH(key), DUK_HOBJECT_GET_HSIZE(obj))); + /* Guaranteed to finish (hash is larger than #props). */ } } #endif /* DUK_USE_HOBJECT_HASH_PART */ diff --git a/src-input/duk_util.h b/src-input/duk_util.h index 56d6661822..4a3ec98522 100644 --- a/src-input/duk_util.h +++ b/src-input/duk_util.h @@ -5,10 +5,6 @@ #if !defined(DUK_UTIL_H_INCLUDED) #define DUK_UTIL_H_INCLUDED -#define DUK_UTIL_MIN_HASH_PRIME 17 /* must match genhashsizes.py */ - -#define DUK_UTIL_GET_HASH_PROBE_STEP(hash) (duk_util_probe_steps[(hash) & 0x1f]) - #if defined(DUK_USE_GET_RANDOM_DOUBLE) #define DUK_UTIL_GET_RANDOM_DOUBLE(thr) DUK_USE_GET_RANDOM_DOUBLE((thr)->heap_udata) #else @@ -504,10 +500,6 @@ DUK_INTERNAL_DECL duk_uint8_t duk_util_probe_steps[32]; DUK_INTERNAL_DECL duk_uint32_t duk_util_hashbytes(const duk_uint8_t *data, duk_size_t len, duk_uint32_t seed); #endif -#if defined(DUK_USE_HOBJECT_HASH_PART) -DUK_INTERNAL_DECL duk_uint32_t duk_util_get_hash_prime(duk_uint32_t size); -#endif - DUK_INTERNAL_DECL duk_uint32_t duk_bd_decode(duk_bitdecoder_ctx *ctx, duk_small_int_t bits); DUK_INTERNAL_DECL duk_small_uint_t duk_bd_decode_flag(duk_bitdecoder_ctx *ctx); DUK_INTERNAL_DECL duk_uint32_t duk_bd_decode_flagged(duk_bitdecoder_ctx *ctx, duk_small_int_t bits, duk_uint32_t def_value); diff --git a/src-input/duk_util_hashprime.c b/src-input/duk_util_hashprime.c deleted file mode 100644 index df2458b28e..0000000000 --- a/src-input/duk_util_hashprime.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Round a number upwards to a prime (not usually the nearest one). 
- * - * Uses a table of successive 32-bit primes whose ratio is roughly - * constant. This keeps the relative upwards 'rounding error' bounded - * and the data size small. A simple 'predict-correct' compression is - * used to compress primes to one byte per prime. See genhashsizes.py - * for details. - * - * The minimum prime returned here must be coordinated with the possible - * probe sequence steps in duk_hobject and duk_heap stringtable. - */ - -#include "duk_internal.h" - -#if defined(DUK_USE_HOBJECT_HASH_PART) - -/* hash size ratio goal, must match genhashsizes.py */ -#define DUK__HASH_SIZE_RATIO 1177 /* floor(1.15 * (1 << 10)) */ - -/* prediction corrections for prime list (see genhashsizes.py) */ -DUK_LOCAL const duk_int8_t duk__hash_size_corrections[] = { - 17, /* minimum prime */ - 4, 3, 4, 1, 4, 1, 1, 2, 2, 2, 2, 1, 6, 6, 9, 5, 1, 2, 2, 5, 1, 3, 3, 3, - 5, 4, 4, 2, 4, 8, 3, 4, 23, 2, 4, 7, 8, 11, 2, 12, 15, 10, 1, 1, 5, 1, 5, - 8, 9, 17, 14, 10, 7, 5, 2, 46, 21, 1, 9, 9, 4, 4, 10, 23, 36, 6, 20, 29, - 18, 6, 19, 21, 16, 11, 5, 5, 48, 9, 1, 39, 14, 8, 4, 29, 9, 1, 15, 48, 12, - 22, 6, 15, 27, 4, 2, 17, 28, 8, 9, 4, 5, 8, 3, 3, 8, 37, 11, 15, 8, 30, - 43, 6, 33, 41, 5, 20, 32, 41, 38, 24, 77, 14, 19, 11, 4, 35, 18, 19, 41, - 10, 23, 16, 9, 2, - -1 -}; - -/* probe steps (see genhashsizes.py), currently assumed to be 32 entries long - * (DUK_UTIL_GET_HASH_PROBE_STEP macro). 
- */ -DUK_INTERNAL duk_uint8_t duk_util_probe_steps[32] = { - 2, 3, 5, 7, 11, 13, 19, 31, 41, 47, 59, 67, 73, 79, 89, 101, 103, 107, - 109, 127, 137, 139, 149, 157, 163, 167, 173, 181, 191, 193, 197, 199 -}; - -DUK_INTERNAL duk_uint32_t duk_util_get_hash_prime(duk_uint32_t size) { - const duk_int8_t *p = duk__hash_size_corrections; - duk_uint32_t curr; - - curr = (duk_uint32_t) *p++; - for (;;) { - duk_small_int_t t = (duk_small_int_t) *p++; - if (t < 0) { - /* may happen if size is very close to 2^32-1 */ - break; - } - - /* prediction: portable variant using doubles if 64-bit values not available */ -#if defined(DUK_USE_64BIT_OPS) - curr = (duk_uint32_t) ((((duk_uint64_t) curr) * ((duk_uint64_t) DUK__HASH_SIZE_RATIO)) >> 10); -#else - /* 32-bit x 11-bit = 43-bit, fits accurately into a double */ - curr = (duk_uint32_t) DUK_FLOOR(((double) curr) * ((double) DUK__HASH_SIZE_RATIO) / 1024.0); -#endif - - /* correction */ - curr += t; - - DUK_DDD(DUK_DDDPRINT("size=%ld, curr=%ld", (long) size, (long) curr)); - - if (curr >= size) { - return curr; - } - } - return 0; -} - -#endif /* DUK_USE_HOBJECT_HASH_PART */ diff --git a/tests/perf/test-prop-read-1024.js b/tests/perf/test-prop-read-1024.js new file mode 100644 index 0000000000..85e033bfdd --- /dev/null +++ b/tests/perf/test-prop-read-1024.js @@ -0,0 +1,42 @@ +/* + * Basic property read performance + */ + +// XXX: Perf tests should access all or many of the properties to get a +// better sense of the average case. Tests should also cover cases where +// an object has tens of millions of properties which are accessed in a +// pseudorandom sequence (computed beforehand and reused?) 
which has +// different cache behavior + +if (typeof print !== 'function') { print = console.log; } + +function test() { + var obj = {}; + var i; + var ign; + + for (i = 0; i < 1024 - 1; i++) { + obj['prop-' + i] = 1; + } + obj['foo'] = 123; + + for (i = 0; i < 1e7; i++) { + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + } +} + +try { + test(); +} catch (e) { + print(e.stack || e); + throw e; +} diff --git a/tests/perf/test-prop-read-16.js b/tests/perf/test-prop-read-16.js new file mode 100644 index 0000000000..9eb8b88b4f --- /dev/null +++ b/tests/perf/test-prop-read-16.js @@ -0,0 +1,36 @@ +/* + * Basic property read performance + */ + +if (typeof print !== 'function') { print = console.log; } + +function test() { + var obj = {}; + var i; + var ign; + + for (i = 0; i < 16 - 1; i++) { + obj['prop-' + i] = 1; + } + obj['foo'] = 123; + + for (i = 0; i < 1e7; i++) { + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + } +} + +try { + test(); +} catch (e) { + print(e.stack || e); + throw e; +} diff --git a/tests/perf/test-prop-read-256.js b/tests/perf/test-prop-read-256.js new file mode 100644 index 0000000000..e30015f9f6 --- /dev/null +++ b/tests/perf/test-prop-read-256.js @@ -0,0 +1,36 @@ +/* + * Basic property read performance + */ + +if (typeof print !== 'function') { print = console.log; } + +function test() { + var obj = {}; + var i; + var ign; + + for (i = 0; i < 256 - 1; i++) { + obj['prop-' + i] = 1; + } + obj['foo'] = 123; + + for (i = 0; i < 1e7; i++) { + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + } +} + +try { + test(); +} catch (e) { + print(e.stack || e); + throw e; +} diff --git 
a/tests/perf/test-prop-read-32.js b/tests/perf/test-prop-read-32.js new file mode 100644 index 0000000000..79d811ce63 --- /dev/null +++ b/tests/perf/test-prop-read-32.js @@ -0,0 +1,36 @@ +/* + * Basic property read performance + */ + +if (typeof print !== 'function') { print = console.log; } + +function test() { + var obj = {}; + var i; + var ign; + + for (i = 0; i < 32 - 1; i++) { + obj['prop-' + i] = 1; + } + obj['foo'] = 123; + + for (i = 0; i < 1e7; i++) { + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + } +} + +try { + test(); +} catch (e) { + print(e.stack || e); + throw e; +} diff --git a/tests/perf/test-prop-read-48.js b/tests/perf/test-prop-read-48.js new file mode 100644 index 0000000000..f02d86ce79 --- /dev/null +++ b/tests/perf/test-prop-read-48.js @@ -0,0 +1,36 @@ +/* + * Basic property read performance + */ + +if (typeof print !== 'function') { print = console.log; } + +function test() { + var obj = {}; + var i; + var ign; + + for (i = 0; i < 48 - 1; i++) { + obj['prop-' + i] = 1; + } + obj['foo'] = 123; + + for (i = 0; i < 1e7; i++) { + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + } +} + +try { + test(); +} catch (e) { + print(e.stack || e); + throw e; +} diff --git a/tests/perf/test-prop-read-64.js b/tests/perf/test-prop-read-64.js new file mode 100644 index 0000000000..3abd6c8b39 --- /dev/null +++ b/tests/perf/test-prop-read-64.js @@ -0,0 +1,36 @@ +/* + * Basic property read performance + */ + +if (typeof print !== 'function') { print = console.log; } + +function test() { + var obj = {}; + var i; + var ign; + + for (i = 0; i < 64 - 1; i++) { + obj['prop-' + i] = 1; + } + obj['foo'] = 123; + + for (i = 0; i < 1e7; i++) { + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + 
ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + ign = obj.foo; + } +} + +try { + test(); +} catch (e) { + print(e.stack || e); + throw e; +} diff --git a/tools/configure.py b/tools/configure.py index ea302c2f4c..9e420abc83 100644 --- a/tools/configure.py +++ b/tools/configure.py @@ -505,7 +505,6 @@ def default_from_script_path(optname, orig, alternatives): 'duk_util_bitencoder.c', 'duk_util.h', 'duk_util_hashbytes.c', - 'duk_util_hashprime.c', 'duk_util_misc.c', 'duk_util_tinyrandom.c', 'duk_util_bufwriter.c', @@ -896,7 +895,6 @@ def select_combined_sources(): 'duk_error_macros.c', 'duk_unicode_support.c', 'duk_util_misc.c', - 'duk_util_hashprime.c', 'duk_hobject_class.c' ] diff --git a/util/dist.py b/util/dist.py index ba1ca4c941..2adb6b06fc 100644 --- a/util/dist.py +++ b/util/dist.py @@ -423,7 +423,6 @@ def main(): 'duk_util_bufwriter.c', 'duk_util.h', 'duk_util_hashbytes.c', - 'duk_util_hashprime.c', 'duk_util_misc.c', 'duk_util_tinyrandom.c', 'strings.yaml', diff --git a/util/genhashsizes.py b/util/genhashsizes.py deleted file mode 100644 index bb2236cd74..0000000000 --- a/util/genhashsizes.py +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env python2 -# -# Find a sequence of duk_hobject hash sizes which have a desired 'ratio' -# and are primes. Prime hash sizes ensure that all probe sequence values -# (less than hash size) are relatively prime to hash size, i.e. cover the -# entire hash. Prime data is packed into about 1 byte/prime using a -# prediction-correction model. -# -# Also generates a set of probe steps which are relatively prime to every -# hash size. 
-# - -import sys -import math - -def is_prime(n): - if n == 0: - return False - if n == 1 or n == 2: - return True - - n_limit = int(math.ceil(float(n) ** 0.5)) + 1 - n_limit += 100 # paranoia - if n_limit >= n: - n_limit = n - 1 - for i in xrange(2,n_limit + 1): - if (n % i) == 0: - return False - return True - -def next_prime(n): - while True: - n += 1 - if is_prime(n): - return n - -def generate_sizes(min_size, max_size, step_ratio): - "Generate a set of hash sizes following a nice ratio." - - sizes = [] - ratios = [] - curr = next_prime(min_size) - next = curr - sizes.append(curr) - - step_ratio = float(step_ratio) / 1024 - - while True: - if next > max_size: - break - ratio = float(next) / float(curr) - if ratio < step_ratio: - next = next_prime(next) - continue - sys.stdout.write('.'); sys.stdout.flush() - sizes.append(next) - ratios.append(ratio) - curr = next - next = next_prime(int(next * step_ratio)) - - sys.stdout.write('\n'); sys.stdout.flush() - return sizes, ratios - -def generate_corrections(sizes, step_ratio): - "Generate a set of correction from a ratio-based predictor." - - # Generate a correction list for size list, assuming steps follow a certain - # ratio; this allows us to pack size list into one byte per size - - res = [] - - res.append(sizes[0]) # first entry is first size - - for i in xrange(1, len(sizes)): - prev = sizes[i - 1] - pred = int(prev * step_ratio) >> 10 - diff = int(sizes[i] - pred) - res.append(diff) - - if diff < 0 or diff > 127: - raise Exception('correction does not fit into 8 bits') - - res.append(-1) # negative denotes last end of list - return res - -def generate_probes(count, sizes): - res = [] - - # Generate probe values which are guaranteed to be relatively prime to - # all generated hash size primes. These don't have to be primes, but - # we currently use smallest non-conflicting primes here. 
- - i = 2 - while len(res) < count: - if is_prime(i) and (i not in sizes): - if i > 255: - raise Exception('probe step does not fit into 8 bits') - res.append(i) - i += 1 - continue - i += 1 - - return res - -# NB: these must match duk_hobject defines and code -step_ratio = 1177 # approximately (1.15 * (1 << 10)) -min_size = 16 -max_size = 2**32 - 1 - -sizes, ratios = generate_sizes(min_size, max_size, step_ratio) -corrections = generate_corrections(sizes, step_ratio) -probes = generate_probes(32, sizes) -print len(sizes) -print 'SIZES: ' + repr(sizes) -print 'RATIOS: ' + repr(ratios) -print 'CORRECTIONS: ' + repr(corrections) -print 'PROBES: ' + repr(probes) - -# highest 32-bit prime -i = 2**32 -while True: - i -= 1 - if is_prime(i): - print 'highest 32-bit prime is: %d (0x%08x)' % (i, i) - break