Ruby 3.2.2p53 (2023-03-30 revision e51014f9c05aa65cbf203442d37fef7c12390015)
st.c
1/* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
3
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6
7/* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
14
15 The hash table has two arrays called *bins* and *entries*.
16
17      bins:
18     -------
19    |       |                  entries array:
20    |-------|            --------------------------------
21    | index |           |      | entry:  |        |      |
22    |-------|           |      |         |        |      |
23    | ...   |           | ...  | hash    |  ...   | ...  |
24    |-------|           |      | key     |        |      |
25    | empty |           |      | record  |        |      |
26    |-------|            --------------------------------
27    | ...   |                   ^                  ^
28    |-------|                   |_ entries start   |_ entries bound
29    |deleted|
30     -------
31
32 o The entry array contains table entries in the same order as they
33 were inserted.
34
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
39
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
42
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
46
47 The bin array size is always power of two, it makes mapping very
48 fast by using the corresponding lower bits of the hash.
49 Generally it is not a good idea to ignore some part of the hash.
50 But alternative approach is worse. For example, we could use a
51 modulo operation for mapping and a prime number for the size of
52 the bin array. Unfortunately, the modulo operation for big
53 64-bit numbers is extremely slow (it takes more than 100 cycles
54 on modern Intel CPUs).
55
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
64
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
69
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
72
73 o The length of the bin array is at least two times more than the
74 entry array length. This keeps the table load factor healthy.
75 The trigger of rebuilding the table is always a case when we can
76 not insert an entry anymore at the entries bound. We could
77 change the entries bound too in case of deletion but then we need
78 a special code to count bins with corresponding deleted entries
79 and reset the bin values when there are too many bins
80 corresponding to deleted entries.
81
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
85 entries.
86
87 o To save memory very small tables have no allocated arrays
88 bins. We use a linear search for an access by a key.
89
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
92
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
97
98 This implementation speeds up the Ruby hash table benchmarks in
99 average by more than 40% on Intel Haswell CPU.
100
101*/
102
103#ifdef NOT_RUBY
104#include "regint.h"
105#include "st.h"
106#else
107#include "internal.h"
108#include "internal/bits.h"
109#include "internal/hash.h"
110#include "internal/sanitizers.h"
111#endif
112
113#include <stdio.h>
114#ifdef HAVE_STDLIB_H
115#include <stdlib.h>
116#endif
117#include <string.h>
118#include <assert.h>
119
/* Compiler hints.  With GCC-compatible compilers PREFETCH and EXPECT
   map onto the corresponding builtins and ATTRIBUTE_UNUSED onto the
   `unused` attribute.  Elsewhere PREFETCH and ATTRIBUTE_UNUSED expand
   to nothing and EXPECT to the bare expression. */
#if !defined(__GNUC__)
# define PREFETCH(addr, write_p)
# define EXPECT(expr, val) (expr)
# define ATTRIBUTE_UNUSED
#else
# define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
# define EXPECT(expr, val) __builtin_expect(expr, val)
# define ATTRIBUTE_UNUSED __attribute__((unused))
#endif
129
130/* The type of hashes. */
131typedef st_index_t st_hash_t;
132
134 st_hash_t hash;
135 st_data_t key;
136 st_data_t record;
137};
138
/* Hash type for tables keyed by numbers.  Each descriptor below lists
   the key comparison function first and the key hash function
   second. */
#define type_numhash st_hashtype_num
static const struct st_hash_type st_hashtype_num = {
    st_numcmp,
    st_numhash,
};

/* Hash type for tables keyed by strings. */
static int st_strcmp(st_data_t, st_data_t);
static st_index_t strhash(st_data_t);
static const struct st_hash_type type_strhash = {
    st_strcmp,
    strhash,
};

/* Hash type for tables keyed by strings whose character case is
   ignored. */
static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
static st_index_t strcasehash(st_data_t);
static const struct st_hash_type type_strcasehash = {
    st_locale_insensitive_strcasecmp_i,
    strcasehash,
};
158
/* Values used to catch uninitialized entries/bins during debugging.
   ST_INIT_VAL is the word-sized fill pattern and ST_INIT_VAL_BYTE is
   the single byte it is built from, so a byte-wise fill with
   ST_INIT_VAL_BYTE reproduces ST_INIT_VAL.  There is a possibility
   for a false alarm, but its probability is extremely small. */
#define ST_INIT_VAL 0xafafafafafafafaf
#define ST_INIT_VAL_BYTE 0xaf
164
/* When RUBY is defined, route the C allocator names used in this file
   to the corresponding ruby_x* functions.  Any previous macro
   definitions of these names are dropped first. */
#ifdef RUBY
#undef malloc
#undef realloc
#undef calloc
#undef free
#define malloc ruby_xmalloc
#define calloc ruby_xcalloc
#define realloc ruby_xrealloc
#define free ruby_xfree
#endif
175
/* Nonzero if keys X and Y are equal for table TAB: either they are
   the same value or the compare function of the table type returns 0
   for them.  X and Y may be evaluated twice. */
#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
/* Nonzero if the entry pointed to by PTR has hash HASH_VAL and a key
   equal to KEY_.  The hashes are compared first, so the compare
   function is called only when they match. */
#define PTR_EQUAL(tab, ptr, hash_val, key_) \
    ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))

/* As PTR_EQUAL only its result is returned in RES.  REBUILT_P is set
   up to TRUE if the table is rebuilt during the comparison, which is
   detected by a change of TAB's rebuilds_num. */
#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
    do { \
        unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
        res = PTR_EQUAL(tab, ptr, hash_val, key); \
        rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
    } while (FALSE)
188
189/* Features of a table. */
191 /* Power of 2 used for number of allocated entries. */
192 unsigned char entry_power;
193 /* Power of 2 used for number of allocated bins. Depending on the
194 table size, the number of bins is 2-4 times more than the
195 number of entries. */
196 unsigned char bin_power;
197 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
198 unsigned char size_ind;
199 /* Bins are packed in words of type st_index_t. The following is
200 a size of bins counted by words. */
201 st_index_t bins_words;
202};
203
/* Features of all possible size tables.  Each row is
   {entry_power, bin_power, size_ind, bins_words} and the table is
   indexed by the entry power, i.e. row N describes a table with 2^N
   allocated entries.  MAX_POWER2 is the largest supported entry
   power.  The variant is selected by the size of st_index_t. */
#if SIZEOF_ST_INDEX_T == 8
#define MAX_POWER2 62
static const struct st_features features[] = {
    {0, 1, 0, 0x0},
    {1, 2, 0, 0x1},
    {2, 3, 0, 0x1},
    {3, 4, 0, 0x2},
    {4, 5, 0, 0x4},
    {5, 6, 0, 0x8},
    {6, 7, 0, 0x10},
    {7, 8, 0, 0x20},
    {8, 9, 1, 0x80},
    {9, 10, 1, 0x100},
    {10, 11, 1, 0x200},
    {11, 12, 1, 0x400},
    {12, 13, 1, 0x800},
    {13, 14, 1, 0x1000},
    {14, 15, 1, 0x2000},
    {15, 16, 1, 0x4000},
    {16, 17, 2, 0x10000},
    {17, 18, 2, 0x20000},
    {18, 19, 2, 0x40000},
    {19, 20, 2, 0x80000},
    {20, 21, 2, 0x100000},
    {21, 22, 2, 0x200000},
    {22, 23, 2, 0x400000},
    {23, 24, 2, 0x800000},
    {24, 25, 2, 0x1000000},
    {25, 26, 2, 0x2000000},
    {26, 27, 2, 0x4000000},
    {27, 28, 2, 0x8000000},
    {28, 29, 2, 0x10000000},
    {29, 30, 2, 0x20000000},
    {30, 31, 2, 0x40000000},
    {31, 32, 2, 0x80000000},
    {32, 33, 3, 0x200000000},
    {33, 34, 3, 0x400000000},
    {34, 35, 3, 0x800000000},
    {35, 36, 3, 0x1000000000},
    {36, 37, 3, 0x2000000000},
    {37, 38, 3, 0x4000000000},
    {38, 39, 3, 0x8000000000},
    {39, 40, 3, 0x10000000000},
    {40, 41, 3, 0x20000000000},
    {41, 42, 3, 0x40000000000},
    {42, 43, 3, 0x80000000000},
    {43, 44, 3, 0x100000000000},
    {44, 45, 3, 0x200000000000},
    {45, 46, 3, 0x400000000000},
    {46, 47, 3, 0x800000000000},
    {47, 48, 3, 0x1000000000000},
    {48, 49, 3, 0x2000000000000},
    {49, 50, 3, 0x4000000000000},
    {50, 51, 3, 0x8000000000000},
    {51, 52, 3, 0x10000000000000},
    {52, 53, 3, 0x20000000000000},
    {53, 54, 3, 0x40000000000000},
    {54, 55, 3, 0x80000000000000},
    {55, 56, 3, 0x100000000000000},
    {56, 57, 3, 0x200000000000000},
    {57, 58, 3, 0x400000000000000},
    {58, 59, 3, 0x800000000000000},
    {59, 60, 3, 0x1000000000000000},
    {60, 61, 3, 0x2000000000000000},
    {61, 62, 3, 0x4000000000000000},
    {62, 63, 3, 0x8000000000000000},
};

#else
#define MAX_POWER2 30

static const struct st_features features[] = {
    {0, 1, 0, 0x1},
    {1, 2, 0, 0x1},
    {2, 3, 0, 0x2},
    {3, 4, 0, 0x4},
    {4, 5, 0, 0x8},
    {5, 6, 0, 0x10},
    {6, 7, 0, 0x20},
    {7, 8, 0, 0x40},
    {8, 9, 1, 0x100},
    {9, 10, 1, 0x200},
    {10, 11, 1, 0x400},
    {11, 12, 1, 0x800},
    {12, 13, 1, 0x1000},
    {13, 14, 1, 0x2000},
    {14, 15, 1, 0x4000},
    {15, 16, 1, 0x8000},
    {16, 17, 2, 0x20000},
    {17, 18, 2, 0x40000},
    {18, 19, 2, 0x80000},
    {19, 20, 2, 0x100000},
    {20, 21, 2, 0x200000},
    {21, 22, 2, 0x400000},
    {22, 23, 2, 0x800000},
    {23, 24, 2, 0x1000000},
    {24, 25, 2, 0x2000000},
    {25, 26, 2, 0x4000000},
    {26, 27, 2, 0x8000000},
    {27, 28, 2, 0x10000000},
    {28, 29, 2, 0x20000000},
    {29, 30, 2, 0x40000000},
    {30, 31, 2, 0x80000000},
};

#endif
311
312/* The reserved hash value and its substitution. */
313#define RESERVED_HASH_VAL (~(st_hash_t) 0)
314#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
315
316/* Return hash value of KEY for table TAB. */
317static inline st_hash_t
318do_hash(st_data_t key, st_table *tab)
319{
320 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
321
322 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
323 another value. Such mapping should be extremely rare. */
324 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
325}
326
327/* Power of 2 defining the minimal number of allocated entries. */
328#define MINIMAL_POWER2 2
329
330#if MINIMAL_POWER2 < 2
331#error "MINIMAL_POWER2 should be >= 2"
332#endif
333
334/* If the power2 of the allocated `entries` is less than the following
335 value, don't allocate bins and use a linear search. */
336#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
337
338/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
339static int
340get_power2(st_index_t size)
341{
342 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
343 if (n <= MAX_POWER2)
344 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
345#ifndef NOT_RUBY
346 /* Ran out of the table entries */
347 rb_raise(rb_eRuntimeError, "st_table too big");
348#endif
349 /* should raise exception */
350 return -1;
351}
352
353/* Return value of N-th bin in array BINS of table with bins size
354 index S. */
355static inline st_index_t
356get_bin(st_index_t *bins, int s, st_index_t n)
357{
358 return (s == 0 ? ((unsigned char *) bins)[n]
359 : s == 1 ? ((unsigned short *) bins)[n]
360 : s == 2 ? ((unsigned int *) bins)[n]
361 : ((st_index_t *) bins)[n]);
362}
363
364/* Set up N-th bin in array BINS of table with bins size index S to
365 value V. */
366static inline void
367set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
368{
369 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
370 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
371 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
372 else ((st_index_t *) bins)[n] = v;
373}
374
/* These macros define reserved values for empty table bin and table
   bin which contains a deleted entry.  We will never use such values
   for an entry index in bins. */
#define EMPTY_BIN 0
#define DELETED_BIN 1
/* Base of a real entry index in the bins: a bin referring to entry
   number E holds E + ENTRY_BASE. */
#define ENTRY_BASE 2

/* Mark I-th bin of table TAB as empty, in other words not
   corresponding to any entry. */
#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))

/* Values used for not found entry and bin with given
   characteristics. */
#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
#define UNDEFINED_BIN_IND (~(st_index_t) 0)

/* Entry and bin values returned when we found a table rebuild during
   the search. */
#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)

/* Mark I-th bin of table TAB as corresponding to a deleted table
   entry.  Only the bin value is changed; no counters of the table
   are updated here. */
#define MARK_BIN_DELETED(tab, i) \
    do { \
        set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
    } while (0)

/* Macros to check whether bin value B denotes an empty bin or a bin
   corresponding to a deleted entry. */
#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)

/* Macros to check empty bins and bins corresponding to deleted
   entries.  Bins are given by their index I in table TAB. */
#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))

/* Macros for marking and checking deleted entries given by their
   pointer E_PTR.  A deleted entry is recognized by the reserved hash
   value stored in it. */
#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
421
422/* Return bin size index of table TAB. */
423static inline unsigned int
424get_size_ind(const st_table *tab)
425{
426 return tab->size_ind;
427}
428
429/* Return the number of allocated bins of table TAB. */
430static inline st_index_t
431get_bins_num(const st_table *tab)
432{
433 return ((st_index_t) 1)<<tab->bin_power;
434}
435
436/* Return mask for a bin index in table TAB. */
437static inline st_index_t
438bins_mask(const st_table *tab)
439{
440 return get_bins_num(tab) - 1;
441}
442
443/* Return the index of table TAB bin corresponding to
444 HASH_VALUE. */
445static inline st_index_t
446hash_bin(st_hash_t hash_value, st_table *tab)
447{
448 return hash_value & bins_mask(tab);
449}
450
451/* Return the number of allocated entries of table TAB. */
452static inline st_index_t
453get_allocated_entries(const st_table *tab)
454{
455 return ((st_index_t) 1)<<tab->entry_power;
456}
457
458/* Return size of the allocated bins of table TAB. */
459static inline st_index_t
460bins_size(const st_table *tab)
461{
462 return features[tab->entry_power].bins_words * sizeof (st_index_t);
463}
464
465/* Mark all bins of table TAB as empty. */
466static void
467initialize_bins(st_table *tab)
468{
469 memset(tab->bins, 0, bins_size(tab));
470}
471
472/* Make table TAB empty. */
473static void
474make_tab_empty(st_table *tab)
475{
476 tab->num_entries = 0;
477 tab->entries_start = tab->entries_bound = 0;
478 if (tab->bins != NULL)
479 initialize_bins(tab);
480}
481
482#ifdef HASH_LOG
483#ifdef HAVE_UNISTD_H
484#include <unistd.h>
485#endif
/* Search and collision counters gathered when HASH_LOG is enabled. */
static struct {
    int all, total, num, str, strcase;
} collision;

/* Flag switching off output of package statistics at the end of
   program. */
static int init_st = 0;

/* Write the overall number of table searches and collisions into a
   per-process file under /tmp.  Nothing is written when no search was
   counted or when the file cannot be opened. */
static void
stat_col(void)
{
    char fname[10+sizeof(long)*3];
    FILE *f;
    double percent;

    if (collision.total == 0)
        return;
    snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid());
    f = fopen(fname, "w");
    if (!f)
        return;
    percent = ((double)collision.all / (collision.total)) * 100;
    fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
            percent);
    fprintf(f, "num: %d, str: %d, strcase: %d\n",
            collision.num, collision.str, collision.strcase);
    fclose(f);
}
510#endif
511
512/* Create and return table with TYPE which can hold at least SIZE
513 entries. The real number of entries which the table can hold is
514 the nearest power of two for SIZE. */
515st_table *
516st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
517{
518 st_table *tab;
519 int n;
520
521#ifdef HASH_LOG
522#if HASH_LOG+0 < 0
523 {
524 const char *e = getenv("ST_HASH_LOG");
525 if (!e || !*e) init_st = 1;
526 }
527#endif
528 if (init_st == 0) {
529 init_st = 1;
530 atexit(stat_col);
531 }
532#endif
533
534 n = get_power2(size);
535#ifndef RUBY
536 if (n < 0)
537 return NULL;
538#endif
539 tab = (st_table *) malloc(sizeof (st_table));
540#ifndef RUBY
541 if (tab == NULL)
542 return NULL;
543#endif
544 tab->type = type;
545 tab->entry_power = n;
546 tab->bin_power = features[n].bin_power;
547 tab->size_ind = features[n].size_ind;
548 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
549 tab->bins = NULL;
550 else {
551 tab->bins = (st_index_t *) malloc(bins_size(tab));
552#ifndef RUBY
553 if (tab->bins == NULL) {
554 free(tab);
555 return NULL;
556 }
557#endif
558 }
559 tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
560 * sizeof(st_table_entry));
561#ifndef RUBY
562 if (tab->entries == NULL) {
563 st_free_table(tab);
564 return NULL;
565 }
566#endif
567 make_tab_empty(tab);
568 tab->rebuilds_num = 0;
569 return tab;
570}
571
572/* Create and return table with TYPE which can hold a minimal number
573 of entries (see comments for get_power2). */
574st_table *
575st_init_table(const struct st_hash_type *type)
576{
577 return st_init_table_with_size(type, 0);
578}
579
580/* Create and return table which can hold a minimal number of
581 numbers. */
582st_table *
583st_init_numtable(void)
584{
585 return st_init_table(&type_numhash);
586}
587
588/* Create and return table which can hold SIZE numbers. */
589st_table *
590st_init_numtable_with_size(st_index_t size)
591{
592 return st_init_table_with_size(&type_numhash, size);
593}
594
595/* Create and return table which can hold a minimal number of
596 strings. */
597st_table *
598st_init_strtable(void)
599{
600 return st_init_table(&type_strhash);
601}
602
603/* Create and return table which can hold SIZE strings. */
604st_table *
605st_init_strtable_with_size(st_index_t size)
606{
607 return st_init_table_with_size(&type_strhash, size);
608}
609
610/* Create and return table which can hold a minimal number of strings
611 whose character case is ignored. */
612st_table *
613st_init_strcasetable(void)
614{
615 return st_init_table(&type_strcasehash);
616}
617
618/* Create and return table which can hold SIZE strings whose character
619 case is ignored. */
620st_table *
621st_init_strcasetable_with_size(st_index_t size)
622{
623 return st_init_table_with_size(&type_strcasehash, size);
624}
625
626/* Make table TAB empty. */
627void
628st_clear(st_table *tab)
629{
630 make_tab_empty(tab);
631 tab->rebuilds_num++;
632}
633
634/* Free table TAB space. */
635void
636st_free_table(st_table *tab)
637{
638 if (tab->bins != NULL)
639 free(tab->bins);
640 free(tab->entries);
641 free(tab);
642}
643
644/* Return byte size of memory allocated for table TAB. */
645size_t
646st_memsize(const st_table *tab)
647{
648 return(sizeof(st_table)
649 + (tab->bins == NULL ? 0 : bins_size(tab))
650 + get_allocated_entries(tab) * sizeof(st_table_entry));
651}
652
/* Forward declarations of the bin search functions defined further
   down in this file. */
static st_index_t
find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);

static st_index_t
find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);

static st_index_t
find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);

static st_index_t
find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
                               st_data_t key, st_index_t *bin_ind);
665
#ifdef HASH_LOG
/* Count one collision: bump the overall counter and, when TYPE is one
   of the built-in hash types, the counter of that type. */
static void
count_collision(const struct st_hash_type *type)
{
    collision.all++;
    if (type == &type_numhash) {
        collision.num++;
    }
    else if (type == &type_strhash) {
        collision.str++;
    }
    else if (type == &type_strcasehash) {
        collision.strcase++;
    }
}

/* Statistics hooks used by the bin search functions; COLLISION relies
   on a variable `tab` being in scope.  Both do nothing unless
   collision_check is nonzero.  Both arms of each conditional have
   type void, as C requires when either arm is void. */
#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
#define FOUND_BIN (collision_check ? (void)collision.total++ : (void)0)
#define collision_check 0
#else
#define COLLISION
#define FOUND_BIN
#endif
689
/* If the number of entries in the table is at least REBUILD_THRESHOLD
   times less than the entry array length, decrease the table
   size. */
#define REBUILD_THRESHOLD 4

#if REBUILD_THRESHOLD < 2
#error "REBUILD_THRESHOLD should be >= 2"
#endif

/* Rebuild table TAB.  Rebuilding removes all deleted bins and entries
   and can change size of the table entries and bins arrays.
   Rebuilding is implemented by creation of a new table or by
   compaction of the existing one.  On return the live entries occupy
   indexes 0 .. num_entries-1 of the entries array and rebuilds_num has
   been incremented. */
static void
rebuild_table(st_table *tab)
{
    st_index_t i, ni;
    unsigned int size_ind;
    st_table *new_tab;
    st_table_entry *new_entries;
    st_table_entry *curr_entry_ptr;
    st_index_t *bins;
    st_index_t bin_ind;

    /* Keep the current arrays when the live entries fill between
       1/REBUILD_THRESHOLD and 1/2 of the entries array, or when there
       are fewer of them than the minimal table size. */
    if ((2 * tab->num_entries <= get_allocated_entries(tab)
         && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
        || tab->num_entries < (1 << MINIMAL_POWER2)) {
        /* Compaction: */
        /* num_entries is counted again by the copy loop below. */
        tab->num_entries = 0;
        if (tab->bins != NULL)
            initialize_bins(tab);
        new_tab = tab;
        new_entries = tab->entries;
    }
    else {
        /* This allocation could trigger GC and compaction. If tab is the
         * gen_iv_tbl, then tab could have changed in size due to objects being
         * freed and/or moved. Do not store attributes of tab before this line. */
        /* NOTE(review): in builds without RUBY defined
           st_init_table_with_size can return NULL, which is not
           checked before new_tab is dereferenced - confirm whether
           that configuration needs handling. */
        new_tab = st_init_table_with_size(tab->type,
                                          2 * tab->num_entries - 1);
        new_entries = new_tab->entries;
    }

    ni = 0;
    bins = new_tab->bins;
    size_ind = get_size_ind(new_tab);
    st_index_t bound = tab->entries_bound;
    st_table_entry *entries = tab->entries;

    /* Move every live entry to the next free slot NI of the
       destination array and, when the destination has bins, register
       it there. */
    for (i = tab->entries_start; i < bound; i++) {
        curr_entry_ptr = &entries[i];
        PREFETCH(entries + i + 1, 0);
        if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
            continue;
        /* When compacting in place an entry may already be in its
           final slot; skip the self-assignment then. */
        if (&new_entries[ni] != curr_entry_ptr)
            new_entries[ni] = *curr_entry_ptr;
        if (EXPECT(bins != NULL, 1)) {
            bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
                                                curr_entry_ptr->key);
            set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
        }
        new_tab->num_entries++;
        ni++;
    }
    if (new_tab != tab) {
        /* Adopt the arrays and sizes of the new table, then free the
           old arrays and the now empty shell of the new table. */
        tab->entry_power = new_tab->entry_power;
        tab->bin_power = new_tab->bin_power;
        tab->size_ind = new_tab->size_ind;
        if (tab->bins != NULL)
            free(tab->bins);
        tab->bins = new_tab->bins;
        free(tab->entries);
        tab->entries = new_tab->entries;
        free(new_tab);
    }
    tab->entries_start = 0;
    tab->entries_bound = tab->num_entries;
    /* Lets DO_PTR_EQUAL_CHECK users notice that the table changed. */
    tab->rebuilds_num++;
}
769
770/* Return the next secondary hash index for table TAB using previous
771 index IND and PERTERB. Finally modulo of the function becomes a
772 full *cycle linear congruential generator*, in other words it
773 guarantees traversing all table bins in extreme case.
774
775 According the Hull-Dobell theorem a generator
776 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
777 o m and c are relatively prime
778 o a-1 is divisible by all prime factors of m
779 o a-1 is divisible by 4 if m is divisible by 4.
780
781 For our case a is 5, c is 1, and m is a power of two. */
782static inline st_index_t
783secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb)
784{
785 *perterb >>= 11;
786 ind = (ind << 2) + ind + *perterb + 1;
787 return hash_bin(ind, tab);
788}
789
790/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
791 search. Return the index of the found entry in array `entries`.
792 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
793 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
794static inline st_index_t
795find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
796{
797 int eq_p, rebuilt_p;
798 st_index_t i, bound;
799 st_table_entry *entries;
800
801 bound = tab->entries_bound;
802 entries = tab->entries;
803 for (i = tab->entries_start; i < bound; i++) {
804 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
805 if (EXPECT(rebuilt_p, 0))
806 return REBUILT_TABLE_ENTRY_IND;
807 if (eq_p)
808 return i;
809 }
810 return UNDEFINED_ENTRY_IND;
811}
812
813/* Use the quadratic probing. The method has a better data locality
814 but more collisions than the current approach. In average it
815 results in a bit slower search. */
816/*#define QUADRATIC_PROBE*/
817
818/* Return index of entry with HASH_VALUE and KEY in table TAB. If
819 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
820 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
821static st_index_t
822find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
823{
824 int eq_p, rebuilt_p;
825 st_index_t ind;
826#ifdef QUADRATIC_PROBE
827 st_index_t d;
828#else
829 st_index_t peterb;
830#endif
831 st_index_t bin;
832 st_table_entry *entries = tab->entries;
833
834 ind = hash_bin(hash_value, tab);
835#ifdef QUADRATIC_PROBE
836 d = 1;
837#else
838 peterb = hash_value;
839#endif
840 FOUND_BIN;
841 for (;;) {
842 bin = get_bin(tab->bins, get_size_ind(tab), ind);
843 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
844 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
845 if (EXPECT(rebuilt_p, 0))
846 return REBUILT_TABLE_ENTRY_IND;
847 if (eq_p)
848 break;
849 }
850 else if (EMPTY_BIN_P(bin))
851 return UNDEFINED_ENTRY_IND;
852#ifdef QUADRATIC_PROBE
853 ind = hash_bin(ind + d, tab);
854 d++;
855#else
856 ind = secondary_hash(ind, tab, &peterb);
857#endif
858 COLLISION;
859 }
860 return bin;
861}
862
863/* Find and return index of table TAB bin corresponding to an entry
864 with HASH_VALUE and KEY. If there is no such bin, return
865 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
866 return REBUILT_TABLE_BIN_IND. */
867static st_index_t
868find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
869{
870 int eq_p, rebuilt_p;
871 st_index_t ind;
872#ifdef QUADRATIC_PROBE
873 st_index_t d;
874#else
875 st_index_t peterb;
876#endif
877 st_index_t bin;
878 st_table_entry *entries = tab->entries;
879
880 ind = hash_bin(hash_value, tab);
881#ifdef QUADRATIC_PROBE
882 d = 1;
883#else
884 peterb = hash_value;
885#endif
886 FOUND_BIN;
887 for (;;) {
888 bin = get_bin(tab->bins, get_size_ind(tab), ind);
889 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
890 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
891 if (EXPECT(rebuilt_p, 0))
892 return REBUILT_TABLE_BIN_IND;
893 if (eq_p)
894 break;
895 }
896 else if (EMPTY_BIN_P(bin))
897 return UNDEFINED_BIN_IND;
898#ifdef QUADRATIC_PROBE
899 ind = hash_bin(ind + d, tab);
900 d++;
901#else
902 ind = secondary_hash(ind, tab, &peterb);
903#endif
904 COLLISION;
905 }
906 return ind;
907}
908
909/* Find and return index of table TAB bin corresponding to an entry
910 with HASH_VALUE and KEY. The entry should be in the table
911 already. */
912static st_index_t
913find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
914{
915 st_index_t ind;
916#ifdef QUADRATIC_PROBE
917 st_index_t d;
918#else
919 st_index_t peterb;
920#endif
921 st_index_t bin;
922
923 ind = hash_bin(hash_value, tab);
924#ifdef QUADRATIC_PROBE
925 d = 1;
926#else
927 peterb = hash_value;
928#endif
929 FOUND_BIN;
930 for (;;) {
931 bin = get_bin(tab->bins, get_size_ind(tab), ind);
932 if (EMPTY_OR_DELETED_BIN_P(bin))
933 return ind;
934#ifdef QUADRATIC_PROBE
935 ind = hash_bin(ind + d, tab);
936 d++;
937#else
938 ind = secondary_hash(ind, tab, &peterb);
939#endif
940 COLLISION;
941 }
942}
943
/* Search table TAB for an entry with hash *HASH_VALUE and KEY, and
   reserve a bin for it if it is not there yet.  The index of the
   found or reserved bin is stored through BIN_IND.  *HASH_VALUE is
   only read here.

   If the entry is found, the function result is the value of its
   bin, i.e. the entry index plus ENTRY_BASE.  Otherwise the result
   is UNDEFINED_ENTRY_IND, the number of table entries is incremented
   and *BIN_IND denotes an empty bin: the first deleted bin met during
   the search is reused for that (and marked empty) if there was one.
   We always find such bin as bins array length is bigger than the
   entries array length.  If the table was rebuilt during the search,
   return REBUILT_TABLE_ENTRY_IND; *BIN_IND is not set then. */
static st_index_t
find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
                               st_data_t key, st_index_t *bin_ind)
{
    int eq_p, rebuilt_p;
    st_index_t ind;
    st_hash_t curr_hash_value = *hash_value;
#ifdef QUADRATIC_PROBE
    st_index_t d;
#else
    st_index_t peterb;
#endif
    st_index_t entry_index;
    st_index_t first_deleted_bin_ind;
    st_table_entry *entries;

    ind = hash_bin(curr_hash_value, tab);
#ifdef QUADRATIC_PROBE
    d = 1;
#else
    peterb = curr_hash_value;
#endif
    FOUND_BIN;
    first_deleted_bin_ind = UNDEFINED_BIN_IND;
    entries = tab->entries;
    for (;;) {
        entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
        if (EMPTY_BIN_P(entry_index)) {
            /* End of the probe sequence: KEY is not in the table. */
            tab->num_entries++;
            entry_index = UNDEFINED_ENTRY_IND;
            if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
                /* We can reuse bin of a deleted entry. */
                ind = first_deleted_bin_ind;
                MARK_BIN_EMPTY(tab, ind);
            }
            break;
        }
        else if (! DELETED_BIN_P(entry_index)) {
            /* The bin refers to a live entry: compare it with KEY. */
            DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
            if (EXPECT(rebuilt_p, 0))
                return REBUILT_TABLE_ENTRY_IND;
            if (eq_p)
                break;
        }
        else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
            /* Remember only the first deleted bin on the way. */
            first_deleted_bin_ind = ind;
#ifdef QUADRATIC_PROBE
        ind = hash_bin(ind + d, tab);
        d++;
#else
        ind = secondary_hash(ind, tab, &peterb);
#endif
        COLLISION;
    }
    *bin_ind = ind;
    return entry_index;
}
1010
1011/* Find an entry with KEY in table TAB. Return non-zero if we found
1012 it. Set up *RECORD to the found entry record. */
1013int
1014st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1015{
1016 st_index_t bin;
1017 st_hash_t hash = do_hash(key, tab);
1018
1019 retry:
1020 if (tab->bins == NULL) {
1021 bin = find_entry(tab, hash, key);
1022 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1023 goto retry;
1024 if (bin == UNDEFINED_ENTRY_IND)
1025 return 0;
1026 }
1027 else {
1028 bin = find_table_entry_ind(tab, hash, key);
1029 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1030 goto retry;
1031 if (bin == UNDEFINED_ENTRY_IND)
1032 return 0;
1033 bin -= ENTRY_BASE;
1034 }
1035 if (value != 0)
1036 *value = tab->entries[bin].record;
1037 return 1;
1038}
1039
1040/* Find an entry with KEY in table TAB. Return non-zero if we found
1041 it. Set up *RESULT to the found table entry key. */
1042int
1043st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1044{
1045 st_index_t bin;
1046 st_hash_t hash = do_hash(key, tab);
1047
1048 retry:
1049 if (tab->bins == NULL) {
1050 bin = find_entry(tab, hash, key);
1051 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1052 goto retry;
1053 if (bin == UNDEFINED_ENTRY_IND)
1054 return 0;
1055 }
1056 else {
1057 bin = find_table_entry_ind(tab, hash, key);
1058 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1059 goto retry;
1060 if (bin == UNDEFINED_ENTRY_IND)
1061 return 0;
1062 bin -= ENTRY_BASE;
1063 }
1064 if (result != 0)
1065 *result = tab->entries[bin].key;
1066 return 1;
1067}
1068
1069/* Check the table and rebuild it if it is necessary. */
1070static inline void
1071rebuild_table_if_necessary (st_table *tab)
1072{
1073 st_index_t bound = tab->entries_bound;
1074
1075 if (bound == get_allocated_entries(tab))
1076 rebuild_table(tab);
1077}
1078
1079/* Insert (KEY, VALUE) into table TAB and return zero. If there is
1080 already entry with KEY in the table, return nonzero and update
1081 the value of the found entry. */
1082int
1083st_insert(st_table *tab, st_data_t key, st_data_t value)
1084{
1085 st_table_entry *entry;
1086 st_index_t bin;
1087 st_index_t ind;
1088 st_hash_t hash_value;
1089 st_index_t bin_ind;
1090 int new_p;
1091
1092 hash_value = do_hash(key, tab);
1093 retry:
1094 rebuild_table_if_necessary(tab);
1095 if (tab->bins == NULL) {
1096 bin = find_entry(tab, hash_value, key);
1097 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1098 goto retry;
1099 new_p = bin == UNDEFINED_ENTRY_IND;
1100 if (new_p)
1101 tab->num_entries++;
1102 bin_ind = UNDEFINED_BIN_IND;
1103 }
1104 else {
1105 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1106 key, &bin_ind);
1107 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1108 goto retry;
1109 new_p = bin == UNDEFINED_ENTRY_IND;
1110 bin -= ENTRY_BASE;
1111 }
1112 if (new_p) {
1113 ind = tab->entries_bound++;
1114 entry = &tab->entries[ind];
1115 entry->hash = hash_value;
1116 entry->key = key;
1117 entry->record = value;
1118 if (bin_ind != UNDEFINED_BIN_IND)
1119 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1120 return 0;
1121 }
1122 tab->entries[bin].record = value;
1123 return 1;
1124}
1125
1126/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1127 entry with KEY before the insertion. */
1128static inline void
1129st_add_direct_with_hash(st_table *tab,
1130 st_data_t key, st_data_t value, st_hash_t hash)
1131{
1132 st_table_entry *entry;
1133 st_index_t ind;
1134 st_index_t bin_ind;
1135
1136 rebuild_table_if_necessary(tab);
1137 ind = tab->entries_bound++;
1138 entry = &tab->entries[ind];
1139 entry->hash = hash;
1140 entry->key = key;
1141 entry->record = value;
1142 tab->num_entries++;
1143 if (tab->bins != NULL) {
1144 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1145 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1146 }
1147}
1148
1149/* Insert (KEY, VALUE) into table TAB. The table should not have
1150 entry with KEY before the insertion. */
1151void
1152st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1153{
1154 st_hash_t hash_value;
1155
1156 hash_value = do_hash(key, tab);
1157 st_add_direct_with_hash(tab, key, value, hash_value);
1158}
1159
1160/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1161 there is already entry with KEY in the table, return nonzero and
1162 update the value of the found entry. */
1163int
1164st_insert2(st_table *tab, st_data_t key, st_data_t value,
1165 st_data_t (*func)(st_data_t))
1166{
1167 st_table_entry *entry;
1168 st_index_t bin;
1169 st_index_t ind;
1170 st_hash_t hash_value;
1171 st_index_t bin_ind;
1172 int new_p;
1173
1174 hash_value = do_hash(key, tab);
1175 retry:
1176 rebuild_table_if_necessary (tab);
1177 if (tab->bins == NULL) {
1178 bin = find_entry(tab, hash_value, key);
1179 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1180 goto retry;
1181 new_p = bin == UNDEFINED_ENTRY_IND;
1182 if (new_p)
1183 tab->num_entries++;
1184 bin_ind = UNDEFINED_BIN_IND;
1185 }
1186 else {
1187 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1188 key, &bin_ind);
1189 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1190 goto retry;
1191 new_p = bin == UNDEFINED_ENTRY_IND;
1192 bin -= ENTRY_BASE;
1193 }
1194 if (new_p) {
1195 key = (*func)(key);
1196 ind = tab->entries_bound++;
1197 entry = &tab->entries[ind];
1198 entry->hash = hash_value;
1199 entry->key = key;
1200 entry->record = value;
1201 if (bin_ind != UNDEFINED_BIN_IND)
1202 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1203 return 0;
1204 }
1205 tab->entries[bin].record = value;
1206 return 1;
1207}
1208
1209/* Create and return a copy of table OLD_TAB. */
1210st_table *
1211st_copy(st_table *old_tab)
1212{
1213 st_table *new_tab;
1214
1215 new_tab = (st_table *) malloc(sizeof(st_table));
1216#ifndef RUBY
1217 if (new_tab == NULL)
1218 return NULL;
1219#endif
1220 *new_tab = *old_tab;
1221 if (old_tab->bins == NULL)
1222 new_tab->bins = NULL;
1223 else {
1224 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1225#ifndef RUBY
1226 if (new_tab->bins == NULL) {
1227 free(new_tab);
1228 return NULL;
1229 }
1230#endif
1231 }
1232 new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1233 * sizeof(st_table_entry));
1234#ifndef RUBY
1235 if (new_tab->entries == NULL) {
1236 st_free_table(new_tab);
1237 return NULL;
1238 }
1239#endif
1240 MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1241 get_allocated_entries(old_tab));
1242 if (old_tab->bins != NULL)
1243 MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1244 return new_tab;
1245}
1246
1247/* Update the entries start of table TAB after removing an entry
1248 with index N in the array entries. */
1249static inline void
1250update_range_for_deleted(st_table *tab, st_index_t n)
1251{
1252 /* Do not update entries_bound here. Otherwise, we can fill all
1253 bins by deleted entry value before rebuilding the table. */
1254 if (tab->entries_start == n) {
1255 st_index_t start = n + 1;
1256 st_index_t bound = tab->entries_bound;
1257 st_table_entry *entries = tab->entries;
1258 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1259 tab->entries_start = start;
1260 }
1261}
1262
1263/* Delete entry with KEY from table TAB, set up *VALUE (unless
1264 VALUE is zero) from deleted table entry, and return non-zero. If
1265 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1266 is zero), and return zero. */
1267static int
1268st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1269{
1270 st_table_entry *entry;
1271 st_index_t bin;
1272 st_index_t bin_ind;
1273 st_hash_t hash;
1274
1275 hash = do_hash(*key, tab);
1276 retry:
1277 if (tab->bins == NULL) {
1278 bin = find_entry(tab, hash, *key);
1279 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1280 goto retry;
1281 if (bin == UNDEFINED_ENTRY_IND) {
1282 if (value != 0) *value = 0;
1283 return 0;
1284 }
1285 }
1286 else {
1287 bin_ind = find_table_bin_ind(tab, hash, *key);
1288 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1289 goto retry;
1290 if (bin_ind == UNDEFINED_BIN_IND) {
1291 if (value != 0) *value = 0;
1292 return 0;
1293 }
1294 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1295 MARK_BIN_DELETED(tab, bin_ind);
1296 }
1297 entry = &tab->entries[bin];
1298 *key = entry->key;
1299 if (value != 0) *value = entry->record;
1300 MARK_ENTRY_DELETED(entry);
1301 tab->num_entries--;
1302 update_range_for_deleted(tab, bin);
1303 return 1;
1304}
1305
1306int
1307st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1308{
1309 return st_general_delete(tab, key, value);
1310}
1311
1312/* The function and other functions with suffix '_safe' or '_check'
1313 are originated from the previous implementation of the hash tables.
1314 It was necessary for correct deleting entries during traversing
1315 tables. The current implementation permits deletion during
1316 traversing without a specific way to do this. */
1317int
1318st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1319 st_data_t never ATTRIBUTE_UNUSED)
1320{
1321 return st_general_delete(tab, key, value);
1322}
1323
1324/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1325 return zero. Otherwise, remove the first entry in the table.
1326 Return its key through KEY and its record through VALUE (unless
1327 VALUE is zero). */
1328int
1329st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1330{
1331 st_index_t i, bound;
1332 st_index_t bin;
1333 st_table_entry *entries, *curr_entry_ptr;
1334 st_index_t bin_ind;
1335
1336 entries = tab->entries;
1337 bound = tab->entries_bound;
1338 for (i = tab->entries_start; i < bound; i++) {
1339 curr_entry_ptr = &entries[i];
1340 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1341 st_hash_t entry_hash = curr_entry_ptr->hash;
1342 st_data_t entry_key = curr_entry_ptr->key;
1343
1344 if (value != 0) *value = curr_entry_ptr->record;
1345 *key = entry_key;
1346 retry:
1347 if (tab->bins == NULL) {
1348 bin = find_entry(tab, entry_hash, entry_key);
1349 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1350 entries = tab->entries;
1351 goto retry;
1352 }
1353 curr_entry_ptr = &entries[bin];
1354 }
1355 else {
1356 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1357 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1358 entries = tab->entries;
1359 goto retry;
1360 }
1361 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1362 - ENTRY_BASE];
1363 MARK_BIN_DELETED(tab, bin_ind);
1364 }
1365 MARK_ENTRY_DELETED(curr_entry_ptr);
1366 tab->num_entries--;
1367 update_range_for_deleted(tab, i);
1368 return 1;
1369 }
1370 }
1371 if (value != 0) *value = 0;
1372 return 0;
1373}
1374
1375/* See comments for function st_delete_safe. */
1376void
1377st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1378 st_data_t never ATTRIBUTE_UNUSED)
1379{
1380}
1381
1382/* Find entry with KEY in table TAB, call FUNC with pointers to copies
1383 of the key and the value of the found entry, and non-zero as the
1384 3rd argument. If the entry is not found, call FUNC with a pointer
1385 to KEY, a pointer to zero, and a zero argument. If the call
1386 returns ST_CONTINUE, the table will have an entry with key and
1387 value returned by FUNC through the 1st and 2nd parameters. If the
1388 call of FUNC returns ST_DELETE, the table will not have entry with
1389 KEY. The function returns flag of that the entry with KEY was in
1390 the table before the call. */
1391int
1392st_update(st_table *tab, st_data_t key,
1393 st_update_callback_func *func, st_data_t arg)
1394{
1395 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1396 st_index_t bin = 0; /* Ditto */
1397 st_table_entry *entries;
1398 st_index_t bin_ind;
1399 st_data_t value = 0, old_key;
1400 int retval, existing;
1401 st_hash_t hash = do_hash(key, tab);
1402
1403 retry:
1404 entries = tab->entries;
1405 if (tab->bins == NULL) {
1406 bin = find_entry(tab, hash, key);
1407 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1408 goto retry;
1409 existing = bin != UNDEFINED_ENTRY_IND;
1410 entry = &entries[bin];
1411 bin_ind = UNDEFINED_BIN_IND;
1412 }
1413 else {
1414 bin_ind = find_table_bin_ind(tab, hash, key);
1415 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1416 goto retry;
1417 existing = bin_ind != UNDEFINED_BIN_IND;
1418 if (existing) {
1419 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1420 entry = &entries[bin];
1421 }
1422 }
1423 if (existing) {
1424 key = entry->key;
1425 value = entry->record;
1426 }
1427 old_key = key;
1428 retval = (*func)(&key, &value, arg, existing);
1429 switch (retval) {
1430 case ST_CONTINUE:
1431 if (! existing) {
1432 st_add_direct_with_hash(tab, key, value, hash);
1433 break;
1434 }
1435 if (old_key != key) {
1436 entry->key = key;
1437 }
1438 entry->record = value;
1439 break;
1440 case ST_DELETE:
1441 if (existing) {
1442 if (bin_ind != UNDEFINED_BIN_IND)
1443 MARK_BIN_DELETED(tab, bin_ind);
1444 MARK_ENTRY_DELETED(entry);
1445 tab->num_entries--;
1446 update_range_for_deleted(tab, bin);
1447 }
1448 break;
1449 }
1450 return existing;
1451}
1452
1453/* Traverse all entries in table TAB calling FUNC with current entry
1454 key and value and zero. If the call returns ST_STOP, stop
1455 traversing. If the call returns ST_DELETE, delete the current
1456 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1457 traversing. The function returns zero unless an error is found.
1458 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1459 different for ST_CHECK and when the current element is removed
1460 during traversing. */
1461static inline int
1462st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1463 int check_p)
1464{
1465 st_index_t bin;
1466 st_index_t bin_ind;
1467 st_table_entry *entries, *curr_entry_ptr;
1468 enum st_retval retval;
1469 st_index_t i, rebuilds_num;
1470 st_hash_t hash;
1471 st_data_t key;
1472 int error_p, packed_p = tab->bins == NULL;
1473
1474 entries = tab->entries;
1475 /* The bound can change inside the loop even without rebuilding
1476 the table, e.g. by an entry insertion. */
1477 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1478 curr_entry_ptr = &entries[i];
1479 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1480 continue;
1481 key = curr_entry_ptr->key;
1482 rebuilds_num = tab->rebuilds_num;
1483 hash = curr_entry_ptr->hash;
1484 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1485
1486 if (retval == ST_REPLACE && replace) {
1487 st_data_t value;
1488 value = curr_entry_ptr->record;
1489 retval = (*replace)(&key, &value, arg, TRUE);
1490 curr_entry_ptr->key = key;
1491 curr_entry_ptr->record = value;
1492 }
1493
1494 if (rebuilds_num != tab->rebuilds_num) {
1495 retry:
1496 entries = tab->entries;
1497 packed_p = tab->bins == NULL;
1498 if (packed_p) {
1499 i = find_entry(tab, hash, key);
1500 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1501 goto retry;
1502 error_p = i == UNDEFINED_ENTRY_IND;
1503 }
1504 else {
1505 i = find_table_entry_ind(tab, hash, key);
1506 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1507 goto retry;
1508 error_p = i == UNDEFINED_ENTRY_IND;
1509 i -= ENTRY_BASE;
1510 }
1511 if (error_p && check_p) {
1512 /* call func with error notice */
1513 retval = (*func)(0, 0, arg, 1);
1514 return 1;
1515 }
1516 curr_entry_ptr = &entries[i];
1517 }
1518 switch (retval) {
1519 case ST_REPLACE:
1520 break;
1521 case ST_CONTINUE:
1522 break;
1523 case ST_CHECK:
1524 if (check_p)
1525 break;
1526 case ST_STOP:
1527 return 0;
1528 case ST_DELETE: {
1529 st_data_t key = curr_entry_ptr->key;
1530
1531 again:
1532 if (packed_p) {
1533 bin = find_entry(tab, hash, key);
1534 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1535 goto again;
1536 if (bin == UNDEFINED_ENTRY_IND)
1537 break;
1538 }
1539 else {
1540 bin_ind = find_table_bin_ind(tab, hash, key);
1541 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1542 goto again;
1543 if (bin_ind == UNDEFINED_BIN_IND)
1544 break;
1545 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1546 MARK_BIN_DELETED(tab, bin_ind);
1547 }
1548 curr_entry_ptr = &entries[bin];
1549 MARK_ENTRY_DELETED(curr_entry_ptr);
1550 tab->num_entries--;
1551 update_range_for_deleted(tab, bin);
1552 break;
1553 }
1554 }
1555 }
1556 return 0;
1557}
1558
1559int
1560st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1561{
1562 return st_general_foreach(tab, func, replace, arg, TRUE);
1563}
1564
1565struct functor {
1566 st_foreach_callback_func *func;
1567 st_data_t arg;
1568};
1569
1570static int
1571apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1572{
1573 const struct functor *f = (void *)d;
1574 return f->func(k, v, f->arg);
1575}
1576
1577int
1578st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1579{
1580 const struct functor f = { func, arg };
1581 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1582}
1583
1584/* See comments for function st_delete_safe. */
1585int
1586st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1587 st_data_t never ATTRIBUTE_UNUSED)
1588{
1589 return st_general_foreach(tab, func, 0, arg, TRUE);
1590}
1591
1592/* Set up array KEYS by at most SIZE keys of head table TAB entries.
1593 Return the number of keys set up in array KEYS. */
1594static inline st_index_t
1595st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1596{
1597 st_index_t i, bound;
1598 st_data_t key, *keys_start, *keys_end;
1599 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1600
1601 bound = tab->entries_bound;
1602 keys_start = keys;
1603 keys_end = keys + size;
1604 for (i = tab->entries_start; i < bound; i++) {
1605 if (keys == keys_end)
1606 break;
1607 curr_entry_ptr = &entries[i];
1608 key = curr_entry_ptr->key;
1609 if (! DELETED_ENTRY_P(curr_entry_ptr))
1610 *keys++ = key;
1611 }
1612
1613 return keys - keys_start;
1614}
1615
1616st_index_t
1617st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1618{
1619 return st_general_keys(tab, keys, size);
1620}
1621
1622/* See comments for function st_delete_safe. */
1623st_index_t
1624st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1625 st_data_t never ATTRIBUTE_UNUSED)
1626{
1627 return st_general_keys(tab, keys, size);
1628}
1629
1630/* Set up array VALUES by at most SIZE values of head table TAB
1631 entries. Return the number of values set up in array VALUES. */
1632static inline st_index_t
1633st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1634{
1635 st_index_t i, bound;
1636 st_data_t *values_start, *values_end;
1637 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1638
1639 values_start = values;
1640 values_end = values + size;
1641 bound = tab->entries_bound;
1642 for (i = tab->entries_start; i < bound; i++) {
1643 if (values == values_end)
1644 break;
1645 curr_entry_ptr = &entries[i];
1646 if (! DELETED_ENTRY_P(curr_entry_ptr))
1647 *values++ = curr_entry_ptr->record;
1648 }
1649
1650 return values - values_start;
1651}
1652
1653st_index_t
1654st_values(st_table *tab, st_data_t *values, st_index_t size)
1655{
1656 return st_general_values(tab, values, size);
1657}
1658
1659/* See comments for function st_delete_safe. */
1660st_index_t
1661st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1662 st_data_t never ATTRIBUTE_UNUSED)
1663{
1664 return st_general_values(tab, values, size);
1665}
1666
/* Initial value of the 32 bit FNV-1a hash. */
#define FNV1_32A_INIT 0x811c9dc5

/*
 * 32 bit magic FNV-1a prime
 */
#define FNV_32_PRIME 0x01000193

/* UNALIGNED_WORD_ACCESS is 1 on the architectures listed below and 0
 * elsewhere, unless it was already defined before this point.
 * __POWERPC__ added to accommodate Darwin case. */
#ifndef UNALIGNED_WORD_ACCESS
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
     defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
     defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
     defined(__mc68020__)
#   define UNALIGNED_WORD_ACCESS 1
# else
#   define UNALIGNED_WORD_ACCESS 0
# endif
#endif
1686
/* This hash function is quite simplified MurmurHash3
 * Simplification is legal, cause most of magic still happens in finalizator.
 * And finalizator is almost the same as in MurmurHash3 */

/* Build an st_index_t constant from a high (X) and a low (Y) 32 bit
 * half. */
#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
/* Rotate X left by N bits within the width of st_index_t. */
#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))

/* Multiplication constants of the mixing step.  They must expand to
 * plain expressions (no trailing semicolon) so that they can be used
 * anywhere an expression is allowed. */
#if ST_INDEX_BITS <= 32
#define C1 (st_index_t)0xcc9e2d51
#define C2 (st_index_t)0x1b873593
#else
#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5)
#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f)
#endif
1700NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1701NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1702NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1703
1704static inline st_index_t
1705murmur_step(st_index_t h, st_index_t k)
1706{
1707#if ST_INDEX_BITS <= 32
1708#define r1 (17)
1709#define r2 (11)
1710#else
1711#define r1 (33)
1712#define r2 (24)
1713#endif
1714 k *= C1;
1715 h ^= ROTL(k, r1);
1716 h *= C2;
1717 h = ROTL(h, r2);
1718 return h;
1719}
1720#undef r1
1721#undef r2
1722
1723static inline st_index_t
1724murmur_finish(st_index_t h)
1725{
1726#if ST_INDEX_BITS <= 32
1727#define r1 (16)
1728#define r2 (13)
1729#define r3 (16)
1730 const st_index_t c1 = 0x85ebca6b;
1731 const st_index_t c2 = 0xc2b2ae35;
1732#else
1733/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1734#define r1 (30)
1735#define r2 (27)
1736#define r3 (31)
1737 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1738 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1739#endif
1740#if ST_INDEX_BITS > 64
1741 h ^= h >> 64;
1742 h *= c2;
1743 h ^= h >> 65;
1744#endif
1745 h ^= h >> r1;
1746 h *= c1;
1747 h ^= h >> r2;
1748 h *= c2;
1749 h ^= h >> r3;
1750 return h;
1751}
1752#undef r1
1753#undef r2
1754#undef r3
1755
1756st_index_t
1757st_hash(const void *ptr, size_t len, st_index_t h)
1758{
1759 const char *data = ptr;
1760 st_index_t t = 0;
1761 size_t l = len;
1762
1763#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1764#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1765#if SIZEOF_ST_INDEX_T > 4
1766#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1767#if SIZEOF_ST_INDEX_T > 8
1768#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1769 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1770#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1771#endif
1772#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1773#else
1774#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1775#endif
1776#undef SKIP_TAIL
1777 if (len >= sizeof(st_index_t)) {
1778#if !UNALIGNED_WORD_ACCESS
1779 int align = (int)((st_data_t)data % sizeof(st_index_t));
1780 if (align) {
1781 st_index_t d = 0;
1782 int sl, sr, pack;
1783
1784 switch (align) {
1785#ifdef WORDS_BIGENDIAN
1786# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1787 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1788#else
1789# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1790 t |= data_at(n) << CHAR_BIT*(n)
1791#endif
1792 UNALIGNED_ADD_ALL;
1793#undef UNALIGNED_ADD
1794 }
1795
1796#ifdef WORDS_BIGENDIAN
1797 t >>= (CHAR_BIT * align) - CHAR_BIT;
1798#else
1799 t <<= (CHAR_BIT * align);
1800#endif
1801
1802 data += sizeof(st_index_t)-align;
1803 len -= sizeof(st_index_t)-align;
1804
1805 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1806 sr = CHAR_BIT * align;
1807
1808 while (len >= sizeof(st_index_t)) {
1809 d = *(st_index_t *)data;
1810#ifdef WORDS_BIGENDIAN
1811 t = (t << sr) | (d >> sl);
1812#else
1813 t = (t >> sr) | (d << sl);
1814#endif
1815 h = murmur_step(h, t);
1816 t = d;
1817 data += sizeof(st_index_t);
1818 len -= sizeof(st_index_t);
1819 }
1820
1821 pack = len < (size_t)align ? (int)len : align;
1822 d = 0;
1823 switch (pack) {
1824#ifdef WORDS_BIGENDIAN
1825# define UNALIGNED_ADD(n) case (n) + 1: \
1826 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1827#else
1828# define UNALIGNED_ADD(n) case (n) + 1: \
1829 d |= data_at(n) << CHAR_BIT*(n)
1830#endif
1831 UNALIGNED_ADD_ALL;
1832#undef UNALIGNED_ADD
1833 }
1834#ifdef WORDS_BIGENDIAN
1835 t = (t << sr) | (d >> sl);
1836#else
1837 t = (t >> sr) | (d << sl);
1838#endif
1839
1840 if (len < (size_t)align) goto skip_tail;
1841# define SKIP_TAIL 1
1842 h = murmur_step(h, t);
1843 data += pack;
1844 len -= pack;
1845 }
1846 else
1847#endif
1848#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1849#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1850#else
1851#define aligned_data data
1852#endif
1853 {
1854 do {
1855 h = murmur_step(h, *(st_index_t *)aligned_data);
1856 data += sizeof(st_index_t);
1857 len -= sizeof(st_index_t);
1858 } while (len >= sizeof(st_index_t));
1859 }
1860 }
1861
1862 t = 0;
1863 switch (len) {
1864#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1865 /* in this case byteorder doesn't really matter */
1866#if SIZEOF_ST_INDEX_T > 4
1867 case 7: t |= data_at(6) << 48;
1868 case 6: t |= data_at(5) << 40;
1869 case 5: t |= data_at(4) << 32;
1870 case 4:
1871 t |= (st_index_t)*(uint32_t*)aligned_data;
1872 goto skip_tail;
1873# define SKIP_TAIL 1
1874#endif
1875 case 3: t |= data_at(2) << 16;
1876 case 2: t |= data_at(1) << 8;
1877 case 1: t |= data_at(0);
1878#else
1879#ifdef WORDS_BIGENDIAN
1880# define UNALIGNED_ADD(n) case (n) + 1: \
1881 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1882#else
1883# define UNALIGNED_ADD(n) case (n) + 1: \
1884 t |= data_at(n) << CHAR_BIT*(n)
1885#endif
1886 UNALIGNED_ADD_ALL;
1887#undef UNALIGNED_ADD
1888#endif
1889#ifdef SKIP_TAIL
1890 skip_tail:
1891#endif
1892 h ^= t; h -= ROTL(t, 7);
1893 h *= C2;
1894 }
1895 h ^= l;
1896#undef aligned_data
1897
1898 return murmur_finish(h);
1899}
1900
1901st_index_t
1902st_hash_uint32(st_index_t h, uint32_t i)
1903{
1904 return murmur_step(h, i);
1905}
1906
1907NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1908st_index_t
1909st_hash_uint(st_index_t h, st_index_t i)
1910{
1911 i += h;
1912/* no matter if it is BigEndian or LittleEndian,
1913 * we hash just integers */
1914#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1915 h = murmur_step(h, i >> 8*8);
1916#endif
1917 h = murmur_step(h, i);
1918 return h;
1919}
1920
1921st_index_t
1922st_hash_end(st_index_t h)
1923{
1924 h = murmur_finish(h);
1925 return h;
1926}
1927
1928#undef st_hash_start
1929st_index_t
1930rb_st_hash_start(st_index_t h)
1931{
1932 return h;
1933}
1934
1935static st_index_t
1936strhash(st_data_t arg)
1937{
1938 register const char *string = (const char *)arg;
1939 return st_hash(string, strlen(string), FNV1_32A_INIT);
1940}
1941
/* Compare NUL-terminated strings S1 and S2 ignoring the case of the
   ASCII letters 'A'..'Z' only; the current locale is never consulted.
   Return 0 when the strings are equal, 1 when S1 sorts after S2 and
   -1 when S1 sorts before S2.  A string that is a proper prefix of
   the other sorts first.

   Bytes are compared as unsigned char so that the ordering of bytes
   >= 0x80 does not depend on whether plain char is signed on the
   target platform. */
int
st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
{
    for (;;) {
        unsigned char c1 = (unsigned char)*s1++;
        unsigned char c2 = (unsigned char)*s2++;

        if (c1 == '\0' || c2 == '\0') {
            if (c1 != '\0') return 1;
            if (c2 != '\0') return -1;
            return 0;
        }
        /* Fold ASCII upper case letters to lower case. */
        if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
        if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
        if (c1 != c2)
            return (c1 > c2) ? 1 : -1;
    }
}
1965
/* Compare at most N bytes of NUL-terminated strings S1 and S2
   ignoring the case of the ASCII letters 'A'..'Z' only; the current
   locale is never consulted.  Return 0 when the compared parts are
   equal (always the case for N == 0), 1 when S1 sorts after S2 and -1
   when S1 sorts before S2.

   Bytes are compared as unsigned char so that the ordering of bytes
   >= 0x80 does not depend on whether plain char is signed on the
   target platform. */
int
st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
{
    size_t i;

    for (i = 0; i < n; i++) {
        unsigned char c1 = (unsigned char)*s1++;
        unsigned char c2 = (unsigned char)*s2++;

        if (c1 == '\0' || c2 == '\0') {
            if (c1 != '\0') return 1;
            if (c2 != '\0') return -1;
            return 0;
        }
        /* Fold ASCII upper case letters to lower case. */
        if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
        if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
        if (c1 != c2)
            return (c1 > c2) ? 1 : -1;
    }
    return 0;
}
1991
1992static int
1993st_strcmp(st_data_t lhs, st_data_t rhs)
1994{
1995 const char *s1 = (char *)lhs;
1996 const char *s2 = (char *)rhs;
1997 return strcmp(s1, s2);
1998}
1999
2000static int
2001st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2002{
2003 const char *s1 = (char *)lhs;
2004 const char *s2 = (char *)rhs;
2005 return st_locale_insensitive_strcasecmp(s1, s2);
2006}
2007
2008NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2009static st_index_t
2010strcasehash(st_data_t arg)
2011{
2012 register const char *string = (const char *)arg;
2013 register st_index_t hval = FNV1_32A_INIT;
2014
2015 /*
2016 * FNV-1a hash each octet in the buffer
2017 */
2018 while (*string) {
2019 unsigned int c = (unsigned char)*string++;
2020 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2021 hval ^= c;
2022
2023 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2024 hval *= FNV_32_PRIME;
2025 }
2026 return hval;
2027}
2028
2029int
2030st_numcmp(st_data_t x, st_data_t y)
2031{
2032 return x != y;
2033}
2034
2035st_index_t
2036st_numhash(st_data_t n)
2037{
2038 enum {s1 = 11, s2 = 3};
2039 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2040}
2041
2042/* Expand TAB to be suitable for holding SIZ entries in total.
2043 Pre-existing entries remain not deleted inside of TAB, but its bins
2044 are cleared to expect future reconstruction. See rehash below. */
2045static void
2046st_expand_table(st_table *tab, st_index_t siz)
2047{
2048 st_table *tmp;
2049 st_index_t n;
2050
2051 if (siz <= get_allocated_entries(tab))
2052 return; /* enough room already */
2053
2054 tmp = st_init_table_with_size(tab->type, siz);
2055 n = get_allocated_entries(tab);
2056 MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2057 free(tab->entries);
2058 if (tab->bins != NULL)
2059 free(tab->bins);
2060 if (tmp->bins != NULL)
2061 free(tmp->bins);
2062 tab->entry_power = tmp->entry_power;
2063 tab->bin_power = tmp->bin_power;
2064 tab->size_ind = tmp->size_ind;
2065 tab->entries = tmp->entries;
2066 tab->bins = NULL;
2067 tab->rebuilds_num++;
2068 free(tmp);
2069}
2070
2071/* Rehash using linear search. Return TRUE if we found that the table
2072 was rebuilt. */
2073static int
2074st_rehash_linear(st_table *tab)
2075{
2076 int eq_p, rebuilt_p;
2077 st_index_t i, j;
2078 st_table_entry *p, *q;
2079 if (tab->bins) {
2080 free(tab->bins);
2081 tab->bins = NULL;
2082 }
2083 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2084 p = &tab->entries[i];
2085 if (DELETED_ENTRY_P(p))
2086 continue;
2087 for (j = i + 1; j < tab->entries_bound; j++) {
2088 q = &tab->entries[j];
2089 if (DELETED_ENTRY_P(q))
2090 continue;
2091 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2092 if (EXPECT(rebuilt_p, 0))
2093 return TRUE;
2094 if (eq_p) {
2095 *p = *q;
2096 MARK_ENTRY_DELETED(q);
2097 tab->num_entries--;
2098 update_range_for_deleted(tab, j);
2099 }
2100 }
2101 }
2102 return FALSE;
2103}
2104
2105/* Rehash using index. Return TRUE if we found that the table was
2106 rebuilt. */
2107static int
2108st_rehash_indexed(st_table *tab)
2109{
2110 int eq_p, rebuilt_p;
2111 st_index_t i;
2112 st_index_t const n = bins_size(tab);
2113 unsigned int const size_ind = get_size_ind(tab);
2114 st_index_t *bins = realloc(tab->bins, n);
2115 tab->bins = bins;
2116 initialize_bins(tab);
2117 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2118 st_table_entry *p = &tab->entries[i];
2119 st_index_t ind;
2120#ifdef QUADRATIC_PROBE
2121 st_index_t d = 1;
2122#else
2123 st_index_t peterb = p->hash;
2124#endif
2125
2126 if (DELETED_ENTRY_P(p))
2127 continue;
2128
2129 ind = hash_bin(p->hash, tab);
2130 for (;;) {
2131 st_index_t bin = get_bin(bins, size_ind, ind);
2132 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2133 /* ok, new room */
2134 set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2135 break;
2136 }
2137 else {
2138 st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2139 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2140 if (EXPECT(rebuilt_p, 0))
2141 return TRUE;
2142 if (eq_p) {
2143 /* duplicated key; delete it */
2144 q->record = p->record;
2145 MARK_ENTRY_DELETED(p);
2146 tab->num_entries--;
2147 update_range_for_deleted(tab, bin);
2148 break;
2149 }
2150 else {
2151 /* hash collision; skip it */
2152#ifdef QUADRATIC_PROBE
2153 ind = hash_bin(ind + d, tab);
2154 d++;
2155#else
2156 ind = secondary_hash(ind, tab, &peterb);
2157#endif
2158 }
2159 }
2160 }
2161 }
2162 return FALSE;
2163}
2164
2165/* Reconstruct TAB's bins according to TAB's entries. This function
2166 permits conflicting keys inside of entries. No errors are reported
2167 then. All but one of them are discarded silently. */
2168static void
2169st_rehash(st_table *tab)
2170{
2171 int rebuilt_p;
2172
2173 do {
2174 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2175 rebuilt_p = st_rehash_linear(tab);
2176 else
2177 rebuilt_p = st_rehash_indexed(tab);
2178 } while (rebuilt_p);
2179}
2180
2181#ifdef RUBY
2182static st_data_t
2183st_stringify(VALUE key)
2184{
2185 return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2186 rb_hash_key_str(key) : key;
2187}
2188
2189static void
2190st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2191{
2192 st_data_t k = st_stringify(key);
2194 e.hash = do_hash(k, tab);
2195 e.key = k;
2196 e.record = val;
2197
2198 tab->entries[tab->entries_bound++] = e;
2199 tab->num_entries++;
2200 RB_OBJ_WRITTEN(hash, Qundef, k);
2201 RB_OBJ_WRITTEN(hash, Qundef, val);
2202}
2203
2204static void
2205st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2206{
2207 long i;
2208
2209 for (i = 0; i < argc; /* */) {
2210 st_data_t k = st_stringify(argv[i++]);
2211 st_data_t v = argv[i++];
2212 st_insert(tab, k, v);
2213 RB_OBJ_WRITTEN(hash, Qundef, k);
2214 RB_OBJ_WRITTEN(hash, Qundef, v);
2215 }
2216}
2217
2218static void
2219st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2220{
2221 long i;
2222
2223 /* push elems */
2224 for (i = 0; i < argc; /* */) {
2225 VALUE key = argv[i++];
2226 VALUE val = argv[i++];
2227 st_insert_single(tab, hash, key, val);
2228 }
2229
2230 /* reindex */
2231 st_rehash(tab);
2232}
2233
2234/* Mimics ruby's { foo => bar } syntax. This function is subpart
2235 of rb_hash_bulk_insert. */
2236void
2237rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2238{
2239 st_index_t n, size = argc / 2;
2240 st_table *tab = RHASH_ST_TABLE(hash);
2241
2242 tab = RHASH_TBL_RAW(hash);
2243 n = tab->entries_bound + size;
2244 st_expand_table(tab, n);
2245 if (UNLIKELY(tab->num_entries))
2246 st_insert_generic(tab, argc, argv, hash);
2247 else if (argc <= 2)
2248 st_insert_single(tab, hash, argv[0], argv[1]);
2249 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2250 st_insert_linear(tab, argc, argv, hash);
2251 else
2252 st_insert_generic(tab, argc, argv, hash);
2253}
2254
2255// to iterate iv_index_tbl
2256st_data_t
2257rb_st_nth_key(st_table *tab, st_index_t index)
2258{
2259 if (LIKELY(tab->entries_start == 0 &&
2260 tab->num_entries == tab->entries_bound &&
2261 index < tab->num_entries)) {
2262 return tab->entries[index].key;
2263 }
2264 else {
2265 rb_bug("unreachable");
2266 }
2267}
2268
2269#endif
#define Qundef
Old name of RUBY_Qundef.
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
Definition error.c:3148
void rb_bug(const char *fmt,...)
Interpreter panic switch.
Definition error.c:794
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1089
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:190
VALUE rb_cString
String class.
Definition string.c:79
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
Definition rgengc.h:232
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition memory.h:366
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define _(args)
This was a transition path from K&R to ANSI.
Definition stdarg.h:35
Definition st.c:133
Definition st.h:79
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40