[331] | 1 | /* This is a public domain general purpose hash table package
|
---|
| 2 | originally written by Peter Moore @ UCB.
|
---|
[279] | 3 |
|
---|
[331] | 4 | The hash table data structures were redesigned and the package was
|
---|
| 5 | rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
|
---|
[279] | 6 |
|
---|
[331] | 7 | /* The original package implemented classic bucket-based hash tables
|
---|
| 8 | with entries doubly linked for an access by their insertion order.
|
---|
| 9 | To decrease pointer chasing and as a consequence to improve a data
|
---|
| 10 | locality the current implementation is based on storing entries in
|
---|
| 11 | an array and using hash tables with open addressing. The current
|
---|
| 12 | entries are more compact in comparison with the original ones and
|
---|
| 13 | this also improves the data locality.
|
---|
| 14 |
|
---|
| 15 | The hash table has two arrays called *bins* and *entries*.
|
---|
| 16 |
|
---|
| 17 | bins:
|
---|
| 18 | -------
|
---|
| 19 | | | entries array:
|
---|
| 20 | |-------| --------------------------------
|
---|
| 21 | | index | | | entry: | | |
|
---|
| 22 | |-------| | | | | |
|
---|
| 23 | | ... | | ... | hash | ... | ... |
|
---|
| 24 | |-------| | | key | | |
|
---|
| 25 | | empty | | | record | | |
|
---|
| 26 | |-------| --------------------------------
|
---|
| 27 | | ... | ^ ^
|
---|
| 28 | |-------| |_ entries start |_ entries bound
|
---|
| 29 | |deleted|
|
---|
| 30 | -------
|
---|
| 31 |
|
---|
| 32 | o The entry array contains table entries in the same order as they
|
---|
| 33 | were inserted.
|
---|
| 34 |
|
---|
| 35 | When the first entry is deleted, a variable containing index of
|
---|
| 36 | the current first entry (*entries start*) is changed. In all
|
---|
| 37 | other cases of the deletion, we just mark the entry as deleted by
|
---|
| 38 | using a reserved hash value.
|
---|
| 39 |
|
---|
| 40 | Such organization of the entry storage makes operations of the
|
---|
| 41 | table shift and the entries traversal very fast.
|
---|
| 42 |
|
---|
| 43 | o The bins provide access to the entries by their keys. The
|
---|
| 44 | key hash is mapped to a bin containing *index* of the
|
---|
| 45 | corresponding entry in the entry array.
|
---|
| 46 |
|
---|
| 47 | The bin array size is always power of two, it makes mapping very
|
---|
| 48 | fast by using the corresponding lower bits of the hash.
|
---|
| 49 | Generally it is not a good idea to ignore some part of the hash.
|
---|
| 50 | But alternative approach is worse. For example, we could use a
|
---|
| 51 | modulo operation for mapping and a prime number for the size of
|
---|
| 52 | the bin array. Unfortunately, the modulo operation for big
|
---|
| 53 | 64-bit numbers are extremely slow (it takes more than 100 cycles
|
---|
| 54 | on modern Intel CPUs).
|
---|
| 55 |
|
---|
| 56 | Still other bits of the hash value are used when the mapping
|
---|
| 57 | results in a collision. In this case we use a secondary hash
|
---|
| 58 | value which is a result of a function of the collision bin
|
---|
| 59 | index and the original hash value. The function choice
|
---|
| 60 | guarantees that we can traverse all bins and finally find the
|
---|
| 61 | corresponding bin as after several iterations the function
|
---|
| 62 | becomes a full cycle linear congruential generator because it
|
---|
| 63 | satisfies requirements of the Hull-Dobell theorem.
|
---|
| 64 |
|
---|
| 65 | When an entry is removed from the table besides marking the
|
---|
| 66 | hash in the corresponding entry described above, we also mark
|
---|
| 67 | the bin by a special value in order to find entries which had
|
---|
| 68 | a collision with the removed entries.
|
---|
| 69 |
|
---|
| 70 | There are two reserved values for the bins. One denotes an
|
---|
| 71 | empty bin, another one denotes a bin for a deleted entry.
|
---|
| 72 |
|
---|
| 73 | o The length of the bin array is at least two times more than the
|
---|
| 74 | entry array length. This keeps the table load factor healthy.
|
---|
| 75 | The trigger of rebuilding the table is always a case when we can
|
---|
| 76 | not insert an entry anymore at the entries bound. We could
|
---|
| 77 | change the entries bound too in case of deletion but than we need
|
---|
| 78 | a special code to count bins with corresponding deleted entries
|
---|
| 79 | and reset the bin values when there are too many bins
|
---|
| 80 | corresponding deleted entries
|
---|
| 81 |
|
---|
| 82 | Table rebuilding is done by creation of a new entry array and
|
---|
| 83 | bins of an appropriate size. We also try to reuse the arrays
|
---|
| 84 | in some cases by compacting the array and removing deleted
|
---|
| 85 | entries.
|
---|
| 86 |
|
---|
| 87 | o To save memory very small tables have no allocated arrays
|
---|
| 88 | bins. We use a linear search for an access by a key.
|
---|
| 89 |
|
---|
| 90 | o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
|
---|
| 91 | bins depending on the current hash table size.
|
---|
| 92 |
|
---|
| 93 | This implementation speeds up the Ruby hash table benchmarks in
|
---|
| 94 | average by more 40% on Intel Haswell CPU.
|
---|
| 95 |
|
---|
| 96 | */
|
---|
| 97 |
|
---|
| 98 | #ifdef RUBY
|
---|
| 99 | #include "internal.h"
|
---|
| 100 | #else
|
---|
| 101 | #include "regint.h"
|
---|
| 102 | #include "st.h"
|
---|
| 103 | #endif
|
---|
| 104 |
|
---|
[279] | 105 | #include <stdio.h>
|
---|
| 106 | #include <stdlib.h>
|
---|
| 107 | #include <string.h>
|
---|
[331] | 108 | #include <assert.h>
|
---|
[279] | 109 |
|
---|
[331] | 110 | #ifdef __GNUC__
|
---|
| 111 | #define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
|
---|
| 112 | #define EXPECT(expr, val) __builtin_expect(expr, val)
|
---|
| 113 | #define ATTRIBUTE_UNUSED __attribute__((unused))
|
---|
| 114 | #else
|
---|
| 115 | #define PREFETCH(addr, write_p)
|
---|
| 116 | #define EXPECT(expr, val) (expr)
|
---|
| 117 | #define ATTRIBUTE_UNUSED
|
---|
[279] | 118 | #endif
|
---|
| 119 |
|
---|
[331] | 120 | #ifdef ST_DEBUG
|
---|
| 121 | #define st_assert(cond) assert(cond)
|
---|
| 122 | #else
|
---|
| 123 | #define st_assert(cond) ((void)(0 && (cond)))
|
---|
| 124 | #endif
|
---|
[279] | 125 |
|
---|
[331] | 126 | /* The type of hashes. */
|
---|
| 127 | typedef st_index_t st_hash_t;
|
---|
[279] | 128 |
|
---|
| 129 | struct st_table_entry {
|
---|
[331] | 130 | st_hash_t hash;
|
---|
[279] | 131 | st_data_t key;
|
---|
| 132 | st_data_t record;
|
---|
| 133 | };
|
---|
| 134 |
|
---|
[331] | 135 | #ifdef RUBY
|
---|
| 136 | #define type_numhash st_hashtype_num
|
---|
| 137 | const struct st_hash_type st_hashtype_num = {
|
---|
| 138 | st_numcmp,
|
---|
| 139 | st_numhash,
|
---|
[279] | 140 | };
|
---|
| 141 |
|
---|
| 142 | /* extern int strcmp(const char *, const char *); */
|
---|
[331] | 143 | static st_index_t strhash(st_data_t);
|
---|
| 144 | static const struct st_hash_type type_strhash = {
|
---|
[279] | 145 | strcmp,
|
---|
| 146 | strhash,
|
---|
| 147 | };
|
---|
| 148 |
|
---|
[331] | 149 | static st_index_t strcasehash(st_data_t);
|
---|
| 150 | static const struct st_hash_type type_strcasehash = {
|
---|
| 151 | st_locale_insensitive_strcasecmp,
|
---|
| 152 | strcasehash,
|
---|
| 153 | };
|
---|
| 154 | #endif /* RUBY */
|
---|
[279] | 155 |
|
---|
[331] | 156 | /* Value used to catch uninitialized entries/bins during debugging.
|
---|
| 157 | There is a possibility for a false alarm, but its probability is
|
---|
| 158 | extremely small. */
|
---|
| 159 | #define ST_INIT_VAL 0xafafafafafafafaf
|
---|
| 160 | #define ST_INIT_VAL_BYTE 0xafa
|
---|
[279] | 161 |
|
---|
[331] | 162 | #ifdef RUBY
|
---|
| 163 | #undef malloc
|
---|
| 164 | #undef realloc
|
---|
| 165 | #undef calloc
|
---|
| 166 | #undef free
|
---|
| 167 | #define malloc ruby_xmalloc
|
---|
| 168 | #define calloc ruby_xcalloc
|
---|
| 169 | #define realloc ruby_xrealloc
|
---|
| 170 | #define free ruby_xfree
|
---|
| 171 | #else /* RUBY */
|
---|
| 172 | #define MEMCPY(p1,p2,type,n) memcpy((p1), (p2), sizeof(type)*(n))
|
---|
| 173 | #endif /* RUBY */
|
---|
[279] | 174 |
|
---|
[331] | 175 | #define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
|
---|
| 176 | #define PTR_EQUAL(tab, ptr, hash_val, key_) \
|
---|
| 177 | ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
|
---|
[279] | 178 |
|
---|
[331] | 179 | /* Features of a table. */
|
---|
| 180 | struct st_features {
|
---|
| 181 | /* Power of 2 used for number of allocated entries. */
|
---|
| 182 | unsigned char entry_power;
|
---|
| 183 | /* Power of 2 used for number of allocated bins. Depending on the
|
---|
| 184 | table size, the number of bins is 2-4 times more than the
|
---|
| 185 | number of entries. */
|
---|
| 186 | unsigned char bin_power;
|
---|
| 187 | /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
|
---|
| 188 | unsigned char size_ind;
|
---|
| 189 | /* Bins are packed in words of type st_index_t. The following is
|
---|
| 190 | a size of bins counted by words. */
|
---|
| 191 | st_index_t bins_words;
|
---|
| 192 | };
|
---|
[279] | 193 |
|
---|
[331] | 194 | /* Features of all possible size tables. */
|
---|
| 195 | #if SIZEOF_ST_INDEX_T == 8
|
---|
| 196 | #define MAX_POWER2 62
|
---|
| 197 | static const struct st_features features[] = {
|
---|
| 198 | {0, 1, 0, 0x0},
|
---|
| 199 | {1, 2, 0, 0x1},
|
---|
| 200 | {2, 3, 0, 0x1},
|
---|
| 201 | {3, 4, 0, 0x2},
|
---|
| 202 | {4, 5, 0, 0x4},
|
---|
| 203 | {5, 6, 0, 0x8},
|
---|
| 204 | {6, 7, 0, 0x10},
|
---|
| 205 | {7, 8, 0, 0x20},
|
---|
| 206 | {8, 9, 1, 0x80},
|
---|
| 207 | {9, 10, 1, 0x100},
|
---|
| 208 | {10, 11, 1, 0x200},
|
---|
| 209 | {11, 12, 1, 0x400},
|
---|
| 210 | {12, 13, 1, 0x800},
|
---|
| 211 | {13, 14, 1, 0x1000},
|
---|
| 212 | {14, 15, 1, 0x2000},
|
---|
| 213 | {15, 16, 1, 0x4000},
|
---|
| 214 | {16, 17, 2, 0x10000},
|
---|
| 215 | {17, 18, 2, 0x20000},
|
---|
| 216 | {18, 19, 2, 0x40000},
|
---|
| 217 | {19, 20, 2, 0x80000},
|
---|
| 218 | {20, 21, 2, 0x100000},
|
---|
| 219 | {21, 22, 2, 0x200000},
|
---|
| 220 | {22, 23, 2, 0x400000},
|
---|
| 221 | {23, 24, 2, 0x800000},
|
---|
| 222 | {24, 25, 2, 0x1000000},
|
---|
| 223 | {25, 26, 2, 0x2000000},
|
---|
| 224 | {26, 27, 2, 0x4000000},
|
---|
| 225 | {27, 28, 2, 0x8000000},
|
---|
| 226 | {28, 29, 2, 0x10000000},
|
---|
| 227 | {29, 30, 2, 0x20000000},
|
---|
| 228 | {30, 31, 2, 0x40000000},
|
---|
| 229 | {31, 32, 2, 0x80000000},
|
---|
| 230 | {32, 33, 3, 0x200000000},
|
---|
| 231 | {33, 34, 3, 0x400000000},
|
---|
| 232 | {34, 35, 3, 0x800000000},
|
---|
| 233 | {35, 36, 3, 0x1000000000},
|
---|
| 234 | {36, 37, 3, 0x2000000000},
|
---|
| 235 | {37, 38, 3, 0x4000000000},
|
---|
| 236 | {38, 39, 3, 0x8000000000},
|
---|
| 237 | {39, 40, 3, 0x10000000000},
|
---|
| 238 | {40, 41, 3, 0x20000000000},
|
---|
| 239 | {41, 42, 3, 0x40000000000},
|
---|
| 240 | {42, 43, 3, 0x80000000000},
|
---|
| 241 | {43, 44, 3, 0x100000000000},
|
---|
| 242 | {44, 45, 3, 0x200000000000},
|
---|
| 243 | {45, 46, 3, 0x400000000000},
|
---|
| 244 | {46, 47, 3, 0x800000000000},
|
---|
| 245 | {47, 48, 3, 0x1000000000000},
|
---|
| 246 | {48, 49, 3, 0x2000000000000},
|
---|
| 247 | {49, 50, 3, 0x4000000000000},
|
---|
| 248 | {50, 51, 3, 0x8000000000000},
|
---|
| 249 | {51, 52, 3, 0x10000000000000},
|
---|
| 250 | {52, 53, 3, 0x20000000000000},
|
---|
| 251 | {53, 54, 3, 0x40000000000000},
|
---|
| 252 | {54, 55, 3, 0x80000000000000},
|
---|
| 253 | {55, 56, 3, 0x100000000000000},
|
---|
| 254 | {56, 57, 3, 0x200000000000000},
|
---|
| 255 | {57, 58, 3, 0x400000000000000},
|
---|
| 256 | {58, 59, 3, 0x800000000000000},
|
---|
| 257 | {59, 60, 3, 0x1000000000000000},
|
---|
| 258 | {60, 61, 3, 0x2000000000000000},
|
---|
| 259 | {61, 62, 3, 0x4000000000000000},
|
---|
| 260 | {62, 63, 3, 0x8000000000000000},
|
---|
| 261 | };
|
---|
[279] | 262 |
|
---|
[331] | 263 | #else
|
---|
| 264 | #define MAX_POWER2 30
|
---|
| 265 |
|
---|
| 266 | static const struct st_features features[] = {
|
---|
| 267 | {0, 1, 0, 0x1},
|
---|
| 268 | {1, 2, 0, 0x1},
|
---|
| 269 | {2, 3, 0, 0x2},
|
---|
| 270 | {3, 4, 0, 0x4},
|
---|
| 271 | {4, 5, 0, 0x8},
|
---|
| 272 | {5, 6, 0, 0x10},
|
---|
| 273 | {6, 7, 0, 0x20},
|
---|
| 274 | {7, 8, 0, 0x40},
|
---|
| 275 | {8, 9, 1, 0x100},
|
---|
| 276 | {9, 10, 1, 0x200},
|
---|
| 277 | {10, 11, 1, 0x400},
|
---|
| 278 | {11, 12, 1, 0x800},
|
---|
| 279 | {12, 13, 1, 0x1000},
|
---|
| 280 | {13, 14, 1, 0x2000},
|
---|
| 281 | {14, 15, 1, 0x4000},
|
---|
| 282 | {15, 16, 1, 0x8000},
|
---|
| 283 | {16, 17, 2, 0x20000},
|
---|
| 284 | {17, 18, 2, 0x40000},
|
---|
| 285 | {18, 19, 2, 0x80000},
|
---|
| 286 | {19, 20, 2, 0x100000},
|
---|
| 287 | {20, 21, 2, 0x200000},
|
---|
| 288 | {21, 22, 2, 0x400000},
|
---|
| 289 | {22, 23, 2, 0x800000},
|
---|
| 290 | {23, 24, 2, 0x1000000},
|
---|
| 291 | {24, 25, 2, 0x2000000},
|
---|
| 292 | {25, 26, 2, 0x4000000},
|
---|
| 293 | {26, 27, 2, 0x8000000},
|
---|
| 294 | {27, 28, 2, 0x10000000},
|
---|
| 295 | {28, 29, 2, 0x20000000},
|
---|
| 296 | {29, 30, 2, 0x40000000},
|
---|
| 297 | {30, 31, 2, 0x80000000},
|
---|
[279] | 298 | };
|
---|
| 299 |
|
---|
[331] | 300 | #endif
|
---|
| 301 |
|
---|
| 302 | /* The reserved hash value and its substitution. */
|
---|
| 303 | #define RESERVED_HASH_VAL (~(st_hash_t) 0)
|
---|
| 304 | #define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
|
---|
| 305 |
|
---|
| 306 | /* Return hash value of KEY for table TAB. */
|
---|
| 307 | static inline st_hash_t
|
---|
| 308 | do_hash(st_data_t key, st_table *tab)
|
---|
| 309 | {
|
---|
| 310 | st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
|
---|
| 311 |
|
---|
| 312 | /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
|
---|
| 313 | another value. Such mapping should be extremely rare. */
|
---|
| 314 | return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
|
---|
| 315 | }
|
---|
| 316 |
|
---|
| 317 | /* Power of 2 defining the minimal number of allocated entries. */
|
---|
| 318 | #define MINIMAL_POWER2 2
|
---|
| 319 |
|
---|
| 320 | #if MINIMAL_POWER2 < 2
|
---|
| 321 | #error "MINIMAL_POWER2 should be >= 2"
|
---|
| 322 | #endif
|
---|
| 323 |
|
---|
| 324 | /* If the power2 of the allocated `entries` is less than the following
|
---|
| 325 | value, don't allocate bins and use a linear search. */
|
---|
| 326 | #define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
|
---|
| 327 |
|
---|
| 328 | /* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
|
---|
[279] | 329 | static int
|
---|
[331] | 330 | get_power2(st_index_t size)
|
---|
[279] | 331 | {
|
---|
[331] | 332 | unsigned int n;
|
---|
[279] | 333 |
|
---|
[331] | 334 | for (n = 0; size != 0; n++)
|
---|
| 335 | size >>= 1;
|
---|
| 336 | if (n <= MAX_POWER2)
|
---|
| 337 | return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
|
---|
| 338 | #ifdef RUBY
|
---|
| 339 | /* Ran out of the table entries */
|
---|
| 340 | rb_raise(rb_eRuntimeError, "st_table too big");
|
---|
| 341 | #endif
|
---|
| 342 | /* should raise exception */
|
---|
[279] | 343 | return -1;
|
---|
[331] | 344 | }
|
---|
[279] | 345 |
|
---|
[331] | 346 | /* Return value of N-th bin in array BINS of table with bins size
|
---|
| 347 | index S. */
|
---|
| 348 | static inline st_index_t
|
---|
| 349 | get_bin(st_index_t *bins, int s, st_index_t n)
|
---|
| 350 | {
|
---|
| 351 | return (s == 0 ? ((unsigned char *) bins)[n]
|
---|
| 352 | : s == 1 ? ((unsigned short *) bins)[n]
|
---|
| 353 | : s == 2 ? ((unsigned int *) bins)[n]
|
---|
| 354 | : ((st_index_t *) bins)[n]);
|
---|
| 355 | }
|
---|
| 356 |
|
---|
| 357 | /* Set up N-th bin in array BINS of table with bins size index S to
|
---|
| 358 | value V. */
|
---|
| 359 | static inline void
|
---|
| 360 | set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
|
---|
| 361 | {
|
---|
| 362 | if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
|
---|
| 363 | else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
|
---|
| 364 | else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
|
---|
| 365 | else ((st_index_t *) bins)[n] = v;
|
---|
| 366 | }
|
---|
| 367 |
|
---|
| 368 | /* These macros define reserved values for empty table bin and table
|
---|
| 369 | bin which contains a deleted entry. We will never use such values
|
---|
| 370 | for an entry index in bins. */
|
---|
| 371 | #define EMPTY_BIN 0
|
---|
| 372 | #define DELETED_BIN 1
|
---|
| 373 | /* Base of a real entry index in the bins. */
|
---|
| 374 | #define ENTRY_BASE 2
|
---|
| 375 |
|
---|
| 376 | /* Mark I-th bin of table TAB as empty, in other words not
|
---|
| 377 | corresponding to any entry. */
|
---|
| 378 | #define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
|
---|
| 379 |
|
---|
| 380 | /* Values used for not found entry and bin with given
|
---|
| 381 | characteristics. */
|
---|
| 382 | #define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
|
---|
| 383 | #define UNDEFINED_BIN_IND (~(st_index_t) 0)
|
---|
| 384 |
|
---|
| 385 | /* Mark I-th bin of table TAB as corresponding to a deleted table
|
---|
| 386 | entry. Update number of entries in the table and number of bins
|
---|
| 387 | corresponding to deleted entries. */
|
---|
| 388 | #define MARK_BIN_DELETED(tab, i) \
|
---|
| 389 | do { \
|
---|
| 390 | st_assert(i != UNDEFINED_BIN_IND); \
|
---|
| 391 | st_assert(! IND_EMPTY_OR_DELETED_BIN_P(tab, i)); \
|
---|
| 392 | set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
|
---|
| 393 | } while (0)
|
---|
| 394 |
|
---|
| 395 | /* Macros to check that value B is used empty bins and bins
|
---|
| 396 | corresponding deleted entries. */
|
---|
| 397 | #define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
|
---|
| 398 | #define DELETED_BIN_P(b) ((b) == DELETED_BIN)
|
---|
| 399 | #define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
|
---|
| 400 |
|
---|
| 401 | /* Macros to check empty bins and bins corresponding to deleted
|
---|
| 402 | entries. Bins are given by their index I in table TAB. */
|
---|
| 403 | #define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
|
---|
| 404 | #define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
|
---|
| 405 | #define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
|
---|
| 406 |
|
---|
| 407 | /* Macros for marking and checking deleted entries given by their
|
---|
| 408 | pointer E_PTR. */
|
---|
| 409 | #define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
|
---|
| 410 | #define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
|
---|
| 411 |
|
---|
| 412 | /* Return bin size index of table TAB. */
|
---|
| 413 | static inline unsigned int
|
---|
| 414 | get_size_ind(const st_table *tab)
|
---|
| 415 | {
|
---|
| 416 | return tab->size_ind;
|
---|
| 417 | }
|
---|
| 418 |
|
---|
| 419 | /* Return the number of allocated bins of table TAB. */
|
---|
| 420 | static inline st_index_t
|
---|
| 421 | get_bins_num(const st_table *tab)
|
---|
| 422 | {
|
---|
| 423 | return ((st_index_t) 1)<<tab->bin_power;
|
---|
| 424 | }
|
---|
| 425 |
|
---|
| 426 | /* Return mask for a bin index in table TAB. */
|
---|
| 427 | static inline st_index_t
|
---|
| 428 | bins_mask(const st_table *tab)
|
---|
| 429 | {
|
---|
| 430 | return get_bins_num(tab) - 1;
|
---|
| 431 | }
|
---|
| 432 |
|
---|
| 433 | /* Return the index of table TAB bin corresponding to
|
---|
| 434 | HASH_VALUE. */
|
---|
| 435 | static inline st_index_t
|
---|
| 436 | hash_bin(st_hash_t hash_value, st_table *tab)
|
---|
| 437 | {
|
---|
| 438 | return hash_value & bins_mask(tab);
|
---|
| 439 | }
|
---|
| 440 |
|
---|
| 441 | /* Return the number of allocated entries of table TAB. */
|
---|
| 442 | static inline st_index_t
|
---|
| 443 | get_allocated_entries(const st_table *tab)
|
---|
| 444 | {
|
---|
| 445 | return ((st_index_t) 1)<<tab->entry_power;
|
---|
| 446 | }
|
---|
| 447 |
|
---|
| 448 | /* Return size of the allocated bins of table TAB. */
|
---|
| 449 | static inline st_index_t
|
---|
| 450 | bins_size(const st_table *tab)
|
---|
| 451 | {
|
---|
| 452 | return features[tab->entry_power].bins_words * sizeof (st_index_t);
|
---|
| 453 | }
|
---|
| 454 |
|
---|
| 455 | /* Mark all bins of table TAB as empty. */
|
---|
| 456 | static void
|
---|
| 457 | initialize_bins(st_table *tab)
|
---|
| 458 | {
|
---|
| 459 | memset(tab->bins, 0, bins_size(tab));
|
---|
| 460 | }
|
---|
| 461 |
|
---|
| 462 | /* Make table TAB empty. */
|
---|
| 463 | static void
|
---|
| 464 | make_tab_empty(st_table *tab)
|
---|
| 465 | {
|
---|
| 466 | tab->num_entries = 0;
|
---|
| 467 | tab->entries_start = tab->entries_bound = 0;
|
---|
| 468 | if (tab->bins != NULL)
|
---|
| 469 | initialize_bins(tab);
|
---|
| 470 | }
|
---|
| 471 |
|
---|
| 472 | #ifdef ST_DEBUG
|
---|
| 473 | /* Check the table T consistency. It can be extremely slow. So use
|
---|
| 474 | it only for debugging. */
|
---|
| 475 | static void
|
---|
| 476 | st_check(st_table *tab)
|
---|
| 477 | {
|
---|
| 478 | st_index_t d, e, i, n, p;
|
---|
| 479 |
|
---|
| 480 | for (p = get_allocated_entries(tab), i = 0; p > 1; i++, p>>=1)
|
---|
| 481 | ;
|
---|
| 482 | p = i;
|
---|
| 483 | assert(p >= MINIMAL_POWER2);
|
---|
| 484 | assert(tab->entries_bound <= get_allocated_entries(tab)
|
---|
| 485 | && tab->entries_start <= tab->entries_bound);
|
---|
| 486 | n = 0;
|
---|
| 487 | return;
|
---|
| 488 | if (tab->entries_bound != 0)
|
---|
| 489 | for (i = tab->entries_start; i < tab->entries_bound; i++) {
|
---|
| 490 | assert(tab->entries[i].hash != (st_hash_t) ST_INIT_VAL
|
---|
| 491 | && tab->entries[i].key != ST_INIT_VAL
|
---|
| 492 | && tab->entries[i].record != ST_INIT_VAL);
|
---|
| 493 | if (! DELETED_ENTRY_P(&tab->entries[i]))
|
---|
| 494 | n++;
|
---|
| 495 | }
|
---|
| 496 | assert(n == tab->num_entries);
|
---|
| 497 | if (tab->bins == NULL)
|
---|
| 498 | assert(p <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS);
|
---|
| 499 | else {
|
---|
| 500 | assert(p > MAX_POWER2_FOR_TABLES_WITHOUT_BINS);
|
---|
| 501 | for (n = d = i = 0; i < get_bins_num(tab); i++) {
|
---|
| 502 | assert(get_bin(tab->bins, tab->size_ind, i) != ST_INIT_VAL);
|
---|
| 503 | if (IND_DELETED_BIN_P(tab, i)) {
|
---|
| 504 | d++;
|
---|
| 505 | continue;
|
---|
| 506 | }
|
---|
| 507 | else if (IND_EMPTY_BIN_P(tab, i))
|
---|
| 508 | continue;
|
---|
| 509 | n++;
|
---|
| 510 | e = get_bin(tab->bins, tab->size_ind, i) - ENTRY_BASE;
|
---|
| 511 | assert(tab->entries_start <= e && e < tab->entries_bound);
|
---|
| 512 | assert(! DELETED_ENTRY_P(&tab->entries[e]));
|
---|
| 513 | assert(tab->entries[e].hash != (st_hash_t) ST_INIT_VAL
|
---|
| 514 | && tab->entries[e].key != ST_INIT_VAL
|
---|
| 515 | && tab->entries[e].record != ST_INIT_VAL);
|
---|
| 516 | }
|
---|
| 517 | assert(n == tab->num_entries);
|
---|
| 518 | assert(n + d < get_bins_num(tab));
|
---|
[279] | 519 | }
|
---|
[331] | 520 | }
|
---|
[279] | 521 | #endif
|
---|
| 522 |
|
---|
| 523 | #ifdef HASH_LOG
|
---|
[331] | 524 | #ifdef HAVE_UNISTD_H
|
---|
| 525 | #include <unistd.h>
|
---|
| 526 | #endif
|
---|
| 527 | static struct {
|
---|
| 528 | int all, total, num, str, strcase;
|
---|
| 529 | } collision;
|
---|
| 530 |
|
---|
| 531 | /* Flag switching off output of package statistics at the end of
|
---|
| 532 | program. */
|
---|
[279] | 533 | static int init_st = 0;
|
---|
| 534 |
|
---|
[331] | 535 | /* Output overall number of table searches and collisions into a
|
---|
| 536 | temporary file. */
|
---|
[279] | 537 | static void
|
---|
[331] | 538 | stat_col(void)
|
---|
[279] | 539 | {
|
---|
[331] | 540 | char fname[10+sizeof(long)*3];
|
---|
| 541 | FILE *f;
|
---|
| 542 | if (!collision.total) return;
|
---|
| 543 | f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
|
---|
| 544 | if (f == 0) return ;
|
---|
| 545 |
|
---|
| 546 | fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
|
---|
| 547 | ((double)collision.all / (collision.total)) * 100);
|
---|
| 548 | fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
|
---|
[279] | 549 | fclose(f);
|
---|
| 550 | }
|
---|
| 551 | #endif
|
---|
| 552 |
|
---|
[331] | 553 | /* Create and return table with TYPE which can hold at least SIZE
|
---|
| 554 | entries. The real number of entries which the table can hold is
|
---|
| 555 | the nearest power of two for SIZE. */
|
---|
| 556 | st_table *
|
---|
| 557 | st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
|
---|
[279] | 558 | {
|
---|
[331] | 559 | st_table *tab;
|
---|
| 560 | int n;
|
---|
[279] | 561 |
|
---|
| 562 | #ifdef HASH_LOG
|
---|
[331] | 563 | #if HASH_LOG+0 < 0
|
---|
| 564 | {
|
---|
| 565 | const char *e = getenv("ST_HASH_LOG");
|
---|
| 566 | if (!e || !*e) init_st = 1;
|
---|
| 567 | }
|
---|
| 568 | #endif
|
---|
[279] | 569 | if (init_st == 0) {
|
---|
[331] | 570 | init_st = 1;
|
---|
| 571 | atexit(stat_col);
|
---|
[279] | 572 | }
|
---|
| 573 | #endif
|
---|
| 574 |
|
---|
[331] | 575 | n = get_power2(size);
|
---|
| 576 | #ifndef RUBY
|
---|
| 577 | if (n < 0)
|
---|
| 578 | return NULL;
|
---|
| 579 | #endif
|
---|
| 580 | tab = (st_table *) malloc(sizeof (st_table));
|
---|
| 581 | if (tab == NULL)
|
---|
| 582 | return NULL;
|
---|
| 583 | tab->type = type;
|
---|
| 584 | tab->entry_power = n;
|
---|
| 585 | tab->bin_power = features[n].bin_power;
|
---|
| 586 | tab->size_ind = features[n].size_ind;
|
---|
| 587 | if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
|
---|
| 588 | tab->bins = NULL;
|
---|
| 589 | else {
|
---|
| 590 | tab->bins = (st_index_t *) malloc(bins_size(tab));
|
---|
| 591 | if (tab->bins == NULL) {
|
---|
| 592 | free(tab);
|
---|
| 593 | return NULL;
|
---|
| 594 | }
|
---|
| 595 | }
|
---|
| 596 | tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
|
---|
| 597 | * sizeof(st_table_entry));
|
---|
| 598 | if (tab->entries == NULL) {
|
---|
| 599 | st_free_table(tab);
|
---|
| 600 | return NULL;
|
---|
| 601 | }
|
---|
| 602 | #ifdef ST_DEBUG
|
---|
| 603 | memset(tab->entries, ST_INIT_VAL_BYTE,
|
---|
| 604 | get_allocated_entries(tab) * sizeof(st_table_entry));
|
---|
| 605 | if (tab->bins != NULL)
|
---|
| 606 | memset(tab->bins, ST_INIT_VAL_BYTE, bins_size(tab));
|
---|
| 607 | #endif
|
---|
| 608 | make_tab_empty(tab);
|
---|
| 609 | tab->rebuilds_num = 0;
|
---|
| 610 | #ifdef ST_DEBUG
|
---|
| 611 | st_check(tab);
|
---|
| 612 | #endif
|
---|
| 613 | return tab;
|
---|
[279] | 614 | }
|
---|
| 615 |
|
---|
[331] | 616 | #ifdef RUBY
|
---|
| 617 | /* Create and return table with TYPE which can hold a minimal number
|
---|
| 618 | of entries (see comments for get_power2). */
|
---|
| 619 | st_table *
|
---|
| 620 | st_init_table(const struct st_hash_type *type)
|
---|
[279] | 621 | {
|
---|
| 622 | return st_init_table_with_size(type, 0);
|
---|
| 623 | }
|
---|
| 624 |
|
---|
[331] | 625 | /* Create and return table which can hold a minimal number of
|
---|
| 626 | numbers. */
|
---|
| 627 | st_table *
|
---|
[279] | 628 | st_init_numtable(void)
|
---|
| 629 | {
|
---|
| 630 | return st_init_table(&type_numhash);
|
---|
| 631 | }
|
---|
| 632 |
|
---|
[331] | 633 | /* Create and return table which can hold SIZE numbers. */
|
---|
| 634 | st_table *
|
---|
| 635 | st_init_numtable_with_size(st_index_t size)
|
---|
[279] | 636 | {
|
---|
| 637 | return st_init_table_with_size(&type_numhash, size);
|
---|
| 638 | }
|
---|
| 639 |
|
---|
[331] | 640 | /* Create and return table which can hold a minimal number of
|
---|
| 641 | strings. */
|
---|
| 642 | st_table *
|
---|
[279] | 643 | st_init_strtable(void)
|
---|
| 644 | {
|
---|
| 645 | return st_init_table(&type_strhash);
|
---|
| 646 | }
|
---|
| 647 |
|
---|
[331] | 648 | /* Create and return table which can hold SIZE strings. */
|
---|
| 649 | st_table *
|
---|
| 650 | st_init_strtable_with_size(st_index_t size)
|
---|
[279] | 651 | {
|
---|
| 652 | return st_init_table_with_size(&type_strhash, size);
|
---|
| 653 | }
|
---|
| 654 |
|
---|
[331] | 655 | /* Create and return table which can hold a minimal number of strings
|
---|
| 656 | whose character case is ignored. */
|
---|
| 657 | st_table *
|
---|
| 658 | st_init_strcasetable(void)
|
---|
| 659 | {
|
---|
| 660 | return st_init_table(&type_strcasehash);
|
---|
| 661 | }
|
---|
| 662 |
|
---|
| 663 | /* Create and return table which can hold SIZE strings whose character
|
---|
| 664 | case is ignored. */
|
---|
| 665 | st_table *
|
---|
| 666 | st_init_strcasetable_with_size(st_index_t size)
|
---|
| 667 | {
|
---|
| 668 | return st_init_table_with_size(&type_strcasehash, size);
|
---|
| 669 | }
|
---|
| 670 |
|
---|
| 671 | /* Make table TAB empty. */
|
---|
[279] | 672 | void
|
---|
[331] | 673 | st_clear(st_table *tab)
|
---|
[279] | 674 | {
|
---|
[331] | 675 | make_tab_empty(tab);
|
---|
| 676 | tab->rebuilds_num++;
|
---|
| 677 | #ifdef ST_DEBUG
|
---|
| 678 | st_check(tab);
|
---|
| 679 | #endif
|
---|
| 680 | }
|
---|
| 681 | #endif /* RUBY */
|
---|
[279] | 682 |
|
---|
[331] | 683 | /* Free table TAB space. */
|
---|
| 684 | void
|
---|
| 685 | st_free_table(st_table *tab)
|
---|
| 686 | {
|
---|
| 687 | if (tab->bins != NULL)
|
---|
| 688 | free(tab->bins);
|
---|
| 689 | free(tab->entries);
|
---|
| 690 | free(tab);
|
---|
[279] | 691 | }
|
---|
| 692 |
|
---|
[331] | 693 | #ifdef RUBY
|
---|
| 694 | /* Return byte size of memory allocted for table TAB. */
|
---|
| 695 | size_t
|
---|
| 696 | st_memsize(const st_table *tab)
|
---|
| 697 | {
|
---|
| 698 | return(sizeof(st_table)
|
---|
| 699 | + (tab->bins == NULL ? 0 : bins_size(tab))
|
---|
| 700 | + get_allocated_entries(tab) * sizeof(st_table_entry));
|
---|
| 701 | }
|
---|
| 702 | #endif /* RUBY */
|
---|
[279] | 703 |
|
---|
[331] | 704 | static st_index_t
|
---|
| 705 | find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
|
---|
| 706 |
|
---|
| 707 | static st_index_t
|
---|
| 708 | find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
|
---|
| 709 |
|
---|
| 710 | static st_index_t
|
---|
| 711 | find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
|
---|
| 712 |
|
---|
| 713 | static st_index_t
|
---|
| 714 | find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
|
---|
| 715 | st_data_t key, st_index_t *bin_ind);
|
---|
| 716 |
|
---|
[279] | 717 | #ifdef HASH_LOG
|
---|
[331] | 718 | static void
|
---|
| 719 | count_collision(const struct st_hash_type *type)
|
---|
| 720 | {
|
---|
| 721 | collision.all++;
|
---|
| 722 | if (type == &type_numhash) {
|
---|
| 723 | collision.num++;
|
---|
| 724 | }
|
---|
| 725 | else if (type == &type_strhash) {
|
---|
| 726 | collision.strcase++;
|
---|
| 727 | }
|
---|
| 728 | else if (type == &type_strcasehash) {
|
---|
| 729 | collision.str++;
|
---|
| 730 | }
|
---|
| 731 | }
|
---|
| 732 |
|
---|
| 733 | #define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
|
---|
| 734 | #define FOUND_BIN (collision_check ? collision.total++ : (void)0)
|
---|
| 735 | #define collision_check 0
|
---|
[279] | 736 | #else
|
---|
| 737 | #define COLLISION
|
---|
[331] | 738 | #define FOUND_BIN
|
---|
[279] | 739 | #endif
|
---|
| 740 |
|
---|
[331] | 741 | /* If the number of entries in the table is at least REBUILD_THRESHOLD
|
---|
| 742 | times less than the entry array length, decrease the table
|
---|
| 743 | size. */
|
---|
| 744 | #define REBUILD_THRESHOLD 4
|
---|
[279] | 745 |
|
---|
[331] | 746 | #if REBUILD_THRESHOLD < 2
|
---|
| 747 | #error "REBUILD_THRESHOLD should be >= 2"
|
---|
| 748 | #endif
|
---|
| 749 |
|
---|
| 750 | /* Rebuild table TAB. Rebuilding removes all deleted bins and entries
|
---|
| 751 | and can change size of the table entries and bins arrays.
|
---|
| 752 | Rebuilding is implemented by creation of a new table or by
|
---|
| 753 | compaction of the existing one. */
|
---|
| 754 | static void
|
---|
| 755 | rebuild_table(st_table *tab)
|
---|
[279] | 756 | {
|
---|
[331] | 757 | st_index_t i, ni, bound;
|
---|
| 758 | unsigned int size_ind;
|
---|
| 759 | st_table *new_tab;
|
---|
| 760 | st_table_entry *entries, *new_entries;
|
---|
| 761 | st_table_entry *curr_entry_ptr;
|
---|
| 762 | st_index_t *bins;
|
---|
| 763 | st_index_t bin_ind;
|
---|
[279] | 764 |
|
---|
[331] | 765 | st_assert(tab != NULL);
|
---|
| 766 | bound = tab->entries_bound;
|
---|
| 767 | entries = tab->entries;
|
---|
| 768 | if ((2 * tab->num_entries <= get_allocated_entries(tab)
|
---|
| 769 | && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
|
---|
| 770 | || tab->num_entries < (1 << MINIMAL_POWER2)) {
|
---|
| 771 | /* Compaction: */
|
---|
| 772 | tab->num_entries = 0;
|
---|
| 773 | if (tab->bins != NULL)
|
---|
| 774 | initialize_bins(tab);
|
---|
| 775 | new_tab = tab;
|
---|
| 776 | new_entries = entries;
|
---|
[279] | 777 | }
|
---|
| 778 | else {
|
---|
[331] | 779 | new_tab = st_init_table_with_size(tab->type,
|
---|
| 780 | 2 * tab->num_entries - 1);
|
---|
| 781 | new_entries = new_tab->entries;
|
---|
[279] | 782 | }
|
---|
[331] | 783 | ni = 0;
|
---|
| 784 | bins = new_tab->bins;
|
---|
| 785 | size_ind = get_size_ind(new_tab);
|
---|
| 786 | for (i = tab->entries_start; i < bound; i++) {
|
---|
| 787 | curr_entry_ptr = &entries[i];
|
---|
| 788 | PREFETCH(entries + i + 1, 0);
|
---|
| 789 | if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
|
---|
| 790 | continue;
|
---|
| 791 | if (&new_entries[ni] != curr_entry_ptr)
|
---|
| 792 | new_entries[ni] = *curr_entry_ptr;
|
---|
| 793 | if (EXPECT(bins != NULL, 1)) {
|
---|
| 794 | bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
|
---|
| 795 | curr_entry_ptr->key);
|
---|
| 796 | st_assert(bin_ind != UNDEFINED_BIN_IND
|
---|
| 797 | && (tab == new_tab || new_tab->rebuilds_num == 0)
|
---|
| 798 | && IND_EMPTY_BIN_P(new_tab, bin_ind));
|
---|
| 799 | set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
|
---|
| 800 | }
|
---|
| 801 | new_tab->num_entries++;
|
---|
| 802 | ni++;
|
---|
| 803 | }
|
---|
| 804 | if (new_tab != tab) {
|
---|
| 805 | tab->entry_power = new_tab->entry_power;
|
---|
| 806 | tab->bin_power = new_tab->bin_power;
|
---|
| 807 | tab->size_ind = new_tab->size_ind;
|
---|
| 808 | st_assert (tab->num_entries == ni && new_tab->num_entries == ni);
|
---|
| 809 | if (tab->bins != NULL)
|
---|
| 810 | free(tab->bins);
|
---|
| 811 | tab->bins = new_tab->bins;
|
---|
| 812 | free(tab->entries);
|
---|
| 813 | tab->entries = new_tab->entries;
|
---|
| 814 | free(new_tab);
|
---|
| 815 | }
|
---|
| 816 | tab->entries_start = 0;
|
---|
| 817 | tab->entries_bound = tab->num_entries;
|
---|
| 818 | tab->rebuilds_num++;
|
---|
| 819 | #ifdef ST_DEBUG
|
---|
| 820 | st_check(tab);
|
---|
| 821 | #endif
|
---|
[279] | 822 | }
|
---|
| 823 |
|
---|
[331] | 824 | /* Return the next secondary hash index for table TAB using previous
|
---|
| 825 | index IND and PERTERB. Finally modulo of the function becomes a
|
---|
| 826 | full *cycle linear congruential generator*, in other words it
|
---|
| 827 | guarantees traversing all table bins in extreme case.
|
---|
[279] | 828 |
|
---|
[331] | 829 | According the Hull-Dobell theorem a generator
|
---|
| 830 | "Xnext = (a*Xprev + c) mod m" is a full cycle generator iff
|
---|
| 831 | o m and c are relatively prime
|
---|
| 832 | o a-1 is divisible by all prime factors of m
|
---|
| 833 | o a-1 is divisible by 4 if m is divisible by 4.
|
---|
| 834 |
|
---|
| 835 | For our case a is 5, c is 1, and m is a power of two. */
|
---|
| 836 | static inline st_index_t
|
---|
| 837 | secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb)
|
---|
[279] | 838 | {
|
---|
[331] | 839 | *perterb >>= 11;
|
---|
| 840 | ind = (ind << 2) + ind + *perterb + 1;
|
---|
| 841 | return hash_bin(ind, tab);
|
---|
| 842 | }
|
---|
[279] | 843 |
|
---|
[331] | 844 | /* Find an entry with HASH_VALUE and KEY in TABLE using a linear
|
---|
| 845 | search. Return the index of the found entry in array `entries`.
|
---|
| 846 | If it is not found, return UNDEFINED_ENTRY_IND. */
|
---|
| 847 | static inline st_index_t
|
---|
| 848 | find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
|
---|
| 849 | {
|
---|
| 850 | st_index_t i, bound;
|
---|
| 851 | st_table_entry *entries;
|
---|
[279] | 852 |
|
---|
[331] | 853 | bound = tab->entries_bound;
|
---|
| 854 | entries = tab->entries;
|
---|
| 855 | for (i = tab->entries_start; i < bound; i++) {
|
---|
| 856 | if (PTR_EQUAL(tab, &entries[i], hash_value, key))
|
---|
| 857 | return i;
|
---|
[279] | 858 | }
|
---|
[331] | 859 | return UNDEFINED_ENTRY_IND;
|
---|
| 860 | }
|
---|
| 861 |
|
---|
| 862 | /* Use the quadratic probing. The method has a better data locality
|
---|
| 863 | but more collisions than the current approach. In average it
|
---|
| 864 | results in a bit slower search. */
|
---|
| 865 | /*#define QUADRATIC_PROBE*/
|
---|
| 866 |
|
---|
| 867 | /* Return index of entry with HASH_VALUE and KEY in table TAB. If
|
---|
| 868 | there is no such entry, return UNDEFINED_ENTRY_IND. */
|
---|
| 869 | static st_index_t
|
---|
| 870 | find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
|
---|
| 871 | {
|
---|
| 872 | st_index_t ind;
|
---|
| 873 | #ifdef QUADRATIC_PROBE
|
---|
| 874 | st_index_t d;
|
---|
| 875 | #else
|
---|
| 876 | st_index_t peterb;
|
---|
| 877 | #endif
|
---|
| 878 | st_index_t bin;
|
---|
| 879 | st_table_entry *entries = tab->entries;
|
---|
| 880 |
|
---|
| 881 | st_assert(tab != NULL && tab->bins != NULL);
|
---|
| 882 | ind = hash_bin(hash_value, tab);
|
---|
| 883 | #ifdef QUADRATIC_PROBE
|
---|
| 884 | d = 1;
|
---|
| 885 | #else
|
---|
| 886 | peterb = hash_value;
|
---|
| 887 | #endif
|
---|
| 888 | FOUND_BIN;
|
---|
| 889 | for (;;) {
|
---|
| 890 | bin = get_bin(tab->bins, get_size_ind(tab), ind);
|
---|
| 891 | if (! EMPTY_OR_DELETED_BIN_P(bin)
|
---|
| 892 | && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key))
|
---|
| 893 | break;
|
---|
| 894 | else if (EMPTY_BIN_P(bin))
|
---|
| 895 | return UNDEFINED_ENTRY_IND;
|
---|
| 896 | #ifdef QUADRATIC_PROBE
|
---|
| 897 | ind = hash_bin(ind + d, tab);
|
---|
| 898 | d++;
|
---|
| 899 | #else
|
---|
| 900 | ind = secondary_hash(ind, tab, &peterb);
|
---|
| 901 | #endif
|
---|
| 902 | COLLISION;
|
---|
[279] | 903 | }
|
---|
[331] | 904 | return bin;
|
---|
[279] | 905 | }
|
---|
| 906 |
|
---|
[331] | 907 | /* Find and return index of table TAB bin corresponding to an entry
|
---|
| 908 | with HASH_VALUE and KEY. If there is no such bin, return
|
---|
| 909 | UNDEFINED_BIN_IND. */
|
---|
| 910 | static st_index_t
|
---|
| 911 | find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
|
---|
[279] | 912 | {
|
---|
[331] | 913 | st_index_t ind;
|
---|
| 914 | #ifdef QUADRATIC_PROBE
|
---|
| 915 | st_index_t d;
|
---|
| 916 | #else
|
---|
| 917 | st_index_t peterb;
|
---|
| 918 | #endif
|
---|
| 919 | st_index_t bin;
|
---|
| 920 | st_table_entry *entries = tab->entries;
|
---|
[279] | 921 |
|
---|
[331] | 922 | st_assert(tab != NULL && tab->bins != NULL);
|
---|
| 923 | ind = hash_bin(hash_value, tab);
|
---|
| 924 | #ifdef QUADRATIC_PROBE
|
---|
| 925 | d = 1;
|
---|
| 926 | #else
|
---|
| 927 | peterb = hash_value;
|
---|
| 928 | #endif
|
---|
| 929 | FOUND_BIN;
|
---|
| 930 | for (;;) {
|
---|
| 931 | bin = get_bin(tab->bins, get_size_ind(tab), ind);
|
---|
| 932 | if (! EMPTY_OR_DELETED_BIN_P(bin)
|
---|
| 933 | && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key))
|
---|
| 934 | break;
|
---|
| 935 | else if (EMPTY_BIN_P(bin))
|
---|
| 936 | return UNDEFINED_BIN_IND;
|
---|
| 937 | #ifdef QUADRATIC_PROBE
|
---|
| 938 | ind = hash_bin(ind + d, tab);
|
---|
| 939 | d++;
|
---|
| 940 | #else
|
---|
| 941 | ind = secondary_hash(ind, tab, &peterb);
|
---|
| 942 | #endif
|
---|
| 943 | COLLISION;
|
---|
| 944 | }
|
---|
| 945 | return ind;
|
---|
[279] | 946 | }
|
---|
| 947 |
|
---|
[331] | 948 | /* Find and return index of table TAB bin corresponding to an entry
|
---|
| 949 | with HASH_VALUE and KEY. The entry should be in the table
|
---|
| 950 | already. */
|
---|
| 951 | static st_index_t
|
---|
| 952 | find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
|
---|
[279] | 953 | {
|
---|
[331] | 954 | st_index_t ind;
|
---|
| 955 | #ifdef QUADRATIC_PROBE
|
---|
| 956 | st_index_t d;
|
---|
| 957 | #else
|
---|
| 958 | st_index_t peterb;
|
---|
| 959 | #endif
|
---|
| 960 | st_index_t bin;
|
---|
| 961 | st_table_entry *entries = tab->entries;
|
---|
[279] | 962 |
|
---|
[331] | 963 | st_assert(tab != NULL && tab->bins != NULL);
|
---|
| 964 | ind = hash_bin(hash_value, tab);
|
---|
| 965 | #ifdef QUADRATIC_PROBE
|
---|
| 966 | d = 1;
|
---|
| 967 | #else
|
---|
| 968 | peterb = hash_value;
|
---|
| 969 | #endif
|
---|
| 970 | FOUND_BIN;
|
---|
| 971 | for (;;) {
|
---|
| 972 | bin = get_bin(tab->bins, get_size_ind(tab), ind);
|
---|
| 973 | if (EMPTY_OR_DELETED_BIN_P(bin))
|
---|
| 974 | return ind;
|
---|
| 975 | st_assert (! PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key));
|
---|
| 976 | #ifdef QUADRATIC_PROBE
|
---|
| 977 | ind = hash_bin(ind + d, tab);
|
---|
| 978 | d++;
|
---|
| 979 | #else
|
---|
| 980 | ind = secondary_hash(ind, tab, &peterb);
|
---|
| 981 | #endif
|
---|
| 982 | COLLISION;
|
---|
| 983 | }
|
---|
| 984 | }
|
---|
[279] | 985 |
|
---|
[331] | 986 | /* Return index of table TAB bin for HASH_VALUE and KEY through
|
---|
| 987 | BIN_IND and the pointed value as the function result. Reserve the
|
---|
| 988 | bin for inclusion of the corresponding entry into the table if it
|
---|
| 989 | is not there yet. We always find such bin as bins array length is
|
---|
| 990 | bigger entries array. Although we can reuse a deleted bin, the
|
---|
| 991 | result bin value is always empty if the table has no entry with
|
---|
| 992 | KEY. Return the entries array index of the found entry or
|
---|
| 993 | UNDEFINED_ENTRY_IND if it is not found. */
|
---|
| 994 | static st_index_t
|
---|
| 995 | find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
|
---|
| 996 | st_data_t key, st_index_t *bin_ind) {
|
---|
| 997 | st_index_t ind;
|
---|
| 998 | st_hash_t curr_hash_value = *hash_value;
|
---|
| 999 | #ifdef QUADRATIC_PROBE
|
---|
| 1000 | st_index_t d;
|
---|
| 1001 | #else
|
---|
| 1002 | st_index_t peterb;
|
---|
| 1003 | #endif
|
---|
| 1004 | st_index_t entry_index;
|
---|
| 1005 | st_index_t first_deleted_bin_ind;
|
---|
| 1006 | st_table_entry *entries;
|
---|
| 1007 |
|
---|
| 1008 | st_assert(tab != NULL && tab->bins != NULL
|
---|
| 1009 | && tab->entries_bound <= get_allocated_entries(tab)
|
---|
| 1010 | && tab->entries_start <= tab->entries_bound);
|
---|
| 1011 | ind = hash_bin(curr_hash_value, tab);
|
---|
| 1012 | #ifdef QUADRATIC_PROBE
|
---|
| 1013 | d = 1;
|
---|
| 1014 | #else
|
---|
| 1015 | peterb = curr_hash_value;
|
---|
| 1016 | #endif
|
---|
| 1017 | FOUND_BIN;
|
---|
| 1018 | first_deleted_bin_ind = UNDEFINED_BIN_IND;
|
---|
| 1019 | entries = tab->entries;
|
---|
| 1020 | for (;;) {
|
---|
| 1021 | entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
|
---|
| 1022 | if (EMPTY_BIN_P(entry_index)) {
|
---|
| 1023 | tab->num_entries++;
|
---|
| 1024 | entry_index = UNDEFINED_ENTRY_IND;
|
---|
| 1025 | if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
|
---|
| 1026 | /* We can reuse bin of a deleted entry. */
|
---|
| 1027 | ind = first_deleted_bin_ind;
|
---|
| 1028 | MARK_BIN_EMPTY(tab, ind);
|
---|
| 1029 | }
|
---|
| 1030 | break;
|
---|
| 1031 | } else if (! DELETED_BIN_P(entry_index)) {
|
---|
| 1032 | if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
|
---|
| 1033 | break;
|
---|
| 1034 | } else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
|
---|
| 1035 | first_deleted_bin_ind = ind;
|
---|
| 1036 | #ifdef QUADRATIC_PROBE
|
---|
| 1037 | ind = hash_bin(ind + d, tab);
|
---|
| 1038 | d++;
|
---|
| 1039 | #else
|
---|
| 1040 | ind = secondary_hash(ind, tab, &peterb);
|
---|
| 1041 | #endif
|
---|
| 1042 | COLLISION;
|
---|
[279] | 1043 | }
|
---|
[331] | 1044 | *bin_ind = ind;
|
---|
| 1045 | return entry_index;
|
---|
[279] | 1046 | }
|
---|
| 1047 |
|
---|
[331] | 1048 | /* Find an entry with KEY in table TAB. Return non-zero if we found
|
---|
| 1049 | it. Set up *RECORD to the found entry record. */
|
---|
| 1050 | int
|
---|
| 1051 | st_lookup(st_table *tab, st_data_t key, st_data_t *value)
|
---|
[279] | 1052 | {
|
---|
[331] | 1053 | st_index_t bin;
|
---|
| 1054 | st_hash_t hash = do_hash(key, tab);
|
---|
[279] | 1055 |
|
---|
[331] | 1056 | if (tab->bins == NULL) {
|
---|
| 1057 | bin = find_entry(tab, hash, key);
|
---|
| 1058 | if (bin == UNDEFINED_ENTRY_IND)
|
---|
| 1059 | return 0;
|
---|
| 1060 | } else {
|
---|
| 1061 | bin = find_table_entry_ind(tab, hash, key);
|
---|
| 1062 | if (bin == UNDEFINED_ENTRY_IND)
|
---|
| 1063 | return 0;
|
---|
| 1064 | bin -= ENTRY_BASE;
|
---|
[279] | 1065 | }
|
---|
[331] | 1066 | if (value != 0)
|
---|
| 1067 | *value = tab->entries[bin].record;
|
---|
| 1068 | return 1;
|
---|
| 1069 | }
|
---|
[279] | 1070 |
|
---|
[331] | 1071 | #ifdef RUBY
|
---|
| 1072 | /* Find an entry with KEY in table TAB. Return non-zero if we found
|
---|
| 1073 | it. Set up *RESULT to the found table entry key. */
|
---|
| 1074 | int
|
---|
| 1075 | st_get_key(st_table *tab, st_data_t key, st_data_t *result)
|
---|
| 1076 | {
|
---|
| 1077 | st_index_t bin;
|
---|
| 1078 | st_hash_t hash = do_hash(key, tab);
|
---|
[279] | 1079 |
|
---|
[331] | 1080 | if (tab->bins == NULL) {
|
---|
| 1081 | bin = find_entry(tab, hash, key);
|
---|
| 1082 | if (bin == UNDEFINED_ENTRY_IND)
|
---|
| 1083 | return 0;
|
---|
| 1084 | } else {
|
---|
| 1085 | bin = find_table_entry_ind(tab, hash, key);
|
---|
| 1086 | if (bin == UNDEFINED_ENTRY_IND)
|
---|
| 1087 | return 0;
|
---|
| 1088 | bin -= ENTRY_BASE;
|
---|
[279] | 1089 | }
|
---|
[331] | 1090 | if (result != 0)
|
---|
| 1091 | *result = tab->entries[bin].key;
|
---|
| 1092 | return 1;
|
---|
| 1093 | }
|
---|
| 1094 | #endif /* RUBY */
|
---|
[279] | 1095 |
|
---|
[331] | 1096 | /* Check the table and rebuild it if it is necessary. */
|
---|
| 1097 | static inline void
|
---|
| 1098 | rebuild_table_if_necessary (st_table *tab)
|
---|
| 1099 | {
|
---|
| 1100 | st_index_t bound = tab->entries_bound;
|
---|
| 1101 |
|
---|
| 1102 | if (bound == get_allocated_entries(tab))
|
---|
| 1103 | rebuild_table(tab);
|
---|
| 1104 | st_assert(tab->entries_bound < get_allocated_entries(tab));
|
---|
[279] | 1105 | }
|
---|
| 1106 |
|
---|
[331] | 1107 | /* Insert (KEY, VALUE) into table TAB and return zero. If there is
|
---|
| 1108 | already entry with KEY in the table, return nonzero and and update
|
---|
| 1109 | the value of the found entry. */
|
---|
[279] | 1110 | int
|
---|
[331] | 1111 | st_insert(st_table *tab, st_data_t key, st_data_t value)
|
---|
[279] | 1112 | {
|
---|
[331] | 1113 | st_table_entry *entry;
|
---|
| 1114 | st_index_t bin;
|
---|
| 1115 | st_index_t ind;
|
---|
| 1116 | st_hash_t hash_value;
|
---|
| 1117 | st_index_t bin_ind;
|
---|
| 1118 | int new_p;
|
---|
[279] | 1119 |
|
---|
[331] | 1120 | rebuild_table_if_necessary(tab);
|
---|
| 1121 | hash_value = do_hash(key, tab);
|
---|
| 1122 | if (tab->bins == NULL) {
|
---|
| 1123 | bin = find_entry(tab, hash_value, key);
|
---|
| 1124 | new_p = bin == UNDEFINED_ENTRY_IND;
|
---|
| 1125 | if (new_p)
|
---|
| 1126 | tab->num_entries++;
|
---|
| 1127 | bin_ind = UNDEFINED_BIN_IND;
|
---|
| 1128 | } else {
|
---|
| 1129 | bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
|
---|
| 1130 | key, &bin_ind);
|
---|
| 1131 | new_p = bin == UNDEFINED_ENTRY_IND;
|
---|
| 1132 | bin -= ENTRY_BASE;
|
---|
| 1133 | }
|
---|
| 1134 | if (new_p) {
|
---|
| 1135 | st_assert(tab->entries_bound < get_allocated_entries(tab));
|
---|
| 1136 | ind = tab->entries_bound++;
|
---|
| 1137 | entry = &tab->entries[ind];
|
---|
| 1138 | entry->hash = hash_value;
|
---|
| 1139 | entry->key = key;
|
---|
| 1140 | entry->record = value;
|
---|
| 1141 | if (bin_ind != UNDEFINED_BIN_IND)
|
---|
| 1142 | set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
|
---|
| 1143 | #ifdef ST_DEBUG
|
---|
| 1144 | st_check(tab);
|
---|
| 1145 | #endif
|
---|
| 1146 | return 0;
|
---|
| 1147 | }
|
---|
| 1148 | tab->entries[bin].record = value;
|
---|
| 1149 | #ifdef ST_DEBUG
|
---|
| 1150 | st_check(tab);
|
---|
| 1151 | #endif
|
---|
| 1152 | return 1;
|
---|
| 1153 | }
|
---|
[279] | 1154 |
|
---|
[331] | 1155 | #ifdef RUBY
|
---|
| 1156 | /* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
|
---|
| 1157 | entry with KEY before the insertion. */
|
---|
| 1158 | static inline void
|
---|
| 1159 | st_add_direct_with_hash(st_table *tab,
|
---|
| 1160 | st_data_t key, st_data_t value, st_hash_t hash) {
|
---|
| 1161 | st_table_entry *entry;
|
---|
| 1162 | st_index_t ind;
|
---|
| 1163 | st_index_t bin_ind;
|
---|
| 1164 |
|
---|
| 1165 | rebuild_table_if_necessary(tab);
|
---|
| 1166 | ind = tab->entries_bound++;
|
---|
| 1167 | entry = &tab->entries[ind];
|
---|
| 1168 | entry->hash = hash;
|
---|
| 1169 | entry->key = key;
|
---|
| 1170 | entry->record = value;
|
---|
| 1171 | tab->num_entries++;
|
---|
| 1172 | if (tab->bins != NULL) {
|
---|
| 1173 | bin_ind = find_table_bin_ind_direct(tab, hash, key);
|
---|
| 1174 | st_assert (bin_ind != UNDEFINED_BIN_IND);
|
---|
| 1175 | set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
|
---|
[279] | 1176 | }
|
---|
[331] | 1177 | #ifdef ST_DEBUG
|
---|
| 1178 | st_check(tab);
|
---|
| 1179 | #endif
|
---|
| 1180 | }
|
---|
[279] | 1181 |
|
---|
[331] | 1182 | /* Insert (KEY, VALUE) into table TAB. The table should not have
|
---|
| 1183 | entry with KEY before the insertion. */
|
---|
| 1184 | void
|
---|
| 1185 | st_add_direct(st_table *tab, st_data_t key, st_data_t value)
|
---|
| 1186 | {
|
---|
| 1187 | st_hash_t hash_value;
|
---|
| 1188 |
|
---|
| 1189 | hash_value = do_hash(key, tab);
|
---|
| 1190 | st_add_direct_with_hash(tab, key, value, hash_value);
|
---|
| 1191 | }
|
---|
| 1192 |
|
---|
| 1193 | /* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
|
---|
| 1194 | there is already entry with KEY in the table, return nonzero and
|
---|
| 1195 | and update the value of the found entry. */
|
---|
| 1196 | int
|
---|
| 1197 | st_insert2(st_table *tab, st_data_t key, st_data_t value,
|
---|
| 1198 | st_data_t (*func)(st_data_t)) {
|
---|
| 1199 | st_table_entry *entry;
|
---|
| 1200 | st_index_t bin;
|
---|
| 1201 | st_index_t ind, check;
|
---|
| 1202 | st_hash_t hash_value;
|
---|
| 1203 | st_index_t bin_ind;
|
---|
| 1204 | int new_p;
|
---|
| 1205 |
|
---|
| 1206 | rebuild_table_if_necessary (tab);
|
---|
| 1207 | hash_value = do_hash(key, tab);
|
---|
| 1208 | if (tab->bins == NULL) {
|
---|
| 1209 | bin = find_entry(tab, hash_value, key);
|
---|
| 1210 | new_p = bin == UNDEFINED_ENTRY_IND;
|
---|
| 1211 | bin_ind = UNDEFINED_BIN_IND;
|
---|
| 1212 | } else {
|
---|
| 1213 | bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
|
---|
| 1214 | key, &bin_ind);
|
---|
| 1215 | new_p = bin == UNDEFINED_ENTRY_IND;
|
---|
| 1216 | bin -= ENTRY_BASE;
|
---|
[279] | 1217 | }
|
---|
[331] | 1218 | if (new_p) {
|
---|
| 1219 | st_assert(tab->entries_bound < get_allocated_entries(tab));
|
---|
| 1220 | check = tab->rebuilds_num;
|
---|
| 1221 | key = (*func)(key);
|
---|
| 1222 | st_assert(check == tab->rebuilds_num
|
---|
| 1223 | && do_hash(key, tab) == hash_value);
|
---|
| 1224 | ind = tab->entries_bound++;
|
---|
| 1225 | entry = &tab->entries[ind];
|
---|
| 1226 | entry->hash = hash_value;
|
---|
| 1227 | entry->key = key;
|
---|
| 1228 | entry->record = value;
|
---|
| 1229 | if (bin_ind != UNDEFINED_BIN_IND)
|
---|
| 1230 | set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
|
---|
| 1231 | #ifdef ST_DEBUG
|
---|
| 1232 | st_check(tab);
|
---|
| 1233 | #endif
|
---|
| 1234 | return 0;
|
---|
| 1235 | }
|
---|
| 1236 | tab->entries[bin].record = value;
|
---|
| 1237 | #ifdef ST_DEBUG
|
---|
| 1238 | st_check(tab);
|
---|
| 1239 | #endif
|
---|
| 1240 | return 1;
|
---|
| 1241 | }
|
---|
[279] | 1242 |
|
---|
[331] | 1243 | /* Create and return a copy of table OLD_TAB. */
|
---|
| 1244 | st_table *
|
---|
| 1245 | st_copy(st_table *old_tab)
|
---|
| 1246 | {
|
---|
| 1247 | st_table *new_tab;
|
---|
| 1248 |
|
---|
| 1249 | new_tab = (st_table *) malloc(sizeof(st_table));
|
---|
| 1250 | if (new_tab == NULL)
|
---|
| 1251 | return NULL;
|
---|
| 1252 | *new_tab = *old_tab;
|
---|
| 1253 | if (old_tab->bins == NULL)
|
---|
| 1254 | new_tab->bins = NULL;
|
---|
| 1255 | else {
|
---|
| 1256 | new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
|
---|
| 1257 | if (new_tab->bins == NULL) {
|
---|
| 1258 | free(new_tab);
|
---|
| 1259 | return NULL;
|
---|
[279] | 1260 | }
|
---|
| 1261 | }
|
---|
[331] | 1262 | new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
|
---|
| 1263 | * sizeof(st_table_entry));
|
---|
| 1264 | if (new_tab->entries == NULL) {
|
---|
| 1265 | st_free_table(new_tab);
|
---|
| 1266 | return NULL;
|
---|
| 1267 | }
|
---|
| 1268 | MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
|
---|
| 1269 | get_allocated_entries(old_tab));
|
---|
| 1270 | if (old_tab->bins != NULL)
|
---|
| 1271 | MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
|
---|
| 1272 | #ifdef ST_DEBUG
|
---|
| 1273 | st_check(new_tab);
|
---|
| 1274 | #endif
|
---|
| 1275 | return new_tab;
|
---|
| 1276 | }
|
---|
| 1277 | #endif /* RUBY */
|
---|
[279] | 1278 |
|
---|
[331] | 1279 | /* Update the entries start of table TAB after removing an entry
|
---|
| 1280 | with index N in the array entries. */
|
---|
| 1281 | static inline void
|
---|
| 1282 | update_range_for_deleted(st_table *tab, st_index_t n)
|
---|
| 1283 | {
|
---|
| 1284 | /* Do not update entries_bound here. Otherwise, we can fill all
|
---|
| 1285 | bins by deleted entry value before rebuilding the table. */
|
---|
| 1286 | if (tab->entries_start == n)
|
---|
| 1287 | tab->entries_start = n + 1;
|
---|
[279] | 1288 | }
|
---|
| 1289 |
|
---|
[331] | 1290 | #ifdef RUBY
|
---|
| 1291 | /* Delete entry with KEY from table TAB, set up *VALUE (unless
|
---|
| 1292 | VALUE is zero) from deleted table entry, and return non-zero. If
|
---|
| 1293 | there is no entry with KEY in the table, clear *VALUE (unless VALUE
|
---|
| 1294 | is zero), and return zero. */
|
---|
| 1295 | static int
|
---|
| 1296 | st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
|
---|
| 1297 | {
|
---|
| 1298 | st_table_entry *entry;
|
---|
| 1299 | st_index_t bin;
|
---|
| 1300 | st_index_t bin_ind;
|
---|
| 1301 | st_hash_t hash;
|
---|
| 1302 |
|
---|
| 1303 | st_assert(tab != NULL);
|
---|
| 1304 | hash = do_hash(*key, tab);
|
---|
| 1305 | if (tab->bins == NULL) {
|
---|
| 1306 | bin = find_entry(tab, hash, *key);
|
---|
| 1307 | if (bin == UNDEFINED_ENTRY_IND) {
|
---|
| 1308 | if (value != 0) *value = 0;
|
---|
| 1309 | return 0;
|
---|
| 1310 | }
|
---|
| 1311 | } else {
|
---|
| 1312 | bin_ind = find_table_bin_ind(tab, hash, *key);
|
---|
| 1313 | if (bin_ind == UNDEFINED_BIN_IND) {
|
---|
| 1314 | if (value != 0) *value = 0;
|
---|
| 1315 | return 0;
|
---|
| 1316 | }
|
---|
| 1317 | bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
|
---|
| 1318 | MARK_BIN_DELETED(tab, bin_ind);
|
---|
| 1319 | }
|
---|
| 1320 | entry = &tab->entries[bin];
|
---|
| 1321 | *key = entry->key;
|
---|
| 1322 | if (value != 0) *value = entry->record;
|
---|
| 1323 | MARK_ENTRY_DELETED(entry);
|
---|
| 1324 | tab->num_entries--;
|
---|
| 1325 | update_range_for_deleted(tab, bin);
|
---|
| 1326 | #ifdef ST_DEBUG
|
---|
| 1327 | st_check(tab);
|
---|
| 1328 | #endif
|
---|
| 1329 | return 1;
|
---|
| 1330 | }
|
---|
| 1331 |
|
---|
[279] | 1332 | int
|
---|
[331] | 1333 | st_delete(st_table *tab, st_data_t *key, st_data_t *value)
|
---|
[279] | 1334 | {
|
---|
[331] | 1335 | return st_general_delete(tab, key, value);
|
---|
| 1336 | }
|
---|
[279] | 1337 |
|
---|
[331] | 1338 | /* The function and other functions with suffix '_safe' or '_check'
|
---|
| 1339 | are originated from the previous implementation of the hash tables.
|
---|
| 1340 | It was necessary for correct deleting entries during traversing
|
---|
| 1341 | tables. The current implementation permits deletion during
|
---|
| 1342 | traversing without a specific way to do this. */
|
---|
| 1343 | int
|
---|
| 1344 | st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
|
---|
| 1345 | st_data_t never ATTRIBUTE_UNUSED) {
|
---|
| 1346 | return st_general_delete(tab, key, value);
|
---|
| 1347 | }
|
---|
[279] | 1348 |
|
---|
[331] | 1349 | /* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
|
---|
| 1350 | return zero. Otherwise, remove the first entry in the table.
|
---|
| 1351 | Return its key through KEY and its record through VALUE (unless
|
---|
| 1352 | VALUE is zero). */
|
---|
| 1353 | int
|
---|
| 1354 | st_shift(st_table *tab, st_data_t *key, st_data_t *value)
|
---|
| 1355 | {
|
---|
| 1356 | st_index_t i, bound;
|
---|
| 1357 | st_index_t bin;
|
---|
| 1358 | st_table_entry *entries, *curr_entry_ptr;
|
---|
| 1359 | st_index_t bin_ind;
|
---|
[279] | 1360 |
|
---|
[331] | 1361 | entries = tab->entries;
|
---|
| 1362 | bound = tab->entries_bound;
|
---|
| 1363 | for (i = tab->entries_start; i < bound; i++) {
|
---|
| 1364 | curr_entry_ptr = &entries[i];
|
---|
| 1365 | if (! DELETED_ENTRY_P(curr_entry_ptr)) {
|
---|
| 1366 | if (value != 0) *value = curr_entry_ptr->record;
|
---|
| 1367 | *key = curr_entry_ptr->key;
|
---|
| 1368 | if (tab->bins == NULL) {
|
---|
| 1369 | bin = find_entry(tab, curr_entry_ptr->hash, curr_entry_ptr->key);
|
---|
| 1370 | st_assert(bin != UNDEFINED_ENTRY_IND
|
---|
| 1371 | && &entries[bin] == curr_entry_ptr);
|
---|
| 1372 | } else {
|
---|
| 1373 | bin_ind = find_table_bin_ind(tab, curr_entry_ptr->hash,
|
---|
| 1374 | curr_entry_ptr->key);
|
---|
| 1375 | st_assert(bin_ind != UNDEFINED_BIN_IND
|
---|
| 1376 | && &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
|
---|
| 1377 | - ENTRY_BASE] == curr_entry_ptr);
|
---|
| 1378 | MARK_BIN_DELETED(tab, bin_ind);
|
---|
| 1379 | }
|
---|
| 1380 | MARK_ENTRY_DELETED(curr_entry_ptr);
|
---|
| 1381 | tab->num_entries--;
|
---|
| 1382 | update_range_for_deleted(tab, i);
|
---|
| 1383 | #ifdef ST_DEBUG
|
---|
| 1384 | st_check(tab);
|
---|
| 1385 | #endif
|
---|
[279] | 1386 | return 1;
|
---|
| 1387 | }
|
---|
| 1388 | }
|
---|
[331] | 1389 | st_assert(tab->num_entries == 0);
|
---|
| 1390 | tab->entries_start = tab->entries_bound = 0;
|
---|
| 1391 | if (value != 0) *value = 0;
|
---|
[279] | 1392 | return 0;
|
---|
| 1393 | }
|
---|
| 1394 |
|
---|
[331] | 1395 | /* See comments for function st_delete_safe. */
|
---|
| 1396 | void
|
---|
| 1397 | st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
|
---|
| 1398 | st_data_t never ATTRIBUTE_UNUSED) {
|
---|
[279] | 1399 | }
|
---|
| 1400 |
|
---|
[331] | 1401 | /* Find entry with KEY in table TAB, call FUNC with the key and the
|
---|
| 1402 | value of the found entry, and non-zero as the 3rd argument. If the
|
---|
| 1403 | entry is not found, call FUNC with KEY, and 2 zero arguments. If
|
---|
| 1404 | the call returns ST_CONTINUE, the table will have an entry with key
|
---|
| 1405 | and value returned by FUNC through the 1st and 2nd parameters. If
|
---|
| 1406 | the call of FUNC returns ST_DELETE, the table will not have entry
|
---|
| 1407 | with KEY. The function returns flag of that the entry with KEY was
|
---|
| 1408 | in the table before the call. */
|
---|
| 1409 | int
|
---|
| 1410 | st_update(st_table *tab, st_data_t key,
|
---|
| 1411 | st_update_callback_func *func, st_data_t arg) {
|
---|
| 1412 | st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
|
---|
| 1413 | st_index_t bin = 0; /* Ditto */
|
---|
| 1414 | st_table_entry *entries;
|
---|
| 1415 | st_index_t bin_ind;
|
---|
| 1416 | st_data_t value = 0, old_key;
|
---|
| 1417 | st_index_t check;
|
---|
| 1418 | int retval, existing;
|
---|
| 1419 | st_hash_t hash = do_hash(key, tab);
|
---|
[279] | 1420 |
|
---|
[331] | 1421 | entries = tab->entries;
|
---|
| 1422 | if (tab->bins == NULL) {
|
---|
| 1423 | bin = find_entry(tab, hash, key);
|
---|
| 1424 | existing = bin != UNDEFINED_ENTRY_IND;
|
---|
| 1425 | entry = &entries[bin];
|
---|
| 1426 | bin_ind = UNDEFINED_BIN_IND;
|
---|
| 1427 | } else {
|
---|
| 1428 | bin_ind = find_table_bin_ind(tab, hash, key);
|
---|
| 1429 | existing = bin_ind != UNDEFINED_BIN_IND;
|
---|
| 1430 | if (existing) {
|
---|
| 1431 | bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
|
---|
| 1432 | entry = &entries[bin];
|
---|
| 1433 | }
|
---|
| 1434 | }
|
---|
| 1435 | if (existing) {
|
---|
| 1436 | key = entry->key;
|
---|
| 1437 | value = entry->record;
|
---|
| 1438 | }
|
---|
| 1439 | old_key = key;
|
---|
| 1440 | check = tab->rebuilds_num;
|
---|
| 1441 | retval = (*func)(&key, &value, arg, existing);
|
---|
| 1442 | st_assert(check == tab->rebuilds_num);
|
---|
| 1443 | switch (retval) {
|
---|
| 1444 | case ST_CONTINUE:
|
---|
| 1445 | if (! existing) {
|
---|
| 1446 | st_add_direct_with_hash(tab, key, value, hash);
|
---|
| 1447 | break;
|
---|
| 1448 | }
|
---|
| 1449 | if (old_key != key) {
|
---|
| 1450 | entry->key = key;
|
---|
| 1451 | }
|
---|
| 1452 | entry->record = value;
|
---|
| 1453 | break;
|
---|
| 1454 | case ST_DELETE:
|
---|
| 1455 | if (existing) {
|
---|
| 1456 | if (bin_ind != UNDEFINED_BIN_IND)
|
---|
| 1457 | MARK_BIN_DELETED(tab, bin_ind);
|
---|
| 1458 | MARK_ENTRY_DELETED(entry);
|
---|
| 1459 | tab->num_entries--;
|
---|
| 1460 | update_range_for_deleted(tab, bin);
|
---|
| 1461 | #ifdef ST_DEBUG
|
---|
| 1462 | st_check(tab);
|
---|
| 1463 | #endif
|
---|
| 1464 | }
|
---|
| 1465 | break;
|
---|
| 1466 | }
|
---|
| 1467 | #ifdef ST_DEBUG
|
---|
| 1468 | st_check(tab);
|
---|
| 1469 | #endif
|
---|
| 1470 | return existing;
|
---|
[279] | 1471 | }
|
---|
[331] | 1472 | #endif /* RUBY */
|
---|
[279] | 1473 |
|
---|
[331] | 1474 | /* Traverse all entries in table TAB calling FUNC with current entry
|
---|
| 1475 | key and value and zero. If the call returns ST_STOP, stop
|
---|
| 1476 | traversing. If the call returns ST_DELETE, delete the current
|
---|
| 1477 | entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
|
---|
| 1478 | traversing. The function returns zero unless an error is found.
|
---|
| 1479 | CHECK_P is flag of st_foreach_check call. The behavior is a bit
|
---|
| 1480 | different for ST_CHECK and when the current element is removed
|
---|
| 1481 | during traversing. */
|
---|
| 1482 | static inline int
|
---|
| 1483 | st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg,
|
---|
| 1484 | int check_p) {
|
---|
| 1485 | st_index_t bin;
|
---|
| 1486 | st_index_t bin_ind;
|
---|
| 1487 | st_table_entry *entries, *curr_entry_ptr;
|
---|
[279] | 1488 | enum st_retval retval;
|
---|
[331] | 1489 | st_index_t i, rebuilds_num;
|
---|
| 1490 | st_hash_t hash;
|
---|
| 1491 | st_data_t key;
|
---|
| 1492 | int error_p, packed_p = tab->bins == NULL;
|
---|
[279] | 1493 |
|
---|
[331] | 1494 | st_assert(tab->entries_start <= tab->entries_bound);
|
---|
| 1495 | entries = tab->entries;
|
---|
| 1496 | /* The bound can change inside the loop even without rebuilding
|
---|
| 1497 | the table, e.g. by an entry inesrtion. */
|
---|
| 1498 | for (i = tab->entries_start; i < tab->entries_bound; i++) {
|
---|
| 1499 | curr_entry_ptr = &entries[i];
|
---|
| 1500 | if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
|
---|
| 1501 | continue;
|
---|
| 1502 | key = curr_entry_ptr->key;
|
---|
| 1503 | rebuilds_num = tab->rebuilds_num;
|
---|
| 1504 | hash = curr_entry_ptr->hash;
|
---|
| 1505 | retval = (*func)(key, curr_entry_ptr->record, arg, 0);
|
---|
| 1506 | if (rebuilds_num != tab->rebuilds_num) {
|
---|
| 1507 | entries = tab->entries;
|
---|
| 1508 | packed_p = tab->bins == NULL;
|
---|
| 1509 | if (packed_p) {
|
---|
| 1510 | i = find_entry(tab, hash, key);
|
---|
| 1511 | error_p = i == UNDEFINED_ENTRY_IND;
|
---|
| 1512 | } else {
|
---|
| 1513 | i = find_table_entry_ind(tab, hash, key);
|
---|
| 1514 | error_p = i == UNDEFINED_ENTRY_IND;
|
---|
| 1515 | i -= ENTRY_BASE;
|
---|
| 1516 | }
|
---|
| 1517 | if (error_p && check_p) {
|
---|
| 1518 | /* call func with error notice */
|
---|
| 1519 | retval = (*func)(0, 0, arg, 1);
|
---|
| 1520 | #ifdef ST_DEBUG
|
---|
| 1521 | st_check(tab);
|
---|
| 1522 | #endif
|
---|
| 1523 | return 1;
|
---|
| 1524 | }
|
---|
| 1525 | curr_entry_ptr = &entries[i];
|
---|
| 1526 | }
|
---|
| 1527 | switch (retval) {
|
---|
| 1528 | case ST_CONTINUE:
|
---|
| 1529 | break;
|
---|
| 1530 | case ST_CHECK:
|
---|
| 1531 | if (check_p)
|
---|
[279] | 1532 | break;
|
---|
[331] | 1533 | case ST_STOP:
|
---|
| 1534 | #ifdef ST_DEBUG
|
---|
| 1535 | st_check(tab);
|
---|
| 1536 | #endif
|
---|
| 1537 | return 0;
|
---|
| 1538 | case ST_DELETE:
|
---|
| 1539 | if (packed_p) {
|
---|
| 1540 | bin = find_entry(tab, hash, curr_entry_ptr->key);
|
---|
| 1541 | if (bin == UNDEFINED_ENTRY_IND)
|
---|
| 1542 | break;
|
---|
| 1543 | } else {
|
---|
| 1544 | bin_ind = find_table_bin_ind(tab, hash, curr_entry_ptr->key);
|
---|
| 1545 | if (bin_ind == UNDEFINED_BIN_IND)
|
---|
| 1546 | break;
|
---|
| 1547 | bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
|
---|
| 1548 | MARK_BIN_DELETED(tab, bin_ind);
|
---|
[279] | 1549 | }
|
---|
[331] | 1550 | st_assert(&entries[bin] == curr_entry_ptr);
|
---|
| 1551 | MARK_ENTRY_DELETED(curr_entry_ptr);
|
---|
| 1552 | tab->num_entries--;
|
---|
| 1553 | update_range_for_deleted(tab, bin);
|
---|
| 1554 | #ifdef ST_DEBUG
|
---|
| 1555 | st_check(tab);
|
---|
| 1556 | #endif
|
---|
| 1557 | break;
|
---|
[279] | 1558 | }
|
---|
| 1559 | }
|
---|
[331] | 1560 | #ifdef ST_DEBUG
|
---|
| 1561 | st_check(tab);
|
---|
| 1562 | #endif
|
---|
[279] | 1563 | return 0;
|
---|
| 1564 | }
|
---|
| 1565 |
|
---|
[331] | 1566 | int
|
---|
| 1567 | st_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg)
|
---|
[279] | 1568 | {
|
---|
[331] | 1569 | return st_general_foreach(tab, func, arg, FALSE);
|
---|
| 1570 | }
|
---|
[279] | 1571 |
|
---|
[331] | 1572 | #ifdef RUBY
|
---|
| 1573 | /* See comments for function st_delete_safe. */
|
---|
| 1574 | int
|
---|
| 1575 | st_foreach_check(st_table *tab, int (*func)(ANYARGS), st_data_t arg,
|
---|
| 1576 | st_data_t never ATTRIBUTE_UNUSED) {
|
---|
| 1577 | return st_general_foreach(tab, func, arg, TRUE);
|
---|
| 1578 | }
|
---|
[279] | 1579 |
|
---|
[331] | 1580 | /* Set up array KEYS by at most SIZE keys of head table TAB entries.
|
---|
| 1581 | Return the number of keys set up in array KEYS. */
|
---|
| 1582 | static inline st_index_t
|
---|
| 1583 | st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
|
---|
| 1584 | {
|
---|
| 1585 | st_index_t i, bound;
|
---|
| 1586 | st_data_t key, *keys_start, *keys_end;
|
---|
| 1587 | st_table_entry *curr_entry_ptr, *entries = tab->entries;
|
---|
| 1588 |
|
---|
| 1589 | bound = tab->entries_bound;
|
---|
| 1590 | keys_start = keys;
|
---|
| 1591 | keys_end = keys + size;
|
---|
| 1592 | for (i = tab->entries_start; i < bound; i++) {
|
---|
| 1593 | if (keys == keys_end)
|
---|
| 1594 | break;
|
---|
| 1595 | curr_entry_ptr = &entries[i];
|
---|
| 1596 | key = curr_entry_ptr->key;
|
---|
| 1597 | if (! DELETED_ENTRY_P(curr_entry_ptr))
|
---|
| 1598 | *keys++ = key;
|
---|
[279] | 1599 | }
|
---|
| 1600 |
|
---|
[331] | 1601 | return keys - keys_start;
|
---|
| 1602 | }
|
---|
| 1603 |
|
---|
| 1604 | st_index_t
|
---|
| 1605 | st_keys(st_table *tab, st_data_t *keys, st_index_t size)
|
---|
| 1606 | {
|
---|
| 1607 | return st_general_keys(tab, keys, size);
|
---|
| 1608 | }
|
---|
| 1609 |
|
---|
| 1610 | /* See comments for function st_delete_safe. */
|
---|
| 1611 | st_index_t
|
---|
| 1612 | st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
|
---|
| 1613 | st_data_t never ATTRIBUTE_UNUSED) {
|
---|
| 1614 | return st_general_keys(tab, keys, size);
|
---|
| 1615 | }
|
---|
| 1616 |
|
---|
| 1617 | /* Set up array VALUES by at most SIZE values of head table TAB
|
---|
| 1618 | entries. Return the number of values set up in array VALUES. */
|
---|
| 1619 | static inline st_index_t
|
---|
| 1620 | st_general_values(st_table *tab, st_data_t *values, st_index_t size)
|
---|
| 1621 | {
|
---|
| 1622 | st_index_t i, bound;
|
---|
| 1623 | st_data_t *values_start, *values_end;
|
---|
| 1624 | st_table_entry *curr_entry_ptr, *entries = tab->entries;
|
---|
| 1625 |
|
---|
| 1626 | values_start = values;
|
---|
| 1627 | values_end = values + size;
|
---|
| 1628 | bound = tab->entries_bound;
|
---|
| 1629 | st_assert(bound != 0);
|
---|
| 1630 | for (i = tab->entries_start; i < bound; i++) {
|
---|
| 1631 | if (values == values_end)
|
---|
| 1632 | break;
|
---|
| 1633 | curr_entry_ptr = &entries[i];
|
---|
| 1634 | if (! DELETED_ENTRY_P(curr_entry_ptr))
|
---|
| 1635 | *values++ = curr_entry_ptr->record;
|
---|
[279] | 1636 | }
|
---|
| 1637 |
|
---|
[331] | 1638 | return values - values_start;
|
---|
| 1639 | }
|
---|
| 1640 |
|
---|
| 1641 | st_index_t
|
---|
| 1642 | st_values(st_table *tab, st_data_t *values, st_index_t size)
|
---|
| 1643 | {
|
---|
| 1644 | return st_general_values(tab, values, size);
|
---|
| 1645 | }
|
---|
| 1646 |
|
---|
| 1647 | /* See comments for function st_delete_safe. */
|
---|
| 1648 | st_index_t
|
---|
| 1649 | st_values_check(st_table *tab, st_data_t *values, st_index_t size,
|
---|
| 1650 | st_data_t never ATTRIBUTE_UNUSED) {
|
---|
| 1651 | return st_general_values(tab, values, size);
|
---|
| 1652 | }
|
---|
| 1653 | #endif /* RUBY */
|
---|
| 1654 |
|
---|
| 1655 | #ifdef RUBY
|
---|
| 1656 | #define FNV1_32A_INIT 0x811c9dc5
|
---|
| 1657 |
|
---|
| 1658 | /*
|
---|
| 1659 | * 32 bit magic FNV-1a prime
|
---|
| 1660 | */
|
---|
| 1661 | #define FNV_32_PRIME 0x01000193
|
---|
| 1662 |
|
---|
| 1663 | #ifndef UNALIGNED_WORD_ACCESS
|
---|
| 1664 | # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
|
---|
| 1665 | defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
|
---|
| 1666 | defined(__powerpc64__) || \
|
---|
| 1667 | defined(__mc68020__)
|
---|
| 1668 | # define UNALIGNED_WORD_ACCESS 1
|
---|
| 1669 | # endif
|
---|
| 1670 | #endif
|
---|
| 1671 | #ifndef UNALIGNED_WORD_ACCESS
|
---|
| 1672 | # define UNALIGNED_WORD_ACCESS 0
|
---|
| 1673 | #endif
|
---|
| 1674 |
|
---|
| 1675 | /* This hash function is quite simplified MurmurHash3
|
---|
| 1676 | * Simplification is legal, cause most of magic still happens in finalizator.
|
---|
| 1677 | * And finalizator is almost the same as in MurmurHash3 */
|
---|
| 1678 | #define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
|
---|
| 1679 | #define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
|
---|
| 1680 |
|
---|
| 1681 | #if ST_INDEX_BITS <= 32
|
---|
| 1682 | #define C1 (st_index_t)0xcc9e2d51
|
---|
| 1683 | #define C2 (st_index_t)0x1b873593
|
---|
[279] | 1684 | #else
|
---|
[331] | 1685 | #define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
|
---|
| 1686 | #define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
|
---|
| 1687 | #endif
|
---|
| 1688 | static inline st_index_t
|
---|
| 1689 | murmur_step(st_index_t h, st_index_t k)
|
---|
| 1690 | {
|
---|
| 1691 | #if ST_INDEX_BITS <= 32
|
---|
| 1692 | #define r1 (17)
|
---|
| 1693 | #define r2 (11)
|
---|
| 1694 | #else
|
---|
| 1695 | #define r1 (33)
|
---|
| 1696 | #define r2 (24)
|
---|
| 1697 | #endif
|
---|
| 1698 | k *= C1;
|
---|
| 1699 | h ^= ROTL(k, r1);
|
---|
| 1700 | h *= C2;
|
---|
| 1701 | h = ROTL(h, r2);
|
---|
| 1702 | return h;
|
---|
| 1703 | }
|
---|
| 1704 | #undef r1
|
---|
| 1705 | #undef r2
|
---|
[279] | 1706 |
|
---|
[331] | 1707 | static inline st_index_t
|
---|
| 1708 | murmur_finish(st_index_t h)
|
---|
| 1709 | {
|
---|
| 1710 | #if ST_INDEX_BITS <= 32
|
---|
| 1711 | #define r1 (16)
|
---|
| 1712 | #define r2 (13)
|
---|
| 1713 | #define r3 (16)
|
---|
| 1714 | const st_index_t c1 = 0x85ebca6b;
|
---|
| 1715 | const st_index_t c2 = 0xc2b2ae35;
|
---|
| 1716 | #else
|
---|
| 1717 | /* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
|
---|
| 1718 | #define r1 (30)
|
---|
| 1719 | #define r2 (27)
|
---|
| 1720 | #define r3 (31)
|
---|
| 1721 | const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
|
---|
| 1722 | const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
|
---|
| 1723 | #endif
|
---|
| 1724 | #if ST_INDEX_BITS > 64
|
---|
| 1725 | h ^= h >> 64;
|
---|
| 1726 | h *= c2;
|
---|
| 1727 | h ^= h >> 65;
|
---|
| 1728 | #endif
|
---|
| 1729 | h ^= h >> r1;
|
---|
| 1730 | h *= c1;
|
---|
| 1731 | h ^= h >> r2;
|
---|
| 1732 | h *= c2;
|
---|
| 1733 | h ^= h >> r3;
|
---|
| 1734 | return h;
|
---|
| 1735 | }
|
---|
| 1736 | #undef r1
|
---|
| 1737 | #undef r2
|
---|
| 1738 | #undef r3
|
---|
| 1739 |
|
---|
| 1740 | st_index_t
|
---|
| 1741 | st_hash(const void *ptr, size_t len, st_index_t h)
|
---|
| 1742 | {
|
---|
| 1743 | const char *data = ptr;
|
---|
| 1744 | st_index_t t = 0;
|
---|
| 1745 | size_t l = len;
|
---|
| 1746 |
|
---|
| 1747 | #define data_at(n) (st_index_t)((unsigned char)data[(n)])
|
---|
| 1748 | #define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
|
---|
| 1749 | #if SIZEOF_ST_INDEX_T > 4
|
---|
| 1750 | #define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
|
---|
| 1751 | #if SIZEOF_ST_INDEX_T > 8
|
---|
| 1752 | #define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
|
---|
| 1753 | UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
|
---|
| 1754 | #define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
|
---|
| 1755 | #endif
|
---|
| 1756 | #define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
|
---|
| 1757 | #else
|
---|
| 1758 | #define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
|
---|
| 1759 | #endif
|
---|
| 1760 | #undef SKIP_TAIL
|
---|
| 1761 | if (len >= sizeof(st_index_t)) {
|
---|
| 1762 | #if !UNALIGNED_WORD_ACCESS
|
---|
| 1763 | int align = (int)((st_data_t)data % sizeof(st_index_t));
|
---|
| 1764 | if (align) {
|
---|
| 1765 | st_index_t d = 0;
|
---|
| 1766 | int sl, sr, pack;
|
---|
| 1767 |
|
---|
| 1768 | switch (align) {
|
---|
| 1769 | #ifdef WORDS_BIGENDIAN
|
---|
| 1770 | # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
|
---|
| 1771 | t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
|
---|
| 1772 | #else
|
---|
| 1773 | # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
|
---|
| 1774 | t |= data_at(n) << CHAR_BIT*(n)
|
---|
| 1775 | #endif
|
---|
| 1776 | UNALIGNED_ADD_ALL;
|
---|
| 1777 | #undef UNALIGNED_ADD
|
---|
| 1778 | }
|
---|
| 1779 |
|
---|
| 1780 | #ifdef WORDS_BIGENDIAN
|
---|
| 1781 | t >>= (CHAR_BIT * align) - CHAR_BIT;
|
---|
| 1782 | #else
|
---|
| 1783 | t <<= (CHAR_BIT * align);
|
---|
| 1784 | #endif
|
---|
| 1785 |
|
---|
| 1786 | data += sizeof(st_index_t)-align;
|
---|
| 1787 | len -= sizeof(st_index_t)-align;
|
---|
| 1788 |
|
---|
| 1789 | sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
|
---|
| 1790 | sr = CHAR_BIT * align;
|
---|
| 1791 |
|
---|
| 1792 | while (len >= sizeof(st_index_t)) {
|
---|
| 1793 | d = *(st_index_t *)data;
|
---|
| 1794 | #ifdef WORDS_BIGENDIAN
|
---|
| 1795 | t = (t << sr) | (d >> sl);
|
---|
| 1796 | #else
|
---|
| 1797 | t = (t >> sr) | (d << sl);
|
---|
| 1798 | #endif
|
---|
| 1799 | h = murmur_step(h, t);
|
---|
| 1800 | t = d;
|
---|
| 1801 | data += sizeof(st_index_t);
|
---|
| 1802 | len -= sizeof(st_index_t);
|
---|
| 1803 | }
|
---|
| 1804 |
|
---|
| 1805 | pack = len < (size_t)align ? (int)len : align;
|
---|
| 1806 | d = 0;
|
---|
| 1807 | switch (pack) {
|
---|
| 1808 | #ifdef WORDS_BIGENDIAN
|
---|
| 1809 | # define UNALIGNED_ADD(n) case (n) + 1: \
|
---|
| 1810 | d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
|
---|
| 1811 | #else
|
---|
| 1812 | # define UNALIGNED_ADD(n) case (n) + 1: \
|
---|
| 1813 | d |= data_at(n) << CHAR_BIT*(n)
|
---|
| 1814 | #endif
|
---|
| 1815 | UNALIGNED_ADD_ALL;
|
---|
| 1816 | #undef UNALIGNED_ADD
|
---|
| 1817 | }
|
---|
| 1818 | #ifdef WORDS_BIGENDIAN
|
---|
| 1819 | t = (t << sr) | (d >> sl);
|
---|
| 1820 | #else
|
---|
| 1821 | t = (t >> sr) | (d << sl);
|
---|
| 1822 | #endif
|
---|
| 1823 |
|
---|
| 1824 | if (len < (size_t)align) goto skip_tail;
|
---|
| 1825 | # define SKIP_TAIL 1
|
---|
| 1826 | h = murmur_step(h, t);
|
---|
| 1827 | data += pack;
|
---|
| 1828 | len -= pack;
|
---|
| 1829 | }
|
---|
| 1830 | else
|
---|
| 1831 | #endif
|
---|
| 1832 | {
|
---|
| 1833 | do {
|
---|
| 1834 | h = murmur_step(h, *(st_index_t *)data);
|
---|
| 1835 | data += sizeof(st_index_t);
|
---|
| 1836 | len -= sizeof(st_index_t);
|
---|
| 1837 | } while (len >= sizeof(st_index_t));
|
---|
| 1838 | }
|
---|
[279] | 1839 | }
|
---|
| 1840 |
|
---|
[331] | 1841 | t = 0;
|
---|
| 1842 | switch (len) {
|
---|
| 1843 | #if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
|
---|
| 1844 | /* in this case byteorder doesn't really matter */
|
---|
| 1845 | #if SIZEOF_ST_INDEX_T > 4
|
---|
| 1846 | case 7: t |= data_at(6) << 48;
|
---|
| 1847 | case 6: t |= data_at(5) << 40;
|
---|
| 1848 | case 5: t |= data_at(4) << 32;
|
---|
| 1849 | case 4:
|
---|
| 1850 | t |= (st_index_t)*(uint32_t*)data;
|
---|
| 1851 | goto skip_tail;
|
---|
| 1852 | # define SKIP_TAIL 1
|
---|
[279] | 1853 | #endif
|
---|
[331] | 1854 | case 3: t |= data_at(2) << 16;
|
---|
| 1855 | case 2: t |= data_at(1) << 8;
|
---|
| 1856 | case 1: t |= data_at(0);
|
---|
| 1857 | #else
|
---|
| 1858 | #ifdef WORDS_BIGENDIAN
|
---|
| 1859 | # define UNALIGNED_ADD(n) case (n) + 1: \
|
---|
| 1860 | t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
|
---|
| 1861 | #else
|
---|
| 1862 | # define UNALIGNED_ADD(n) case (n) + 1: \
|
---|
| 1863 | t |= data_at(n) << CHAR_BIT*(n)
|
---|
| 1864 | #endif
|
---|
| 1865 | UNALIGNED_ADD_ALL;
|
---|
| 1866 | #undef UNALIGNED_ADD
|
---|
| 1867 | #endif
|
---|
| 1868 | #ifdef SKIP_TAIL
|
---|
| 1869 | skip_tail:
|
---|
| 1870 | #endif
|
---|
| 1871 | h ^= t; h -= ROTL(t, 7);
|
---|
| 1872 | h *= C2;
|
---|
| 1873 | }
|
---|
| 1874 | h ^= l;
|
---|
| 1875 |
|
---|
| 1876 | return murmur_finish(h);
|
---|
[279] | 1877 | }
|
---|
| 1878 |
|
---|
[331] | 1879 | st_index_t
|
---|
| 1880 | st_hash_uint32(st_index_t h, uint32_t i)
|
---|
[279] | 1881 | {
|
---|
[331] | 1882 | return murmur_step(h, i);
|
---|
| 1883 | }
|
---|
| 1884 |
|
---|
| 1885 | st_index_t
|
---|
| 1886 | st_hash_uint(st_index_t h, st_index_t i)
|
---|
| 1887 | {
|
---|
| 1888 | i += h;
|
---|
| 1889 | /* no matter if it is BigEndian or LittleEndian,
|
---|
| 1890 | * we hash just integers */
|
---|
| 1891 | #if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
|
---|
| 1892 | h = murmur_step(h, i >> 8*8);
|
---|
| 1893 | #endif
|
---|
| 1894 | h = murmur_step(h, i);
|
---|
| 1895 | return h;
|
---|
| 1896 | }
|
---|
| 1897 |
|
---|
| 1898 | st_index_t
|
---|
| 1899 | st_hash_end(st_index_t h)
|
---|
| 1900 | {
|
---|
| 1901 | h = murmur_finish(h);
|
---|
| 1902 | return h;
|
---|
| 1903 | }
|
---|
| 1904 |
|
---|
| 1905 | #undef st_hash_start
|
---|
| 1906 | st_index_t
|
---|
| 1907 | st_hash_start(st_index_t h)
|
---|
| 1908 | {
|
---|
| 1909 | return h;
|
---|
| 1910 | }
|
---|
| 1911 |
|
---|
| 1912 | static st_index_t
|
---|
| 1913 | strhash(st_data_t arg)
|
---|
| 1914 | {
|
---|
| 1915 | register const char *string = (const char *)arg;
|
---|
| 1916 | return st_hash(string, strlen(string), FNV1_32A_INIT);
|
---|
| 1917 | }
|
---|
| 1918 |
|
---|
| 1919 | int
|
---|
| 1920 | st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
|
---|
| 1921 | {
|
---|
| 1922 | unsigned int c1, c2;
|
---|
| 1923 |
|
---|
| 1924 | while (1) {
|
---|
| 1925 | c1 = (unsigned char)*s1++;
|
---|
| 1926 | c2 = (unsigned char)*s2++;
|
---|
| 1927 | if (c1 == '\0' || c2 == '\0') {
|
---|
| 1928 | if (c1 != '\0') return 1;
|
---|
| 1929 | if (c2 != '\0') return -1;
|
---|
| 1930 | return 0;
|
---|
| 1931 | }
|
---|
| 1932 | if ((unsigned int)(c1 - 'A') <= ('Z' - 'A')) c1 += 'a' - 'A';
|
---|
| 1933 | if ((unsigned int)(c2 - 'A') <= ('Z' - 'A')) c2 += 'a' - 'A';
|
---|
| 1934 | if (c1 != c2) {
|
---|
| 1935 | if (c1 > c2)
|
---|
| 1936 | return 1;
|
---|
| 1937 | else
|
---|
| 1938 | return -1;
|
---|
| 1939 | }
|
---|
| 1940 | }
|
---|
| 1941 | }
|
---|
| 1942 |
|
---|
| 1943 | int
|
---|
| 1944 | st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
|
---|
| 1945 | {
|
---|
| 1946 | unsigned int c1, c2;
|
---|
| 1947 |
|
---|
| 1948 | while (n--) {
|
---|
| 1949 | c1 = (unsigned char)*s1++;
|
---|
| 1950 | c2 = (unsigned char)*s2++;
|
---|
| 1951 | if (c1 == '\0' || c2 == '\0') {
|
---|
| 1952 | if (c1 != '\0') return 1;
|
---|
| 1953 | if (c2 != '\0') return -1;
|
---|
| 1954 | return 0;
|
---|
| 1955 | }
|
---|
| 1956 | if ((unsigned int)(c1 - 'A') <= ('Z' - 'A')) c1 += 'a' - 'A';
|
---|
| 1957 | if ((unsigned int)(c2 - 'A') <= ('Z' - 'A')) c2 += 'a' - 'A';
|
---|
| 1958 | if (c1 != c2) {
|
---|
| 1959 | if (c1 > c2)
|
---|
| 1960 | return 1;
|
---|
| 1961 | else
|
---|
| 1962 | return -1;
|
---|
| 1963 | }
|
---|
| 1964 | }
|
---|
| 1965 | return 0;
|
---|
| 1966 | }
|
---|
| 1967 |
|
---|
| 1968 | static st_index_t
|
---|
| 1969 | strcasehash(st_data_t arg)
|
---|
| 1970 | {
|
---|
| 1971 | register const char *string = (const char *)arg;
|
---|
| 1972 | register st_index_t hval = FNV1_32A_INIT;
|
---|
| 1973 |
|
---|
| 1974 | /*
|
---|
| 1975 | * FNV-1a hash each octet in the buffer
|
---|
| 1976 | */
|
---|
| 1977 | while (*string) {
|
---|
| 1978 | unsigned int c = (unsigned char)*string++;
|
---|
| 1979 | if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
|
---|
| 1980 | hval ^= c;
|
---|
| 1981 |
|
---|
| 1982 | /* multiply by the 32 bit FNV magic prime mod 2^32 */
|
---|
| 1983 | hval *= FNV_32_PRIME;
|
---|
| 1984 | }
|
---|
| 1985 | return hval;
|
---|
| 1986 | }
|
---|
| 1987 |
|
---|
| 1988 | int
|
---|
| 1989 | st_numcmp(st_data_t x, st_data_t y)
|
---|
| 1990 | {
|
---|
[279] | 1991 | return x != y;
|
---|
| 1992 | }
|
---|
| 1993 |
|
---|
[331] | 1994 | st_index_t
|
---|
| 1995 | st_numhash(st_data_t n)
|
---|
[279] | 1996 | {
|
---|
[331] | 1997 | enum {s1 = 11, s2 = 3};
|
---|
| 1998 | return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
|
---|
[279] | 1999 | }
|
---|
[331] | 2000 | #endif /* RUBY */
|
---|