Changeset 331 for EcnlProtoTool/trunk/onigmo-6.1.3/src/st.c
- Timestamp:
- Jan 21, 2018, 12:10:09 AM (6 years ago)
- Location:
- EcnlProtoTool/trunk/onigmo-6.1.3
- Files:
-
- 1 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
EcnlProtoTool/trunk/onigmo-6.1.3/src/st.c
r321 r331 1 /* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */ 2 3 /* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */ 1 /* This is a public domain general purpose hash table package 2 originally written by Peter Moore @ UCB. 3 4 The hash table data structures were redesigned and the package was 5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */ 6 7 /* The original package implemented classic bucket-based hash tables 8 with entries doubly linked for an access by their insertion order. 9 To decrease pointer chasing and as a consequence to improve a data 10 locality the current implementation is based on storing entries in 11 an array and using hash tables with open addressing. The current 12 entries are more compact in comparison with the original ones and 13 this also improves the data locality. 14 15 The hash table has two arrays called *bins* and *entries*. 16 17 bins: 18 ------- 19 | | entries array: 20 |-------| -------------------------------- 21 | index | | | entry: | | | 22 |-------| | | | | | 23 | ... | | ... | hash | ... | ... | 24 |-------| | | key | | | 25 | empty | | | record | | | 26 |-------| -------------------------------- 27 | ... | ^ ^ 28 |-------| |_ entries start |_ entries bound 29 |deleted| 30 ------- 31 32 o The entry array contains table entries in the same order as they 33 were inserted. 34 35 When the first entry is deleted, a variable containing index of 36 the current first entry (*entries start*) is changed. In all 37 other cases of the deletion, we just mark the entry as deleted by 38 using a reserved hash value. 39 40 Such organization of the entry storage makes operations of the 41 table shift and the entries traversal very fast. 42 43 o The bins provide access to the entries by their keys. The 44 key hash is mapped to a bin containing *index* of the 45 corresponding entry in the entry array. 46 47 The bin array size is always power of two, it makes mapping very 48 fast by using the corresponding lower bits of the hash. 49 Generally it is not a good idea to ignore some part of the hash. 50 But alternative approach is worse. For example, we could use a 51 modulo operation for mapping and a prime number for the size of 52 the bin array. Unfortunately, the modulo operation for big 53 64-bit numbers are extremely slow (it takes more than 100 cycles 54 on modern Intel CPUs). 55 56 Still other bits of the hash value are used when the mapping 57 results in a collision. In this case we use a secondary hash 58 value which is a result of a function of the collision bin 59 index and the original hash value. The function choice 60 guarantees that we can traverse all bins and finally find the 61 corresponding bin as after several iterations the function 62 becomes a full cycle linear congruential generator because it 63 satisfies requirements of the Hull-Dobell theorem. 64 65 When an entry is removed from the table besides marking the 66 hash in the corresponding entry described above, we also mark 67 the bin by a special value in order to find entries which had 68 a collision with the removed entries. 69 70 There are two reserved values for the bins. One denotes an 71 empty bin, another one denotes a bin for a deleted entry. 72 73 o The length of the bin array is at least two times more than the 74 entry array length. This keeps the table load factor healthy. 75 The trigger of rebuilding the table is always a case when we can 76 not insert an entry anymore at the entries bound. We could 77 change the entries bound too in case of deletion but than we need 78 a special code to count bins with corresponding deleted entries 79 and reset the bin values when there are too many bins 80 corresponding deleted entries 81 82 Table rebuilding is done by creation of a new entry array and 83 bins of an appropriate size. We also try to reuse the arrays 84 in some cases by compacting the array and removing deleted 85 entries. 86 87 o To save memory very small tables have no allocated arrays 88 bins. We use a linear search for an access by a key. 89 90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in 91 bins depending on the current hash table size. 92 93 This implementation speeds up the Ruby hash table benchmarks in 94 average by more 40% on Intel Haswell CPU. 95 96 */ 97 98 #ifdef RUBY 99 #include "internal.h" 100 #else 101 #include "regint.h" 102 #include "st.h" 103 #endif 4 104 5 105 #include <stdio.h> 6 106 #include <stdlib.h> 7 107 #include <string.h> 8 9 #ifdef _WIN32 10 #include <malloc.h> 11 #endif 12 13 #include "regint.h" 14 #include "st.h" 15 16 typedef struct st_table_entry st_table_entry; 108 #include <assert.h> 109 110 #ifdef __GNUC__ 111 #define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p) 112 #define EXPECT(expr, val) __builtin_expect(expr, val) 113 #define ATTRIBUTE_UNUSED __attribute__((unused)) 114 #else 115 #define PREFETCH(addr, write_p) 116 #define EXPECT(expr, val) (expr) 117 #define ATTRIBUTE_UNUSED 118 #endif 119 120 #ifdef ST_DEBUG 121 #define st_assert(cond) assert(cond) 122 #else 123 #define st_assert(cond) ((void)(0 && (cond))) 124 #endif 125 126 /* The type of hashes. */ 127 typedef st_index_t st_hash_t; 17 128 18 129 struct st_table_entry { 19 unsigned int hash;130 st_hash_t hash; 20 131 st_data_t key; 21 132 st_data_t record; 22 st_table_entry *next;23 133 }; 24 134 25 #define ST_DEFAULT_MAX_DENSITY 5 26 #define ST_DEFAULT_INIT_TABLE_SIZE 11 27 28 /* 29 * DEFAULT_MAX_DENSITY is the default for the largest we allow the 30 * average number of items per bin before increasing the number of 31 * bins 32 * 33 * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins 34 * allocated initially 35 * 36 */ 37 38 static int numcmp(long, long); 39 static int numhash(long); 40 static struct st_hash_type type_numhash = { 41 numcmp, 42 numhash, 135 #ifdef RUBY 136 #define type_numhash st_hashtype_num 137 const struct st_hash_type st_hashtype_num = { 138 st_numcmp, 139 st_numhash, 43 140 }; 44 141 45 142 /* extern int strcmp(const char *, const char *); */ 46 static int strhash(const char *);47 static struct st_hash_type type_strhash = {143 static st_index_t strhash(st_data_t); 144 static const struct st_hash_type type_strhash = { 48 145 strcmp, 49 146 strhash, 50 147 }; 51 148 52 static void rehash(st_table *); 53 54 #define alloc(type) (type*)xmalloc((unsigned)sizeof(type)) 55 #define Calloc(n,s) (char*)xcalloc((n),(s)) 56 57 #define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0) 58 59 #define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key)) 60 #define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins) 61 62 /* 63 * MINSIZE is the minimum size of a dictionary. 64 */ 65 66 #define MINSIZE 8 67 68 /* 69 Table of prime numbers 2^n+a, 2<=n<=30. 70 */ 71 static const long primes[] = { 72 8 + 3, 73 16 + 3, 74 32 + 5, 75 64 + 3, 76 128 + 3, 77 256 + 27, 78 512 + 9, 79 1024 + 9, 80 2048 + 5, 81 4096 + 3, 82 8192 + 27, 83 16384 + 43, 84 32768 + 3, 85 65536 + 45, 86 131072 + 29, 87 262144 + 3, 88 524288 + 21, 89 1048576 + 7, 90 2097152 + 17, 91 4194304 + 15, 92 8388608 + 9, 93 16777216 + 43, 94 33554432 + 35, 95 67108864 + 15, 96 134217728 + 29, 97 268435456 + 3, 98 536870912 + 11, 99 1073741824 + 85, 100 0 149 static st_index_t strcasehash(st_data_t); 150 static const struct st_hash_type type_strcasehash = { 151 st_locale_insensitive_strcasecmp, 152 strcasehash, 101 153 }; 102 154 #endif /* RUBY */ 155 156 /* Value used to catch uninitialized entries/bins during debugging. 157 There is a possibility for a false alarm, but its probability is 158 extremely small. */ 159 #define ST_INIT_VAL 0xafafafafafafafaf 160 #define ST_INIT_VAL_BYTE 0xafa 161 162 #ifdef RUBY 163 #undef malloc 164 #undef realloc 165 #undef calloc 166 #undef free 167 #define malloc ruby_xmalloc 168 #define calloc ruby_xcalloc 169 #define realloc ruby_xrealloc 170 #define free ruby_xfree 171 #else /* RUBY */ 172 #define MEMCPY(p1,p2,type,n) memcpy((p1), (p2), sizeof(type)*(n)) 173 #endif /* RUBY */ 174 175 #define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0) 176 #define PTR_EQUAL(tab, ptr, hash_val, key_) \ 177 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key)) 178 179 /* Features of a table. */ 180 struct st_features { 181 /* Power of 2 used for number of allocated entries. */ 182 unsigned char entry_power; 183 /* Power of 2 used for number of allocated bins. Depending on the 184 table size, the number of bins is 2-4 times more than the 185 number of entries. */ 186 unsigned char bin_power; 187 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */ 188 unsigned char size_ind; 189 /* Bins are packed in words of type st_index_t. The following is 190 a size of bins counted by words. */ 191 st_index_t bins_words; 192 }; 193 194 /* Features of all possible size tables. */ 195 #if SIZEOF_ST_INDEX_T == 8 196 #define MAX_POWER2 62 197 static const struct st_features features[] = { 198 {0, 1, 0, 0x0}, 199 {1, 2, 0, 0x1}, 200 {2, 3, 0, 0x1}, 201 {3, 4, 0, 0x2}, 202 {4, 5, 0, 0x4}, 203 {5, 6, 0, 0x8}, 204 {6, 7, 0, 0x10}, 205 {7, 8, 0, 0x20}, 206 {8, 9, 1, 0x80}, 207 {9, 10, 1, 0x100}, 208 {10, 11, 1, 0x200}, 209 {11, 12, 1, 0x400}, 210 {12, 13, 1, 0x800}, 211 {13, 14, 1, 0x1000}, 212 {14, 15, 1, 0x2000}, 213 {15, 16, 1, 0x4000}, 214 {16, 17, 2, 0x10000}, 215 {17, 18, 2, 0x20000}, 216 {18, 19, 2, 0x40000}, 217 {19, 20, 2, 0x80000}, 218 {20, 21, 2, 0x100000}, 219 {21, 22, 2, 0x200000}, 220 {22, 23, 2, 0x400000}, 221 {23, 24, 2, 0x800000}, 222 {24, 25, 2, 0x1000000}, 223 {25, 26, 2, 0x2000000}, 224 {26, 27, 2, 0x4000000}, 225 {27, 28, 2, 0x8000000}, 226 {28, 29, 2, 0x10000000}, 227 {29, 30, 2, 0x20000000}, 228 {30, 31, 2, 0x40000000}, 229 {31, 32, 2, 0x80000000}, 230 {32, 33, 3, 0x200000000}, 231 {33, 34, 3, 0x400000000}, 232 {34, 35, 3, 0x800000000}, 233 {35, 36, 3, 0x1000000000}, 234 {36, 37, 3, 0x2000000000}, 235 {37, 38, 3, 0x4000000000}, 236 {38, 39, 3, 0x8000000000}, 237 {39, 40, 3, 0x10000000000}, 238 {40, 41, 3, 0x20000000000}, 239 {41, 42, 3, 0x40000000000}, 240 {42, 43, 3, 0x80000000000}, 241 {43, 44, 3, 0x100000000000}, 242 {44, 45, 3, 0x200000000000}, 243 {45, 46, 3, 0x400000000000}, 244 {46, 47, 3, 0x800000000000}, 245 {47, 48, 3, 0x1000000000000}, 246 {48, 49, 3, 0x2000000000000}, 247 {49, 50, 3, 0x4000000000000}, 248 {50, 51, 3, 0x8000000000000}, 249 {51, 52, 3, 0x10000000000000}, 250 {52, 53, 3, 0x20000000000000}, 251 {53, 54, 3, 0x40000000000000}, 252 {54, 55, 3, 0x80000000000000}, 253 {55, 56, 3, 0x100000000000000}, 254 {56, 57, 3, 0x200000000000000}, 255 {57, 58, 3, 0x400000000000000}, 256 {58, 59, 3, 0x800000000000000}, 257 {59, 60, 3, 0x1000000000000000}, 258 {60, 61, 3, 0x2000000000000000}, 259 {61, 62, 3, 0x4000000000000000}, 260 {62, 63, 3, 0x8000000000000000}, 261 }; 262 263 #else 264 #define MAX_POWER2 30 265 266 static const struct st_features features[] = { 267 {0, 1, 0, 0x1}, 268 {1, 2, 0, 0x1}, 269 {2, 3, 0, 0x2}, 270 {3, 4, 0, 0x4}, 271 {4, 5, 0, 0x8}, 272 {5, 6, 0, 0x10}, 273 {6, 7, 0, 0x20}, 274 {7, 8, 0, 0x40}, 275 {8, 9, 1, 0x100}, 276 {9, 10, 1, 0x200}, 277 {10, 11, 1, 0x400}, 278 {11, 12, 1, 0x800}, 279 {12, 13, 1, 0x1000}, 280 {13, 14, 1, 0x2000}, 281 {14, 15, 1, 0x4000}, 282 {15, 16, 1, 0x8000}, 283 {16, 17, 2, 0x20000}, 284 {17, 18, 2, 0x40000}, 285 {18, 19, 2, 0x80000}, 286 {19, 20, 2, 0x100000}, 287 {20, 21, 2, 0x200000}, 288 {21, 22, 2, 0x400000}, 289 {22, 23, 2, 0x800000}, 290 {23, 24, 2, 0x1000000}, 291 {24, 25, 2, 0x2000000}, 292 {25, 26, 2, 0x4000000}, 293 {26, 27, 2, 0x8000000}, 294 {27, 28, 2, 0x10000000}, 295 {28, 29, 2, 0x20000000}, 296 {29, 30, 2, 0x40000000}, 297 {30, 31, 2, 0x80000000}, 298 }; 299 300 #endif 301 302 /* The reserved hash value and its substitution. */ 303 #define RESERVED_HASH_VAL (~(st_hash_t) 0) 304 #define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0) 305 306 /* Return hash value of KEY for table TAB. */ 307 static inline st_hash_t 308 do_hash(st_data_t key, st_table *tab) 309 { 310 st_hash_t hash = (st_hash_t)(tab->type->hash)(key); 311 312 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into 313 another value. Such mapping should be extremely rare. */ 314 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash; 315 } 316 317 /* Power of 2 defining the minimal number of allocated entries. */ 318 #define MINIMAL_POWER2 2 319 320 #if MINIMAL_POWER2 < 2 321 #error "MINIMAL_POWER2 should be >= 2" 322 #endif 323 324 /* If the power2 of the allocated `entries` is less than the following 325 value, don't allocate bins and use a linear search. */ 326 #define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4 327 328 /* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */ 103 329 static int 104 new_size(size) 105 int size; 106 { 107 int i; 108 109 #if 0 110 for (i=3; i<31; i++) { 111 if ((1<<i) > size) return 1<<i; 112 } 330 get_power2(st_index_t size) 331 { 332 unsigned int n; 333 334 for (n = 0; size != 0; n++) 335 size >>= 1; 336 if (n <= MAX_POWER2) 337 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n; 338 #ifdef RUBY 339 /* Ran out of the table entries */ 340 rb_raise(rb_eRuntimeError, "st_table too big"); 341 #endif 342 /* should raise exception */ 113 343 return -1; 114 #else 115 int newsize; 116 117 for (i = 0, newsize = MINSIZE; 118 i < (int )(sizeof(primes)/sizeof(primes[0])); 119 i++, newsize <<= 1) 344 } 345 346 /* Return value of N-th bin in array BINS of table with bins size 347 index S. */ 348 static inline st_index_t 349 get_bin(st_index_t *bins, int s, st_index_t n) 350 { 351 return (s == 0 ? ((unsigned char *) bins)[n] 352 : s == 1 ? ((unsigned short *) bins)[n] 353 : s == 2 ? ((unsigned int *) bins)[n] 354 : ((st_index_t *) bins)[n]); 355 } 356 357 /* Set up N-th bin in array BINS of table with bins size index S to 358 value V. */ 359 static inline void 360 set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v) 361 { 362 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v; 363 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v; 364 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v; 365 else ((st_index_t *) bins)[n] = v; 366 } 367 368 /* These macros define reserved values for empty table bin and table 369 bin which contains a deleted entry. We will never use such values 370 for an entry index in bins. */ 371 #define EMPTY_BIN 0 372 #define DELETED_BIN 1 373 /* Base of a real entry index in the bins. */ 374 #define ENTRY_BASE 2 375 376 /* Mark I-th bin of table TAB as empty, in other words not 377 corresponding to any entry. */ 378 #define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN)) 379 380 /* Values used for not found entry and bin with given 381 characteristics. */ 382 #define UNDEFINED_ENTRY_IND (~(st_index_t) 0) 383 #define UNDEFINED_BIN_IND (~(st_index_t) 0) 384 385 /* Mark I-th bin of table TAB as corresponding to a deleted table 386 entry. Update number of entries in the table and number of bins 387 corresponding to deleted entries. */ 388 #define MARK_BIN_DELETED(tab, i) \ 389 do { \ 390 st_assert(i != UNDEFINED_BIN_IND); \ 391 st_assert(! IND_EMPTY_OR_DELETED_BIN_P(tab, i)); \ 392 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \ 393 } while (0) 394 395 /* Macros to check that value B is used empty bins and bins 396 corresponding deleted entries. */ 397 #define EMPTY_BIN_P(b) ((b) == EMPTY_BIN) 398 #define DELETED_BIN_P(b) ((b) == DELETED_BIN) 399 #define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN) 400 401 /* Macros to check empty bins and bins corresponding to deleted 402 entries. Bins are given by their index I in table TAB. */ 403 #define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i))) 404 #define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i))) 405 #define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i))) 406 407 /* Macros for marking and checking deleted entries given by their 408 pointer E_PTR. */ 409 #define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL) 410 #define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL) 411 412 /* Return bin size index of table TAB. */ 413 static inline unsigned int 414 get_size_ind(const st_table *tab) 415 { 416 return tab->size_ind; 417 } 418 419 /* Return the number of allocated bins of table TAB. */ 420 static inline st_index_t 421 get_bins_num(const st_table *tab) 422 { 423 return ((st_index_t) 1)<<tab->bin_power; 424 } 425 426 /* Return mask for a bin index in table TAB. */ 427 static inline st_index_t 428 bins_mask(const st_table *tab) 429 { 430 return get_bins_num(tab) - 1; 431 } 432 433 /* Return the index of table TAB bin corresponding to 434 HASH_VALUE. */ 435 static inline st_index_t 436 hash_bin(st_hash_t hash_value, st_table *tab) 437 { 438 return hash_value & bins_mask(tab); 439 } 440 441 /* Return the number of allocated entries of table TAB. */ 442 static inline st_index_t 443 get_allocated_entries(const st_table *tab) 444 { 445 return ((st_index_t) 1)<<tab->entry_power; 446 } 447 448 /* Return size of the allocated bins of table TAB. */ 449 static inline st_index_t 450 bins_size(const st_table *tab) 451 { 452 return features[tab->entry_power].bins_words * sizeof (st_index_t); 453 } 454 455 /* Mark all bins of table TAB as empty. */ 456 static void 457 initialize_bins(st_table *tab) 458 { 459 memset(tab->bins, 0, bins_size(tab)); 460 } 461 462 /* Make table TAB empty. */ 463 static void 464 make_tab_empty(st_table *tab) 465 { 466 tab->num_entries = 0; 467 tab->entries_start = tab->entries_bound = 0; 468 if (tab->bins != NULL) 469 initialize_bins(tab); 470 } 471 472 #ifdef ST_DEBUG 473 /* Check the table T consistency. It can be extremely slow. So use 474 it only for debugging. */ 475 static void 476 st_check(st_table *tab) 477 { 478 st_index_t d, e, i, n, p; 479 480 for (p = get_allocated_entries(tab), i = 0; p > 1; i++, p>>=1) 481 ; 482 p = i; 483 assert(p >= MINIMAL_POWER2); 484 assert(tab->entries_bound <= get_allocated_entries(tab) 485 && tab->entries_start <= tab->entries_bound); 486 n = 0; 487 return; 488 if (tab->entries_bound != 0) 489 for (i = tab->entries_start; i < tab->entries_bound; i++) { 490 assert(tab->entries[i].hash != (st_hash_t) ST_INIT_VAL 491 && tab->entries[i].key != ST_INIT_VAL 492 && tab->entries[i].record != ST_INIT_VAL); 493 if (! DELETED_ENTRY_P(&tab->entries[i])) 494 n++; 495 } 496 assert(n == tab->num_entries); 497 if (tab->bins == NULL) 498 assert(p <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS); 499 else { 500 assert(p > MAX_POWER2_FOR_TABLES_WITHOUT_BINS); 501 for (n = d = i = 0; i < get_bins_num(tab); i++) { 502 assert(get_bin(tab->bins, tab->size_ind, i) != ST_INIT_VAL); 503 if (IND_DELETED_BIN_P(tab, i)) { 504 d++; 505 continue; 506 } 507 else if (IND_EMPTY_BIN_P(tab, i)) 508 continue; 509 n++; 510 e = get_bin(tab->bins, tab->size_ind, i) - ENTRY_BASE; 511 assert(tab->entries_start <= e && e < tab->entries_bound); 512 assert(! DELETED_ENTRY_P(&tab->entries[e])); 513 assert(tab->entries[e].hash != (st_hash_t) ST_INIT_VAL 514 && tab->entries[e].key != ST_INIT_VAL 515 && tab->entries[e].record != ST_INIT_VAL); 516 } 517 assert(n == tab->num_entries); 518 assert(n + d < get_bins_num(tab)); 519 } 520 } 521 #endif 522 523 #ifdef HASH_LOG 524 #ifdef HAVE_UNISTD_H 525 #include <unistd.h> 526 #endif 527 static struct { 528 int all, total, num, str, strcase; 529 } collision; 530 531 /* Flag switching off output of package statistics at the end of 532 program. */ 533 static int init_st = 0; 534 535 /* Output overall number of table searches and collisions into a 536 temporary file. */ 537 static void 538 stat_col(void) 539 { 540 char fname[10+sizeof(long)*3]; 541 FILE *f; 542 if (!collision.total) return; 543 f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w"); 544 if (f == 0) return ; 545 546 fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total, 547 ((double)collision.all / (collision.total)) * 100); 548 fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase); 549 fclose(f); 550 } 551 #endif 552 553 /* Create and return table with TYPE which can hold at least SIZE 554 entries. The real number of entries which the table can hold is 555 the nearest power of two for SIZE. */ 556 st_table * 557 st_init_table_with_size(const struct st_hash_type *type, st_index_t size) 558 { 559 st_table *tab; 560 int n; 561 562 #ifdef HASH_LOG 563 #if HASH_LOG+0 < 0 120 564 { 121 if (newsize > size) return primes[i]; 122 } 123 /* Ran out of polynomials */ 124 return -1; /* should raise exception */ 125 #endif 126 } 565 const char *e = getenv("ST_HASH_LOG"); 566 if (!e || !*e) init_st = 1; 567 } 568 #endif 569 if (init_st == 0) { 570 init_st = 1; 571 atexit(stat_col); 572 } 573 #endif 574 575 n = get_power2(size); 576 #ifndef RUBY 577 if (n < 0) 578 return NULL; 579 #endif 580 tab = (st_table *) malloc(sizeof (st_table)); 581 if (tab == NULL) 582 return NULL; 583 tab->type = type; 584 tab->entry_power = n; 585 tab->bin_power = features[n].bin_power; 586 tab->size_ind = features[n].size_ind; 587 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS) 588 tab->bins = NULL; 589 else { 590 tab->bins = (st_index_t *) malloc(bins_size(tab)); 591 if (tab->bins == NULL) { 592 free(tab); 593 return NULL; 594 } 595 } 596 tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab) 597 * sizeof(st_table_entry)); 598 if (tab->entries == NULL) { 599 st_free_table(tab); 600 return NULL; 601 } 602 #ifdef ST_DEBUG 603 memset(tab->entries, ST_INIT_VAL_BYTE, 604 get_allocated_entries(tab) * sizeof(st_table_entry)); 605 if (tab->bins != NULL) 606 memset(tab->bins, ST_INIT_VAL_BYTE, bins_size(tab)); 607 #endif 608 make_tab_empty(tab); 609 tab->rebuilds_num = 0; 610 #ifdef ST_DEBUG 611 st_check(tab); 612 #endif 613 return tab; 614 } 615 616 #ifdef RUBY 617 /* Create and return table with TYPE which can hold a minimal number 618 of entries (see comments for get_power2). */ 619 st_table * 620 st_init_table(const struct st_hash_type *type) 621 { 622 return st_init_table_with_size(type, 0); 623 } 624 625 /* Create and return table which can hold a minimal number of 626 numbers. */ 627 st_table * 628 st_init_numtable(void) 629 { 630 return st_init_table(&type_numhash); 631 } 632 633 /* Create and return table which can hold SIZE numbers. */ 634 st_table * 635 st_init_numtable_with_size(st_index_t size) 636 { 637 return st_init_table_with_size(&type_numhash, size); 638 } 639 640 /* Create and return table which can hold a minimal number of 641 strings. */ 642 st_table * 643 st_init_strtable(void) 644 { 645 return st_init_table(&type_strhash); 646 } 647 648 /* Create and return table which can hold SIZE strings. */ 649 st_table * 650 st_init_strtable_with_size(st_index_t size) 651 { 652 return st_init_table_with_size(&type_strhash, size); 653 } 654 655 /* Create and return table which can hold a minimal number of strings 656 whose character case is ignored. */ 657 st_table * 658 st_init_strcasetable(void) 659 { 660 return st_init_table(&type_strcasehash); 661 } 662 663 /* Create and return table which can hold SIZE strings whose character 664 case is ignored. */ 665 st_table * 666 st_init_strcasetable_with_size(st_index_t size) 667 { 668 return st_init_table_with_size(&type_strcasehash, size); 669 } 670 671 /* Make table TAB empty. */ 672 void 673 st_clear(st_table *tab) 674 { 675 make_tab_empty(tab); 676 tab->rebuilds_num++; 677 #ifdef ST_DEBUG 678 st_check(tab); 679 #endif 680 } 681 #endif /* RUBY */ 682 683 /* Free table TAB space. */ 684 void 685 st_free_table(st_table *tab) 686 { 687 if (tab->bins != NULL) 688 free(tab->bins); 689 free(tab->entries); 690 free(tab); 691 } 692 693 #ifdef RUBY 694 /* Return byte size of memory allocted for table TAB. */ 695 size_t 696 st_memsize(const st_table *tab) 697 { 698 return(sizeof(st_table) 699 + (tab->bins == NULL ? 0 : bins_size(tab)) 700 + get_allocated_entries(tab) * sizeof(st_table_entry)); 701 } 702 #endif /* RUBY */ 703 704 static st_index_t 705 find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key); 706 707 static st_index_t 708 find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key); 709 710 static st_index_t 711 find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key); 712 713 static st_index_t 714 find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value, 715 st_data_t key, st_index_t *bin_ind); 127 716 128 717 #ifdef HASH_LOG 129 static int collision = 0;130 static int init_st = 0;131 132 718 static void 133 stat_col() 134 { 135 FILE *f = fopen("/tmp/col", "w"); 136 fprintf(f, "collision: %d\n", collision); 137 fclose(f); 138 } 139 #endif 140 141 st_table* 142 st_init_table_with_size(type, size) 143 struct st_hash_type *type; 144 int size; 145 { 146 st_table *tbl; 147 148 #ifdef HASH_LOG 149 if (init_st == 0) { 150 init_st = 1; 151 atexit(stat_col); 152 } 153 #endif 154 155 size = new_size(size); /* round up to prime number */ 156 157 tbl = alloc(st_table); 158 tbl->type = type; 159 tbl->num_entries = 0; 160 tbl->num_bins = size; 161 tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*)); 162 163 return tbl; 164 } 165 166 st_table* 167 st_init_table(type) 168 struct st_hash_type *type; 169 { 170 return st_init_table_with_size(type, 0); 171 } 172 173 st_table* 174 st_init_numtable(void) 175 { 176 return st_init_table(&type_numhash); 177 } 178 179 st_table* 180 st_init_numtable_with_size(size) 181 int size; 182 { 183 return st_init_table_with_size(&type_numhash, size); 184 } 185 186 st_table* 187 st_init_strtable(void) 188 { 189 return st_init_table(&type_strhash); 190 } 191 192 st_table* 193 st_init_strtable_with_size(size) 194 int size; 195 { 196 return st_init_table_with_size(&type_strhash, size); 197 } 198 719 count_collision(const struct st_hash_type *type) 720 { 721 collision.all++; 722 if (type == &type_numhash) { 723 collision.num++; 724 } 725 else if (type == &type_strhash) { 726 collision.strcase++; 727 } 728 else if (type == &type_strcasehash) { 729 collision.str++; 730 } 731 } 732 733 #define COLLISION (collision_check ? count_collision(tab->type) : (void)0) 734 #define FOUND_BIN (collision_check ? collision.total++ : (void)0) 735 #define collision_check 0 736 #else 737 #define COLLISION 738 #define FOUND_BIN 739 #endif 740 741 /* If the number of entries in the table is at least REBUILD_THRESHOLD 742 times less than the entry array length, decrease the table 743 size. */ 744 #define REBUILD_THRESHOLD 4 745 746 #if REBUILD_THRESHOLD < 2 747 #error "REBUILD_THRESHOLD should be >= 2" 748 #endif 749 750 /* Rebuild table TAB. Rebuilding removes all deleted bins and entries 751 and can change size of the table entries and bins arrays. 752 Rebuilding is implemented by creation of a new table or by 753 compaction of the existing one. */ 754 static void 755 rebuild_table(st_table *tab) 756 { 757 st_index_t i, ni, bound; 758 unsigned int size_ind; 759 st_table *new_tab; 760 st_table_entry *entries, *new_entries; 761 st_table_entry *curr_entry_ptr; 762 st_index_t *bins; 763 st_index_t bin_ind; 764 765 st_assert(tab != NULL); 766 bound = tab->entries_bound; 767 entries = tab->entries; 768 if ((2 * tab->num_entries <= get_allocated_entries(tab) 769 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab)) 770 || tab->num_entries < (1 << MINIMAL_POWER2)) { 771 /* Compaction: */ 772 tab->num_entries = 0; 773 if (tab->bins != NULL) 774 initialize_bins(tab); 775 new_tab = tab; 776 new_entries = entries; 777 } 778 else { 779 new_tab = st_init_table_with_size(tab->type, 780 2 * tab->num_entries - 1); 781 new_entries = new_tab->entries; 782 } 783 ni = 0; 784 bins = new_tab->bins; 785 size_ind = get_size_ind(new_tab); 786 for (i = tab->entries_start; i < bound; i++) { 787 curr_entry_ptr = &entries[i]; 788 PREFETCH(entries + i + 1, 0); 789 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0)) 790 continue; 791 if (&new_entries[ni] != curr_entry_ptr) 792 new_entries[ni] = *curr_entry_ptr; 793 if (EXPECT(bins != NULL, 1)) { 794 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash, 795 curr_entry_ptr->key); 796 st_assert(bin_ind != UNDEFINED_BIN_IND 797 && (tab == new_tab || new_tab->rebuilds_num == 0) 798 && IND_EMPTY_BIN_P(new_tab, bin_ind)); 799 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE); 800 } 801 new_tab->num_entries++; 802 ni++; 803 } 804 if (new_tab != tab) { 805 tab->entry_power = new_tab->entry_power; 806 tab->bin_power = new_tab->bin_power; 807 tab->size_ind = new_tab->size_ind; 808 st_assert (tab->num_entries == ni && new_tab->num_entries == ni); 809 if (tab->bins != NULL) 810 free(tab->bins); 811 tab->bins = new_tab->bins; 812 free(tab->entries); 813 tab->entries = new_tab->entries; 814 free(new_tab); 815 } 816 tab->entries_start = 0; 817 tab->entries_bound = tab->num_entries; 818 tab->rebuilds_num++; 819 #ifdef ST_DEBUG 820 st_check(tab); 821 #endif 822 } 823 824 /* Return the next secondary hash index for table TAB using previous 825 index IND and PERTERB. Finally modulo of the function becomes a 826 full *cycle linear congruential generator*, in other words it 827 guarantees traversing all table bins in extreme case. 828 829 According the Hull-Dobell theorem a generator 830 "Xnext = (a*Xprev + c) mod m" is a full cycle generator iff 831 o m and c are relatively prime 832 o a-1 is divisible by all prime factors of m 833 o a-1 is divisible by 4 if m is divisible by 4. 834 835 For our case a is 5, c is 1, and m is a power of two. */ 836 static inline st_index_t 837 secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb) 838 { 839 *perterb >>= 11; 840 ind = (ind << 2) + ind + *perterb + 1; 841 return hash_bin(ind, tab); 842 } 843 844 /* Find an entry with HASH_VALUE and KEY in TABLE using a linear 845 search. Return the index of the found entry in array `entries`. 846 If it is not found, return UNDEFINED_ENTRY_IND. */ 847 static inline st_index_t 848 find_entry(st_table *tab, st_hash_t hash_value, st_data_t key) 849 { 850 st_index_t i, bound; 851 st_table_entry *entries; 852 853 bound = tab->entries_bound; 854 entries = tab->entries; 855 for (i = tab->entries_start; i < bound; i++) { 856 if (PTR_EQUAL(tab, &entries[i], hash_value, key)) 857 return i; 858 } 859 return UNDEFINED_ENTRY_IND; 860 } 861 862 /* Use the quadratic probing. The method has a better data locality 863 but more collisions than the current approach. In average it 864 results in a bit slower search. */ 865 /*#define QUADRATIC_PROBE*/ 866 867 /* Return index of entry with HASH_VALUE and KEY in table TAB. If 868 there is no such entry, return UNDEFINED_ENTRY_IND. */ 869 static st_index_t 870 find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key) 871 { 872 st_index_t ind; 873 #ifdef QUADRATIC_PROBE 874 st_index_t d; 875 #else 876 st_index_t peterb; 877 #endif 878 st_index_t bin; 879 st_table_entry *entries = tab->entries; 880 881 st_assert(tab != NULL && tab->bins != NULL); 882 ind = hash_bin(hash_value, tab); 883 #ifdef QUADRATIC_PROBE 884 d = 1; 885 #else 886 peterb = hash_value; 887 #endif 888 FOUND_BIN; 889 for (;;) { 890 bin = get_bin(tab->bins, get_size_ind(tab), ind); 891 if (! EMPTY_OR_DELETED_BIN_P(bin) 892 && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key)) 893 break; 894 else if (EMPTY_BIN_P(bin)) 895 return UNDEFINED_ENTRY_IND; 896 #ifdef QUADRATIC_PROBE 897 ind = hash_bin(ind + d, tab); 898 d++; 899 #else 900 ind = secondary_hash(ind, tab, &peterb); 901 #endif 902 COLLISION; 903 } 904 return bin; 905 } 906 907 /* Find and return index of table TAB bin corresponding to an entry 908 with HASH_VALUE and KEY. If there is no such bin, return 909 UNDEFINED_BIN_IND. */ 910 static st_index_t 911 find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key) 912 { 913 st_index_t ind; 914 #ifdef QUADRATIC_PROBE 915 st_index_t d; 916 #else 917 st_index_t peterb; 918 #endif 919 st_index_t bin; 920 st_table_entry *entries = tab->entries; 921 922 st_assert(tab != NULL && tab->bins != NULL); 923 ind = hash_bin(hash_value, tab); 924 #ifdef QUADRATIC_PROBE 925 d = 1; 926 #else 927 peterb = hash_value; 928 #endif 929 FOUND_BIN; 930 for (;;) { 931 bin = get_bin(tab->bins, get_size_ind(tab), ind); 932 if (! EMPTY_OR_DELETED_BIN_P(bin) 933 && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key)) 934 break; 935 else if (EMPTY_BIN_P(bin)) 936 return UNDEFINED_BIN_IND; 937 #ifdef QUADRATIC_PROBE 938 ind = hash_bin(ind + d, tab); 939 d++; 940 #else 941 ind = secondary_hash(ind, tab, &peterb); 942 #endif 943 COLLISION; 944 } 945 return ind; 946 } 947 948 /* Find and return index of table TAB bin corresponding to an entry 949 with HASH_VALUE and KEY. The entry should be in the table 950 already. */ 951 static st_index_t 952 find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key) 953 { 954 st_index_t ind; 955 #ifdef QUADRATIC_PROBE 956 st_index_t d; 957 #else 958 st_index_t peterb; 959 #endif 960 st_index_t bin; 961 st_table_entry *entries = tab->entries; 962 963 st_assert(tab != NULL && tab->bins != NULL); 964 ind = hash_bin(hash_value, tab); 965 #ifdef QUADRATIC_PROBE 966 d = 1; 967 #else 968 peterb = hash_value; 969 #endif 970 FOUND_BIN; 971 for (;;) { 972 bin = get_bin(tab->bins, get_size_ind(tab), ind); 973 if (EMPTY_OR_DELETED_BIN_P(bin)) 974 return ind; 975 st_assert (! PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key)); 976 #ifdef QUADRATIC_PROBE 977 ind = hash_bin(ind + d, tab); 978 d++; 979 #else 980 ind = secondary_hash(ind, tab, &peterb); 981 #endif 982 COLLISION; 983 } 984 } 985 986 /* Return index of table TAB bin for HASH_VALUE and KEY through 987 BIN_IND and the pointed value as the function result. Reserve the 988 bin for inclusion of the corresponding entry into the table if it 989 is not there yet. We always find such bin as bins array length is 990 bigger entries array. Although we can reuse a deleted bin, the 991 result bin value is always empty if the table has no entry with 992 KEY. Return the entries array index of the found entry or 993 UNDEFINED_ENTRY_IND if it is not found. */ 994 static st_index_t 995 find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value, 996 st_data_t key, st_index_t *bin_ind) { 997 st_index_t ind; 998 st_hash_t curr_hash_value = *hash_value; 999 #ifdef QUADRATIC_PROBE 1000 st_index_t d; 1001 #else 1002 st_index_t peterb; 1003 #endif 1004 st_index_t entry_index; 1005 st_index_t first_deleted_bin_ind; 1006 st_table_entry *entries; 1007 1008 st_assert(tab != NULL && tab->bins != NULL 1009 && tab->entries_bound <= get_allocated_entries(tab) 1010 && tab->entries_start <= tab->entries_bound); 1011 ind = hash_bin(curr_hash_value, tab); 1012 #ifdef QUADRATIC_PROBE 1013 d = 1; 1014 #else 1015 peterb = curr_hash_value; 1016 #endif 1017 FOUND_BIN; 1018 first_deleted_bin_ind = UNDEFINED_BIN_IND; 1019 entries = tab->entries; 1020 for (;;) { 1021 entry_index = get_bin(tab->bins, get_size_ind(tab), ind); 1022 if (EMPTY_BIN_P(entry_index)) { 1023 tab->num_entries++; 1024 entry_index = UNDEFINED_ENTRY_IND; 1025 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) { 1026 /* We can reuse bin of a deleted entry. */ 1027 ind = first_deleted_bin_ind; 1028 MARK_BIN_EMPTY(tab, ind); 1029 } 1030 break; 1031 } else if (! DELETED_BIN_P(entry_index)) { 1032 if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key)) 1033 break; 1034 } else if (first_deleted_bin_ind == UNDEFINED_BIN_IND) 1035 first_deleted_bin_ind = ind; 1036 #ifdef QUADRATIC_PROBE 1037 ind = hash_bin(ind + d, tab); 1038 d++; 1039 #else 1040 ind = secondary_hash(ind, tab, &peterb); 1041 #endif 1042 COLLISION; 1043 } 1044 *bin_ind = ind; 1045 return entry_index; 1046 } 1047 1048 /* Find an entry with KEY in table TAB. Return non-zero if we found 1049 it. Set up *RECORD to the found entry record. */ 1050 int 1051 st_lookup(st_table *tab, st_data_t key, st_data_t *value) 1052 { 1053 st_index_t bin; 1054 st_hash_t hash = do_hash(key, tab); 1055 1056 if (tab->bins == NULL) { 1057 bin = find_entry(tab, hash, key); 1058 if (bin == UNDEFINED_ENTRY_IND) 1059 return 0; 1060 } else { 1061 bin = find_table_entry_ind(tab, hash, key); 1062 if (bin == UNDEFINED_ENTRY_IND) 1063 return 0; 1064 bin -= ENTRY_BASE; 1065 } 1066 if (value != 0) 1067 *value = tab->entries[bin].record; 1068 return 1; 1069 } 1070 1071 #ifdef RUBY 1072 /* Find an entry with KEY in table TAB. Return non-zero if we found 1073 it. Set up *RESULT to the found table entry key. */ 1074 int 1075 st_get_key(st_table *tab, st_data_t key, st_data_t *result) 1076 { 1077 st_index_t bin; 1078 st_hash_t hash = do_hash(key, tab); 1079 1080 if (tab->bins == NULL) { 1081 bin = find_entry(tab, hash, key); 1082 if (bin == UNDEFINED_ENTRY_IND) 1083 return 0; 1084 } else { 1085 bin = find_table_entry_ind(tab, hash, key); 1086 if (bin == UNDEFINED_ENTRY_IND) 1087 return 0; 1088 bin -= ENTRY_BASE; 1089 } 1090 if (result != 0) 1091 *result = tab->entries[bin].key; 1092 return 1; 1093 } 1094 #endif /* RUBY */ 1095 1096 /* Check the table and rebuild it if it is necessary. */ 1097 static inline void 1098 rebuild_table_if_necessary (st_table *tab) 1099 { 1100 st_index_t bound = tab->entries_bound; 1101 1102 if (bound == get_allocated_entries(tab)) 1103 rebuild_table(tab); 1104 st_assert(tab->entries_bound < get_allocated_entries(tab)); 1105 } 1106 1107 /* Insert (KEY, VALUE) into table TAB and return zero. If there is 1108 already entry with KEY in the table, return nonzero and and update 1109 the value of the found entry. */ 1110 int 1111 st_insert(st_table *tab, st_data_t key, st_data_t value) 1112 { 1113 st_table_entry *entry; 1114 st_index_t bin; 1115 st_index_t ind; 1116 st_hash_t hash_value; 1117 st_index_t bin_ind; 1118 int new_p; 1119 1120 rebuild_table_if_necessary(tab); 1121 hash_value = do_hash(key, tab); 1122 if (tab->bins == NULL) { 1123 bin = find_entry(tab, hash_value, key); 1124 new_p = bin == UNDEFINED_ENTRY_IND; 1125 if (new_p) 1126 tab->num_entries++; 1127 bin_ind = UNDEFINED_BIN_IND; 1128 } else { 1129 bin = find_table_bin_ptr_and_reserve(tab, &hash_value, 1130 key, &bin_ind); 1131 new_p = bin == UNDEFINED_ENTRY_IND; 1132 bin -= ENTRY_BASE; 1133 } 1134 if (new_p) { 1135 st_assert(tab->entries_bound < get_allocated_entries(tab)); 1136 ind = tab->entries_bound++; 1137 entry = &tab->entries[ind]; 1138 entry->hash = hash_value; 1139 entry->key = key; 1140 entry->record = value; 1141 if (bin_ind != UNDEFINED_BIN_IND) 1142 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE); 1143 #ifdef ST_DEBUG 1144 st_check(tab); 1145 #endif 1146 return 0; 1147 } 1148 tab->entries[bin].record = value; 1149 #ifdef ST_DEBUG 1150 st_check(tab); 1151 #endif 1152 return 1; 1153 } 1154 1155 #ifdef RUBY 1156 /* Insert (KEY, VALUE, HASH) into table TAB. The table should not have 1157 entry with KEY before the insertion. */ 1158 static inline void 1159 st_add_direct_with_hash(st_table *tab, 1160 st_data_t key, st_data_t value, st_hash_t hash) { 1161 st_table_entry *entry; 1162 st_index_t ind; 1163 st_index_t bin_ind; 1164 1165 rebuild_table_if_necessary(tab); 1166 ind = tab->entries_bound++; 1167 entry = &tab->entries[ind]; 1168 entry->hash = hash; 1169 entry->key = key; 1170 entry->record = value; 1171 tab->num_entries++; 1172 if (tab->bins != NULL) { 1173 bin_ind = find_table_bin_ind_direct(tab, hash, key); 1174 st_assert (bin_ind != UNDEFINED_BIN_IND); 1175 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE); 1176 } 1177 #ifdef ST_DEBUG 1178 st_check(tab); 1179 #endif 1180 } 1181 1182 /* Insert (KEY, VALUE) into table TAB. The table should not have 1183 entry with KEY before the insertion. */ 199 1184 void 200 st_free_table(table) 201 st_table *table; 202 { 203 register st_table_entry *ptr, *next; 204 int i; 205 206 for(i = 0; i < table->num_bins; i++) { 207 ptr = table->bins[i]; 208 while (ptr != 0) { 209 next = ptr->next; 210 free(ptr); 211 ptr = next; 1185 st_add_direct(st_table *tab, st_data_t key, st_data_t value) 1186 { 1187 st_hash_t hash_value; 1188 1189 hash_value = do_hash(key, tab); 1190 st_add_direct_with_hash(tab, key, value, hash_value); 1191 } 1192 1193 /* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If 1194 there is already entry with KEY in the table, return nonzero and 1195 and update the value of the found entry. */ 1196 int 1197 st_insert2(st_table *tab, st_data_t key, st_data_t value, 1198 st_data_t (*func)(st_data_t)) { 1199 st_table_entry *entry; 1200 st_index_t bin; 1201 st_index_t ind, check; 1202 st_hash_t hash_value; 1203 st_index_t bin_ind; 1204 int new_p; 1205 1206 rebuild_table_if_necessary (tab); 1207 hash_value = do_hash(key, tab); 1208 if (tab->bins == NULL) { 1209 bin = find_entry(tab, hash_value, key); 1210 new_p = bin == UNDEFINED_ENTRY_IND; 1211 bin_ind = UNDEFINED_BIN_IND; 1212 } else { 1213 bin = find_table_bin_ptr_and_reserve(tab, &hash_value, 1214 key, &bin_ind); 1215 new_p = bin == UNDEFINED_ENTRY_IND; 1216 bin -= ENTRY_BASE; 1217 } 1218 if (new_p) { 1219 st_assert(tab->entries_bound < get_allocated_entries(tab)); 1220 check = tab->rebuilds_num; 1221 key = (*func)(key); 1222 st_assert(check == tab->rebuilds_num 1223 && do_hash(key, tab) == hash_value); 1224 ind = tab->entries_bound++; 1225 entry = &tab->entries[ind]; 1226 entry->hash = hash_value; 1227 entry->key = key; 1228 entry->record = value; 1229 if (bin_ind != UNDEFINED_BIN_IND) 1230 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE); 1231 #ifdef ST_DEBUG 1232 st_check(tab); 1233 #endif 1234 return 0; 1235 } 1236 tab->entries[bin].record = value; 1237 #ifdef ST_DEBUG 1238 st_check(tab); 1239 #endif 1240 return 1; 1241 } 1242 1243 /* Create and return a copy of table OLD_TAB. */ 1244 st_table * 1245 st_copy(st_table *old_tab) 1246 { 1247 st_table *new_tab; 1248 1249 new_tab = (st_table *) malloc(sizeof(st_table)); 1250 if (new_tab == NULL) 1251 return NULL; 1252 *new_tab = *old_tab; 1253 if (old_tab->bins == NULL) 1254 new_tab->bins = NULL; 1255 else { 1256 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab)); 1257 if (new_tab->bins == NULL) { 1258 free(new_tab); 1259 return NULL; 212 1260 } 213 1261 } 214 free(table->bins); 215 free(table); 216 } 217 218 #define PTR_NOT_EQUAL(table, ptr, hash_val, key) \ 219 ((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key))) 220 221 #ifdef HASH_LOG 222 #define COLLISION collision++ 223 #else 224 #define COLLISION 225 #endif 226 227 #define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\ 228 bin_pos = hash_val%(table)->num_bins;\ 229 ptr = (table)->bins[bin_pos];\ 230 if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\ 231 COLLISION;\ 232 while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\ 233 ptr = ptr->next;\ 234 }\ 235 ptr = ptr->next;\ 236 }\ 237 } while (0) 1262 new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab) 1263 * sizeof(st_table_entry)); 1264 if (new_tab->entries == NULL) { 1265 st_free_table(new_tab); 1266 return NULL; 1267 } 1268 MEMCPY(new_tab->entries, old_tab->entries, st_table_entry, 1269 get_allocated_entries(old_tab)); 1270 if (old_tab->bins != NULL) 1271 MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab)); 1272 #ifdef ST_DEBUG 1273 st_check(new_tab); 1274 #endif 1275 return new_tab; 1276 } 1277 #endif /* RUBY */ 1278 1279 /* Update the entries start of table TAB after removing an entry 1280 with index N in the array entries. */ 1281 static inline void 1282 update_range_for_deleted(st_table *tab, st_index_t n) 1283 { 1284 /* Do not update entries_bound here. Otherwise, we can fill all 1285 bins by deleted entry value before rebuilding the table. */ 1286 if (tab->entries_start == n) 1287 tab->entries_start = n + 1; 1288 } 1289 1290 #ifdef RUBY 1291 /* Delete entry with KEY from table TAB, set up *VALUE (unless 1292 VALUE is zero) from deleted table entry, and return non-zero. If 1293 there is no entry with KEY in the table, clear *VALUE (unless VALUE 1294 is zero), and return zero. */ 1295 static int 1296 st_general_delete(st_table *tab, st_data_t *key, st_data_t *value) 1297 { 1298 st_table_entry *entry; 1299 st_index_t bin; 1300 st_index_t bin_ind; 1301 st_hash_t hash; 1302 1303 st_assert(tab != NULL); 1304 hash = do_hash(*key, tab); 1305 if (tab->bins == NULL) { 1306 bin = find_entry(tab, hash, *key); 1307 if (bin == UNDEFINED_ENTRY_IND) { 1308 if (value != 0) *value = 0; 1309 return 0; 1310 } 1311 } else { 1312 bin_ind = find_table_bin_ind(tab, hash, *key); 1313 if (bin_ind == UNDEFINED_BIN_IND) { 1314 if (value != 0) *value = 0; 1315 return 0; 1316 } 1317 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; 1318 MARK_BIN_DELETED(tab, bin_ind); 1319 } 1320 entry = &tab->entries[bin]; 1321 *key = entry->key; 1322 if (value != 0) *value = entry->record; 1323 MARK_ENTRY_DELETED(entry); 1324 tab->num_entries--; 1325 update_range_for_deleted(tab, bin); 1326 #ifdef ST_DEBUG 1327 st_check(tab); 1328 #endif 1329 return 1; 1330 } 238 1331 239 1332 int 240 st_lookup(table, key, value) 241 st_table *table; 242 register st_data_t key; 243 st_data_t *value; 244 { 245 unsigned int hash_val, bin_pos; 246 register st_table_entry *ptr; 247 248 hash_val = do_hash(key, table); 249 FIND_ENTRY(table, ptr, hash_val, bin_pos); 250 251 if (ptr == 0) { 252 return 0; 253 } 254 else { 255 if (value != 0) *value = ptr->record; 256 return 1; 257 } 258 } 259 260 #define ADD_DIRECT(table, key, value, hash_val, bin_pos)\ 261 do {\ 262 st_table_entry *entry;\ 263 if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\ 264 rehash(table);\ 265 bin_pos = hash_val % table->num_bins;\ 266 }\ 267 \ 268 entry = alloc(st_table_entry);\ 269 \ 270 entry->hash = hash_val;\ 271 entry->key = key;\ 272 entry->record = value;\ 273 entry->next = table->bins[bin_pos];\ 274 table->bins[bin_pos] = entry;\ 275 table->num_entries++;\ 276 } while (0) 277 1333 st_delete(st_table *tab, st_data_t *key, st_data_t *value) 1334 { 1335 return st_general_delete(tab, key, value); 1336 } 1337 1338 /* The function and other functions with suffix '_safe' or '_check' 1339 are originated from the previous implementation of the hash tables. 1340 It was necessary for correct deleting entries during traversing 1341 tables. The current implementation permits deletion during 1342 traversing without a specific way to do this. */ 278 1343 int 279 st_insert(table, key, value) 280 register st_table *table; 281 register st_data_t key; 282 st_data_t value; 283 { 284 unsigned int hash_val, bin_pos; 285 register st_table_entry *ptr; 286 287 hash_val = do_hash(key, table); 288 FIND_ENTRY(table, ptr, hash_val, bin_pos); 289 290 if (ptr == 0) { 291 ADD_DIRECT(table, key, value, hash_val, bin_pos); 292 return 0; 293 } 294 else { 295 ptr->record = value; 296 return 1; 297 } 298 } 299 300 void 301 st_add_direct(table, key, value) 302 st_table *table; 303 st_data_t key; 304 st_data_t value; 305 { 306 unsigned int hash_val, bin_pos; 307 308 hash_val = do_hash(key, table); 309 bin_pos = hash_val % table->num_bins; 310 ADD_DIRECT(table, key, value, hash_val, bin_pos); 311 } 312 313 static void 314 rehash(table) 315 register st_table *table; 316 { 317 register st_table_entry *ptr, *next, **new_bins; 318 int i, old_num_bins = table->num_bins, new_num_bins; 319 unsigned int hash_val; 320 321 new_num_bins = new_size(old_num_bins+1); 322 new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*)); 323 324 for(i = 0; i < old_num_bins; i++) { 325 ptr = table->bins[i]; 326 while (ptr != 0) { 327 next = ptr->next; 328 hash_val = ptr->hash % new_num_bins; 329 ptr->next = new_bins[hash_val]; 330 new_bins[hash_val] = ptr; 331 ptr = next; 332 } 333 } 334 free(table->bins); 335 table->num_bins = new_num_bins; 336 table->bins = new_bins; 337 } 338 339 st_table* 340 st_copy(old_table) 341 st_table *old_table; 342 { 343 st_table *new_table; 344 st_table_entry *ptr, *entry; 345 int i, num_bins = old_table->num_bins; 346 347 new_table = alloc(st_table); 348 if (new_table == 0) { 349 return 0; 350 } 351 352 *new_table = *old_table; 353 new_table->bins = (st_table_entry**) 354 Calloc((unsigned)num_bins, sizeof(st_table_entry*)); 355 356 if (new_table->bins == 0) { 357 free(new_table); 358 return 0; 359 } 360 361 for(i = 0; i < num_bins; i++) { 362 new_table->bins[i] = 0; 363 ptr = old_table->bins[i]; 364 while (ptr != 0) { 365 entry = alloc(st_table_entry); 366 if (entry == 0) { 367 free(new_table->bins); 368 free(new_table); 369 return 0; 1344 st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value, 1345 st_data_t never ATTRIBUTE_UNUSED) { 1346 return st_general_delete(tab, key, value); 1347 } 1348 1349 /* If table TAB is empty, clear *VALUE (unless VALUE is zero), and 1350 return zero. Otherwise, remove the first entry in the table. 1351 Return its key through KEY and its record through VALUE (unless 1352 VALUE is zero). */ 1353 int 1354 st_shift(st_table *tab, st_data_t *key, st_data_t *value) 1355 { 1356 st_index_t i, bound; 1357 st_index_t bin; 1358 st_table_entry *entries, *curr_entry_ptr; 1359 st_index_t bin_ind; 1360 1361 entries = tab->entries; 1362 bound = tab->entries_bound; 1363 for (i = tab->entries_start; i < bound; i++) { 1364 curr_entry_ptr = &entries[i]; 1365 if (! DELETED_ENTRY_P(curr_entry_ptr)) { 1366 if (value != 0) *value = curr_entry_ptr->record; 1367 *key = curr_entry_ptr->key; 1368 if (tab->bins == NULL) { 1369 bin = find_entry(tab, curr_entry_ptr->hash, curr_entry_ptr->key); 1370 st_assert(bin != UNDEFINED_ENTRY_IND 1371 && &entries[bin] == curr_entry_ptr); 1372 } else { 1373 bin_ind = find_table_bin_ind(tab, curr_entry_ptr->hash, 1374 curr_entry_ptr->key); 1375 st_assert(bin_ind != UNDEFINED_BIN_IND 1376 && &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind) 1377 - ENTRY_BASE] == curr_entry_ptr); 1378 MARK_BIN_DELETED(tab, bin_ind); 370 1379 } 371 *entry = *ptr; 372 entry->next = new_table->bins[i]; 373 new_table->bins[i] = entry; 374 ptr = ptr->next; 375 } 376 } 377 return new_table; 378 } 379 380 int 381 st_delete(table, key, value) 382 register st_table *table; 383 register st_data_t *key; 384 st_data_t *value; 385 { 386 unsigned int hash_val; 387 st_table_entry *tmp; 388 register st_table_entry *ptr; 389 390 hash_val = do_hash_bin(*key, table); 391 ptr = table->bins[hash_val]; 392 393 if (ptr == 0) { 394 if (value != 0) *value = 0; 395 return 0; 396 } 397 398 if (EQUAL(table, *key, ptr->key)) { 399 table->bins[hash_val] = ptr->next; 400 table->num_entries--; 401 if (value != 0) *value = ptr->record; 402 *key = ptr->key; 403 free(ptr); 404 return 1; 405 } 406 407 for(; ptr->next != 0; ptr = ptr->next) { 408 if (EQUAL(table, ptr->next->key, *key)) { 409 tmp = ptr->next; 410 ptr->next = ptr->next->next; 411 table->num_entries--; 412 if (value != 0) *value = tmp->record; 413 *key = tmp->key; 414 free(tmp); 1380 MARK_ENTRY_DELETED(curr_entry_ptr); 1381 tab->num_entries--; 1382 update_range_for_deleted(tab, i); 1383 #ifdef ST_DEBUG 1384 st_check(tab); 1385 #endif 415 1386 return 1; 416 1387 } 417 1388 } 418 1389 st_assert(tab->num_entries == 0); 1390 tab->entries_start = tab->entries_bound = 0; 1391 if (value != 0) *value = 0; 419 1392 return 0; 420 1393 } 421 1394 1395 /* See comments for function st_delete_safe. */ 1396 void 1397 st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED, 1398 st_data_t never ATTRIBUTE_UNUSED) { 1399 } 1400 1401 /* Find entry with KEY in table TAB, call FUNC with the key and the 1402 value of the found entry, and non-zero as the 3rd argument. If the 1403 entry is not found, call FUNC with KEY, and 2 zero arguments. If 1404 the call returns ST_CONTINUE, the table will have an entry with key 1405 and value returned by FUNC through the 1st and 2nd parameters. If 1406 the call of FUNC returns ST_DELETE, the table will not have entry 1407 with KEY. The function returns flag of that the entry with KEY was 1408 in the table before the call. */ 422 1409 int 423 st_delete_safe(table, key, value, never) 424 register st_table *table; 425 register st_data_t *key; 426 st_data_t *value; 427 st_data_t never; 428 { 429 unsigned int hash_val; 430 register st_table_entry *ptr; 431 432 hash_val = do_hash_bin(*key, table); 433 ptr = table->bins[hash_val]; 434 435 if (ptr == 0) { 436 if (value != 0) *value = 0; 437 return 0; 438 } 439 440 for(; ptr != 0; ptr = ptr->next) { 441 if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) { 442 table->num_entries--; 443 *key = ptr->key; 444 if (value != 0) *value = ptr->record; 445 ptr->key = ptr->record = never; 446 return 1; 1410 st_update(st_table *tab, st_data_t key, 1411 st_update_callback_func *func, st_data_t arg) { 1412 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */ 1413 st_index_t bin = 0; /* Ditto */ 1414 st_table_entry *entries; 1415 st_index_t bin_ind; 1416 st_data_t value = 0, old_key; 1417 st_index_t check; 1418 int retval, existing; 1419 st_hash_t hash = do_hash(key, tab); 1420 1421 entries = tab->entries; 1422 if (tab->bins == NULL) { 1423 bin = find_entry(tab, hash, key); 1424 existing = bin != UNDEFINED_ENTRY_IND; 1425 entry = &entries[bin]; 1426 bin_ind = UNDEFINED_BIN_IND; 1427 } else { 1428 bin_ind = find_table_bin_ind(tab, hash, key); 1429 existing = bin_ind != UNDEFINED_BIN_IND; 1430 if (existing) { 1431 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; 1432 entry = &entries[bin]; 447 1433 } 448 1434 } 449 1435 if (existing) { 1436 key = entry->key; 1437 value = entry->record; 1438 } 1439 old_key = key; 1440 check = tab->rebuilds_num; 1441 retval = (*func)(&key, &value, arg, existing); 1442 st_assert(check == tab->rebuilds_num); 1443 switch (retval) { 1444 case ST_CONTINUE: 1445 if (! existing) { 1446 st_add_direct_with_hash(tab, key, value, hash); 1447 break; 1448 } 1449 if (old_key != key) { 1450 entry->key = key; 1451 } 1452 entry->record = value; 1453 break; 1454 case ST_DELETE: 1455 if (existing) { 1456 if (bin_ind != UNDEFINED_BIN_IND) 1457 MARK_BIN_DELETED(tab, bin_ind); 1458 MARK_ENTRY_DELETED(entry); 1459 tab->num_entries--; 1460 update_range_for_deleted(tab, bin); 1461 #ifdef ST_DEBUG 1462 st_check(tab); 1463 #endif 1464 } 1465 break; 1466 } 1467 #ifdef ST_DEBUG 1468 st_check(tab); 1469 #endif 1470 return existing; 1471 } 1472 #endif /* RUBY */ 1473 1474 /* Traverse all entries in table TAB calling FUNC with current entry 1475 key and value and zero. If the call returns ST_STOP, stop 1476 traversing. If the call returns ST_DELETE, delete the current 1477 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue 1478 traversing. The function returns zero unless an error is found. 1479 CHECK_P is flag of st_foreach_check call. The behavior is a bit 1480 different for ST_CHECK and when the current element is removed 1481 during traversing. */ 1482 static inline int 1483 st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg, 1484 int check_p) { 1485 st_index_t bin; 1486 st_index_t bin_ind; 1487 st_table_entry *entries, *curr_entry_ptr; 1488 enum st_retval retval; 1489 st_index_t i, rebuilds_num; 1490 st_hash_t hash; 1491 st_data_t key; 1492 int error_p, packed_p = tab->bins == NULL; 1493 1494 st_assert(tab->entries_start <= tab->entries_bound); 1495 entries = tab->entries; 1496 /* The bound can change inside the loop even without rebuilding 1497 the table, e.g. by an entry inesrtion. */ 1498 for (i = tab->entries_start; i < tab->entries_bound; i++) { 1499 curr_entry_ptr = &entries[i]; 1500 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0)) 1501 continue; 1502 key = curr_entry_ptr->key; 1503 rebuilds_num = tab->rebuilds_num; 1504 hash = curr_entry_ptr->hash; 1505 retval = (*func)(key, curr_entry_ptr->record, arg, 0); 1506 if (rebuilds_num != tab->rebuilds_num) { 1507 entries = tab->entries; 1508 packed_p = tab->bins == NULL; 1509 if (packed_p) { 1510 i = find_entry(tab, hash, key); 1511 error_p = i == UNDEFINED_ENTRY_IND; 1512 } else { 1513 i = find_table_entry_ind(tab, hash, key); 1514 error_p = i == UNDEFINED_ENTRY_IND; 1515 i -= ENTRY_BASE; 1516 } 1517 if (error_p && check_p) { 1518 /* call func with error notice */ 1519 retval = (*func)(0, 0, arg, 1); 1520 #ifdef ST_DEBUG 1521 st_check(tab); 1522 #endif 1523 return 1; 1524 } 1525 curr_entry_ptr = &entries[i]; 1526 } 1527 switch (retval) { 1528 case ST_CONTINUE: 1529 break; 1530 case ST_CHECK: 1531 if (check_p) 1532 break; 1533 case ST_STOP: 1534 #ifdef ST_DEBUG 1535 st_check(tab); 1536 #endif 1537 return 0; 1538 case ST_DELETE: 1539 if (packed_p) { 1540 bin = find_entry(tab, hash, curr_entry_ptr->key); 1541 if (bin == UNDEFINED_ENTRY_IND) 1542 break; 1543 } else { 1544 bin_ind = find_table_bin_ind(tab, hash, curr_entry_ptr->key); 1545 if (bin_ind == UNDEFINED_BIN_IND) 1546 break; 1547 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; 1548 MARK_BIN_DELETED(tab, bin_ind); 1549 } 1550 st_assert(&entries[bin] == curr_entry_ptr); 1551 MARK_ENTRY_DELETED(curr_entry_ptr); 1552 tab->num_entries--; 1553 update_range_for_deleted(tab, bin); 1554 #ifdef ST_DEBUG 1555 st_check(tab); 1556 #endif 1557 break; 1558 } 1559 } 1560 #ifdef ST_DEBUG 1561 st_check(tab); 1562 #endif 450 1563 return 0; 451 1564 } 452 1565 453 static int454 #if defined(__GNUC__)455 delete_never(st_data_t key __attribute__ ((unused)), st_data_t value,456 st_data_t never)457 #else458 delete_never(key, value, never)459 st_data_t key, value, never;460 #endif461 {462 if (value == never) return ST_DELETE;463 return ST_CONTINUE;464 }465 466 void467 st_cleanup_safe(table, never)468 st_table *table;469 st_data_t never;470 {471 int num_entries = table->num_entries;472 473 st_foreach(table, delete_never, never);474 table->num_entries = num_entries;475 }476 477 1566 int 478 st_foreach(table, func, arg) 479 st_table *table; 480 int (*func)(); 481 st_data_t arg; 482 { 483 st_table_entry *ptr, *last, *tmp; 484 enum st_retval retval; 485 int i; 486 487 for(i = 0; i < table->num_bins; i++) { 488 last = 0; 489 for(ptr = table->bins[i]; ptr != 0;) { 490 retval = (*func)(ptr->key, ptr->record, arg); 491 switch (retval) { 492 case ST_CHECK: /* check if hash is modified during iteration */ 493 tmp = 0; 494 if (i < table->num_bins) { 495 for (tmp = table->bins[i]; tmp; tmp=tmp->next) { 496 if (tmp == ptr) break; 497 } 498 } 499 if (!tmp) { 500 /* call func with error notice */ 501 return 1; 502 } 503 /* fall through */ 504 case ST_CONTINUE: 505 last = ptr; 506 ptr = ptr->next; 507 break; 508 case ST_STOP: 509 return 0; 510 case ST_DELETE: 511 tmp = ptr; 512 if (last == 0) { 513 table->bins[i] = ptr->next; 514 } 515 else { 516 last->next = ptr->next; 517 } 518 ptr = ptr->next; 519 free(tmp); 520 table->num_entries--; 1567 st_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg) 1568 { 1569 return st_general_foreach(tab, func, arg, FALSE); 1570 } 1571 1572 #ifdef RUBY 1573 /* See comments for function st_delete_safe. */ 1574 int 1575 st_foreach_check(st_table *tab, int (*func)(ANYARGS), st_data_t arg, 1576 st_data_t never ATTRIBUTE_UNUSED) { 1577 return st_general_foreach(tab, func, arg, TRUE); 1578 } 1579 1580 /* Set up array KEYS by at most SIZE keys of head table TAB entries. 1581 Return the number of keys set up in array KEYS. */ 1582 static inline st_index_t 1583 st_general_keys(st_table *tab, st_data_t *keys, st_index_t size) 1584 { 1585 st_index_t i, bound; 1586 st_data_t key, *keys_start, *keys_end; 1587 st_table_entry *curr_entry_ptr, *entries = tab->entries; 1588 1589 bound = tab->entries_bound; 1590 keys_start = keys; 1591 keys_end = keys + size; 1592 for (i = tab->entries_start; i < bound; i++) { 1593 if (keys == keys_end) 1594 break; 1595 curr_entry_ptr = &entries[i]; 1596 key = curr_entry_ptr->key; 1597 if (! DELETED_ENTRY_P(curr_entry_ptr)) 1598 *keys++ = key; 1599 } 1600 1601 return keys - keys_start; 1602 } 1603 1604 st_index_t 1605 st_keys(st_table *tab, st_data_t *keys, st_index_t size) 1606 { 1607 return st_general_keys(tab, keys, size); 1608 } 1609 1610 /* See comments for function st_delete_safe. */ 1611 st_index_t 1612 st_keys_check(st_table *tab, st_data_t *keys, st_index_t size, 1613 st_data_t never ATTRIBUTE_UNUSED) { 1614 return st_general_keys(tab, keys, size); 1615 } 1616 1617 /* Set up array VALUES by at most SIZE values of head table TAB 1618 entries. Return the number of values set up in array VALUES. */ 1619 static inline st_index_t 1620 st_general_values(st_table *tab, st_data_t *values, st_index_t size) 1621 { 1622 st_index_t i, bound; 1623 st_data_t *values_start, *values_end; 1624 st_table_entry *curr_entry_ptr, *entries = tab->entries; 1625 1626 values_start = values; 1627 values_end = values + size; 1628 bound = tab->entries_bound; 1629 st_assert(bound != 0); 1630 for (i = tab->entries_start; i < bound; i++) { 1631 if (values == values_end) 1632 break; 1633 curr_entry_ptr = &entries[i]; 1634 if (! DELETED_ENTRY_P(curr_entry_ptr)) 1635 *values++ = curr_entry_ptr->record; 1636 } 1637 1638 return values - values_start; 1639 } 1640 1641 st_index_t 1642 st_values(st_table *tab, st_data_t *values, st_index_t size) 1643 { 1644 return st_general_values(tab, values, size); 1645 } 1646 1647 /* See comments for function st_delete_safe. */ 1648 st_index_t 1649 st_values_check(st_table *tab, st_data_t *values, st_index_t size, 1650 st_data_t never ATTRIBUTE_UNUSED) { 1651 return st_general_values(tab, values, size); 1652 } 1653 #endif /* RUBY */ 1654 1655 #ifdef RUBY 1656 #define FNV1_32A_INIT 0x811c9dc5 1657 1658 /* 1659 * 32 bit magic FNV-1a prime 1660 */ 1661 #define FNV_32_PRIME 0x01000193 1662 1663 #ifndef UNALIGNED_WORD_ACCESS 1664 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ 1665 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \ 1666 defined(__powerpc64__) || \ 1667 defined(__mc68020__) 1668 # define UNALIGNED_WORD_ACCESS 1 1669 # endif 1670 #endif 1671 #ifndef UNALIGNED_WORD_ACCESS 1672 # define UNALIGNED_WORD_ACCESS 0 1673 #endif 1674 1675 /* This hash function is quite simplified MurmurHash3 1676 * Simplification is legal, cause most of magic still happens in finalizator. 1677 * And finalizator is almost the same as in MurmurHash3 */ 1678 #define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y)) 1679 #define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n))) 1680 1681 #if ST_INDEX_BITS <= 32 1682 #define C1 (st_index_t)0xcc9e2d51 1683 #define C2 (st_index_t)0x1b873593 1684 #else 1685 #define C1 BIG_CONSTANT(0x87c37b91,0x114253d5); 1686 #define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f); 1687 #endif 1688 static inline st_index_t 1689 murmur_step(st_index_t h, st_index_t k) 1690 { 1691 #if ST_INDEX_BITS <= 32 1692 #define r1 (17) 1693 #define r2 (11) 1694 #else 1695 #define r1 (33) 1696 #define r2 (24) 1697 #endif 1698 k *= C1; 1699 h ^= ROTL(k, r1); 1700 h *= C2; 1701 h = ROTL(h, r2); 1702 return h; 1703 } 1704 #undef r1 1705 #undef r2 1706 1707 static inline st_index_t 1708 murmur_finish(st_index_t h) 1709 { 1710 #if ST_INDEX_BITS <= 32 1711 #define r1 (16) 1712 #define r2 (13) 1713 #define r3 (16) 1714 const st_index_t c1 = 0x85ebca6b; 1715 const st_index_t c2 = 0xc2b2ae35; 1716 #else 1717 /* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */ 1718 #define r1 (30) 1719 #define r2 (27) 1720 #define r3 (31) 1721 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9); 1722 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb); 1723 #endif 1724 #if ST_INDEX_BITS > 64 1725 h ^= h >> 64; 1726 h *= c2; 1727 h ^= h >> 65; 1728 #endif 1729 h ^= h >> r1; 1730 h *= c1; 1731 h ^= h >> r2; 1732 h *= c2; 1733 h ^= h >> r3; 1734 return h; 1735 } 1736 #undef r1 1737 #undef r2 1738 #undef r3 1739 1740 st_index_t 1741 st_hash(const void *ptr, size_t len, st_index_t h) 1742 { 1743 const char *data = ptr; 1744 st_index_t t = 0; 1745 size_t l = len; 1746 1747 #define data_at(n) (st_index_t)((unsigned char)data[(n)]) 1748 #define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0) 1749 #if SIZEOF_ST_INDEX_T > 4 1750 #define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4 1751 #if SIZEOF_ST_INDEX_T > 8 1752 #define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \ 1753 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8 1754 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_16 1755 #endif 1756 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_8 1757 #else 1758 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_4 1759 #endif 1760 #undef SKIP_TAIL 1761 if (len >= sizeof(st_index_t)) { 1762 #if !UNALIGNED_WORD_ACCESS 1763 int align = (int)((st_data_t)data % sizeof(st_index_t)); 1764 if (align) { 1765 st_index_t d = 0; 1766 int sl, sr, pack; 1767 1768 switch (align) { 1769 #ifdef WORDS_BIGENDIAN 1770 # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \ 1771 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2) 1772 #else 1773 # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \ 1774 t |= data_at(n) << CHAR_BIT*(n) 1775 #endif 1776 UNALIGNED_ADD_ALL; 1777 #undef UNALIGNED_ADD 521 1778 } 1779 1780 #ifdef WORDS_BIGENDIAN 1781 t >>= (CHAR_BIT * align) - CHAR_BIT; 1782 #else 1783 t <<= (CHAR_BIT * align); 1784 #endif 1785 1786 data += sizeof(st_index_t)-align; 1787 len -= sizeof(st_index_t)-align; 1788 1789 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align); 1790 sr = CHAR_BIT * align; 1791 1792 while (len >= sizeof(st_index_t)) { 1793 d = *(st_index_t *)data; 1794 #ifdef WORDS_BIGENDIAN 1795 t = (t << sr) | (d >> sl); 1796 #else 1797 t = (t >> sr) | (d << sl); 1798 #endif 1799 h = murmur_step(h, t); 1800 t = d; 1801 data += sizeof(st_index_t); 1802 len -= sizeof(st_index_t); 1803 } 1804 1805 pack = len < (size_t)align ? (int)len : align; 1806 d = 0; 1807 switch (pack) { 1808 #ifdef WORDS_BIGENDIAN 1809 # define UNALIGNED_ADD(n) case (n) + 1: \ 1810 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1) 1811 #else 1812 # define UNALIGNED_ADD(n) case (n) + 1: \ 1813 d |= data_at(n) << CHAR_BIT*(n) 1814 #endif 1815 UNALIGNED_ADD_ALL; 1816 #undef UNALIGNED_ADD 1817 } 1818 #ifdef WORDS_BIGENDIAN 1819 t = (t << sr) | (d >> sl); 1820 #else 1821 t = (t >> sr) | (d << sl); 1822 #endif 1823 1824 if (len < (size_t)align) goto skip_tail; 1825 # define SKIP_TAIL 1 1826 h = murmur_step(h, t); 1827 data += pack; 1828 len -= pack; 522 1829 } 1830 else 1831 #endif 1832 { 1833 do { 1834 h = murmur_step(h, *(st_index_t *)data); 1835 data += sizeof(st_index_t); 1836 len -= sizeof(st_index_t); 1837 } while (len >= sizeof(st_index_t)); 1838 } 1839 } 1840 1841 t = 0; 1842 switch (len) { 1843 #if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8 1844 /* in this case byteorder doesn't really matter */ 1845 #if SIZEOF_ST_INDEX_T > 4 1846 case 7: t |= data_at(6) << 48; 1847 case 6: t |= data_at(5) << 40; 1848 case 5: t |= data_at(4) << 32; 1849 case 4: 1850 t |= (st_index_t)*(uint32_t*)data; 1851 goto skip_tail; 1852 # define SKIP_TAIL 1 1853 #endif 1854 case 3: t |= data_at(2) << 16; 1855 case 2: t |= data_at(1) << 8; 1856 case 1: t |= data_at(0); 1857 #else 1858 #ifdef WORDS_BIGENDIAN 1859 # define UNALIGNED_ADD(n) case (n) + 1: \ 1860 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1) 1861 #else 1862 # define UNALIGNED_ADD(n) case (n) + 1: \ 1863 t |= data_at(n) << CHAR_BIT*(n) 1864 #endif 1865 UNALIGNED_ADD_ALL; 1866 #undef UNALIGNED_ADD 1867 #endif 1868 #ifdef SKIP_TAIL 1869 skip_tail: 1870 #endif 1871 h ^= t; h -= ROTL(t, 7); 1872 h *= C2; 1873 } 1874 h ^= l; 1875 1876 return murmur_finish(h); 1877 } 1878 1879 st_index_t 1880 st_hash_uint32(st_index_t h, uint32_t i) 1881 { 1882 return murmur_step(h, i); 1883 } 1884 1885 st_index_t 1886 st_hash_uint(st_index_t h, st_index_t i) 1887 { 1888 i += h; 1889 /* no matter if it is BigEndian or LittleEndian, 1890 * we hash just integers */ 1891 #if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8 1892 h = murmur_step(h, i >> 8*8); 1893 #endif 1894 h = murmur_step(h, i); 1895 return h; 1896 } 1897 1898 st_index_t 1899 st_hash_end(st_index_t h) 1900 { 1901 h = murmur_finish(h); 1902 return h; 1903 } 1904 1905 #undef st_hash_start 1906 st_index_t 1907 st_hash_start(st_index_t h) 1908 { 1909 return h; 1910 } 1911 1912 static st_index_t 1913 strhash(st_data_t arg) 1914 { 1915 register const char *string = (const char *)arg; 1916 return st_hash(string, strlen(string), FNV1_32A_INIT); 1917 } 1918 1919 int 1920 st_locale_insensitive_strcasecmp(const char *s1, const char *s2) 1921 { 1922 unsigned int c1, c2; 1923 1924 while (1) { 1925 c1 = (unsigned char)*s1++; 1926 c2 = (unsigned char)*s2++; 1927 if (c1 == '\0' || c2 == '\0') { 1928 if (c1 != '\0') return 1; 1929 if (c2 != '\0') return -1; 1930 return 0; 1931 } 1932 if ((unsigned int)(c1 - 'A') <= ('Z' - 'A')) c1 += 'a' - 'A'; 1933 if ((unsigned int)(c2 - 'A') <= ('Z' - 'A')) c2 += 'a' - 'A'; 1934 if (c1 != c2) { 1935 if (c1 > c2) 1936 return 1; 1937 else 1938 return -1; 1939 } 1940 } 1941 } 1942 1943 int 1944 st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n) 1945 { 1946 unsigned int c1, c2; 1947 1948 while (n--) { 1949 c1 = (unsigned char)*s1++; 1950 c2 = (unsigned char)*s2++; 1951 if (c1 == '\0' || c2 == '\0') { 1952 if (c1 != '\0') return 1; 1953 if (c2 != '\0') return -1; 1954 return 0; 1955 } 1956 if ((unsigned int)(c1 - 'A') <= ('Z' - 'A')) c1 += 'a' - 'A'; 1957 if ((unsigned int)(c2 - 'A') <= ('Z' - 'A')) c2 += 'a' - 'A'; 1958 if (c1 != c2) { 1959 if (c1 > c2) 1960 return 1; 1961 else 1962 return -1; 1963 } 523 1964 } 524 1965 return 0; 525 1966 } 526 1967 527 static int 528 strhash(string) 529 register const char *string; 530 { 531 register int c; 532 533 #ifdef HASH_ELFHASH 534 register unsigned int h = 0, g; 535 536 while ((c = *string++) != '\0') { 537 h = ( h << 4 ) + c; 538 if ( g = h & 0xF0000000 ) 539 h ^= g >> 24; 540 h &= ~g; 541 } 542 return h; 543 #elif HASH_PERL 544 register int val = 0; 545 546 while ((c = *string++) != '\0') { 547 val += c; 548 val += (val << 10); 549 val ^= (val >> 6); 550 } 551 val += (val << 3); 552 val ^= (val >> 11); 553 554 return val + (val << 15); 555 #else 556 register int val = 0; 557 558 while ((c = *string++) != '\0') { 559 val = val*997 + c; 560 } 561 562 return val + (val>>5); 563 #endif 564 } 565 566 static int 567 numcmp(x, y) 568 long x, y; 1968 static st_index_t 1969 strcasehash(st_data_t arg) 1970 { 1971 register const char *string = (const char *)arg; 1972 register st_index_t hval = FNV1_32A_INIT; 1973 1974 /* 1975 * FNV-1a hash each octet in the buffer 1976 */ 1977 while (*string) { 1978 unsigned int c = (unsigned char)*string++; 1979 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A'; 1980 hval ^= c; 1981 1982 /* multiply by the 32 bit FNV magic prime mod 2^32 */ 1983 hval *= FNV_32_PRIME; 1984 } 1985 return hval; 1986 } 1987 1988 int 1989 st_numcmp(st_data_t x, st_data_t y) 569 1990 { 570 1991 return x != y; 571 1992 } 572 1993 573 static int 574 numhash(n) 575 long n; 576 { 577 return n; 578 } 1994 st_index_t 1995 st_numhash(st_data_t n) 1996 { 1997 enum {s1 = 11, s2 = 3}; 1998 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2)); 1999 } 2000 #endif /* RUBY */
Note:
See TracChangeset
for help on using the changeset viewer.