The ROme OpTimistic Simulator  2.0.0
A General-Purpose Multithreaded Parallel/Distributed Simulation Platform
hash_map.c
Go to the documentation of this file.
1 
32 #include <datatypes/hash_map.h>
33 #include <mm/dymelor.h>
34 #include <memory.h>
35 #include <limits.h>
36 
37 // must be a power of two
38 #define HM_INITIAL_CAPACITY 8
39 #define DIB(curr_i, hash, capacity_mo) ((curr_i) >= ((hash) & (capacity_mo)) ? (curr_i) - ((hash) & (capacity_mo)) : (capacity_mo) + 1 + (curr_i) - ((hash) & (capacity_mo)))
40 #define SWAP_VALUES(a, b) do{__typeof(a) _tmp = (a); (a) = (b); (b) = _tmp;}while(0)
41 
42 // Adapted from http://xorshift.di.unimi.it/splitmix64.c PRNG,
43 // written by Sebastiano Vigna (vigna@acm.org)
44 // TODO benchmark further and select a possibly better hash function
45 static hash_t _get_hash(key_type_t key){
46  uint64_t z = key + 0x9e3779b97f4a7c15;
47  z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
48  z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
49  return (hash_t)((z ^ (z >> 31)) >> 32);
50 }
51 
52 void _hash_map_init(struct _inner_hash_map_t *_i_hmap){
53  // this is the effective capacity_minus_one
54  // this trick saves us some subtractions when we
55  // use the capacity as a bitmask to
56  // select the relevant hash bits for table indexing
57  _i_hmap->capacity_mo = HM_INITIAL_CAPACITY - 1;
58  _i_hmap->nodes = rsalloc(sizeof(struct _hash_map_node_t) * HM_INITIAL_CAPACITY);
59  memset(_i_hmap->nodes, UCHAR_MAX, sizeof(struct _hash_map_node_t) * HM_INITIAL_CAPACITY);
60 }
61 
62 void _hash_map_fini(struct _inner_hash_map_t *_i_hmap){
63  rsfree(_i_hmap->nodes);
64 }
65 
66 static void _hash_map_insert_hashed(struct _inner_hash_map_t *_i_hmap, struct _hash_map_node_t node){
67  struct _hash_map_node_t *nodes = _i_hmap->nodes;
68  struct _hash_map_node_t cur_node = node;
69  map_size_t capacity_mo = _i_hmap->capacity_mo;
70  // since capacity_mo is 2^n - 1 for some n
71  // this effectively is a modulo 2^n
72  map_size_t i = node.hash & capacity_mo;
73  // dib stays for distance from initial bucket
74  map_size_t dib = 0;
75 
76  // linear probing with robin hood hashing
77  // https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf by Pedro Celis
78  while(1){
79  if(nodes[i].elem_i == HMAP_INVALID_I){
80  // found an empty cell, put the pair here and we're done
81  nodes[i] = cur_node;
82  return;
83  } else if(dib > DIB(i, nodes[i].hash, capacity_mo)){
84  // found a "richer" cell, swap the pairs and continue looking for a hole
85  dib = DIB(i, nodes[i].hash, capacity_mo);
86  SWAP_VALUES(cur_node, nodes[i]);
87  }
88  ++i;
89  // modulo capacity
90  i &= capacity_mo;
91  ++dib;
92  }
93 }
94 
95 static void _hash_map_realloc_rehash(struct _inner_hash_map_t *_i_hmap, map_size_t count){
96  // helper pointers to iterate over the old array
97  struct _hash_map_node_t *rmv = _i_hmap->nodes;
98  // instantiates new array
99  _i_hmap->nodes = rsalloc(sizeof(struct _hash_map_node_t) * (_i_hmap->capacity_mo + 1));
100  memset(_i_hmap->nodes, UCHAR_MAX, sizeof(struct _hash_map_node_t) * (_i_hmap->capacity_mo + 1));
101  // rehash the old array elements
102  map_size_t i = count, j = 0;
103  while(i--){
104  while(rmv[j].elem_i == HMAP_INVALID_I)
105  ++j;
106  // TODO: implement more efficient rehashing (in place rehashing)
107  _hash_map_insert_hashed(_i_hmap, rmv[j]);
108  ++j;
109  }
110  // free the old array
111  rsfree(rmv);
112 }
113 
114 static void _hash_map_expand(struct _inner_hash_map_t *_i_hmap, map_size_t count){
115  // check if threshold has been reached
116  if((double)_i_hmap->capacity_mo * MAX_LOAD_FACTOR >= count)
117  return;
118  // increase map capacity (remember capacity_minus_one)
119  _i_hmap->capacity_mo = 2 * _i_hmap->capacity_mo + 1;
120 
121  _hash_map_realloc_rehash(_i_hmap, count);
122 }
123 
124 static void _hash_map_shrink(struct _inner_hash_map_t *_i_hmap, map_size_t count){
125  // check if threshold has been reached
126  if((double)_i_hmap->capacity_mo * MIN_LOAD_FACTOR <= count ||
127  _i_hmap->capacity_mo <= HM_INITIAL_CAPACITY)
128  return;
129  // decrease map capacity (remember capacity_minus_one)
130  _i_hmap->capacity_mo /= 2;
131 
132  _hash_map_realloc_rehash(_i_hmap, count);
133 }
134 
135 void _hash_map_add(struct _inner_hash_map_t *_i_hmap, key_type_t key, map_size_t count){
136  // expand if needed
137  _hash_map_expand(_i_hmap, count);
138 
139  struct _hash_map_node_t node = {key, _get_hash(key), count};
140  // insert the element
141  _hash_map_insert_hashed(_i_hmap, node);
142 }
143 
144 static map_size_t _hash_map_index_lookup(struct _inner_hash_map_t *_i_hmap, key_type_t key){
145  struct _hash_map_node_t *nodes = _i_hmap->nodes;
146  map_size_t capacity_mo = _i_hmap->capacity_mo;
147 
148  hash_t cur_hash = _get_hash(key);
149  map_size_t i = cur_hash & capacity_mo;
150  map_size_t dib = 0;
151 
152  do{
153  if(nodes[i].elem_i == HMAP_INVALID_I){
154  // we found a hole where we expected something, the pair hasn't been found
155  return HMAP_INVALID_I;
156  // the more expensive comparison with the key is done only if necessary
157  } else if(nodes[i].hash == cur_hash && nodes[i].key == key)
158  // we found a pair with coinciding keys, return the index
159  return i;
160  ++i;
161  i &= capacity_mo;
162  ++dib;
163  }while(dib <= DIB(i, nodes[i].hash, capacity_mo));
164  // we found a node- with lower DIB than expected: the wanted key isn't here
165  // (else it would have finished here during its insertion).
166  return UINT_MAX;
167 }
168 
169 unsigned _hash_map_lookup(struct _inner_hash_map_t *_i_hmap, unsigned long long key){
170  // find the index of the wanted key
171  map_size_t i = _hash_map_index_lookup(_i_hmap, key);
172  // return the pair if successful
173  return i == UINT_MAX ? UINT_MAX : _i_hmap->nodes[i].elem_i;
174 }
175 
176 void _hash_map_update_i(struct _inner_hash_map_t *_i_hmap, unsigned long long key, map_size_t new_i){
177  // find the index of the wanted key
178  map_size_t i = _hash_map_index_lookup(_i_hmap, key);
179  // update the pair if successful
180  if(i != UINT_MAX)
181  _i_hmap->nodes[i].elem_i = new_i;
182 }
183 
184 void _hash_map_remove(struct _inner_hash_map_t *_i_hmap, unsigned long long key, map_size_t cur_count){
185  // find the index of the wanted key
186  map_size_t i = _hash_map_index_lookup(_i_hmap, key);
187  // if unsuccessful we're done, nothing to remove here!
188  if(i == UINT_MAX) return;
189 
190  struct _hash_map_node_t *nodes = _i_hmap->nodes;
191  map_size_t capacity_mo = _i_hmap->capacity_mo;
192  map_size_t j = i;
193  // backward shift to restore the table state as if the insertion never happened:
194  // http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion by Emmanuel Goossaert
195  do{ // the first iteration is necessary since the removed element is always overwritten somehow
196  ++j;
197  j &= capacity_mo;
198  }while(nodes[j].elem_i != UINT_MAX && DIB(j, nodes[j].hash, capacity_mo) != 0);
199  // we finally found out the end of the displaced sequence of nodes,
200  // since we either found an empty slot or we found a node residing in his correct position
201 
202  --j; // we now point to the last element to move
203  j &= capacity_mo;
204 
205  if(j >= i){
206  // we simply move the nodes one slot back
207  memmove(&nodes[i], &nodes[i + 1], (j - i) * sizeof(struct _hash_map_node_t));
208  }else{
209  // we wrapped around the table: move the first part back
210  memmove(&nodes[i], &nodes[i + 1], (capacity_mo - i) * sizeof(struct _hash_map_node_t));
211  // move the first node to the last slot in the table
212  memcpy(&nodes[capacity_mo], &nodes[0], sizeof(struct _hash_map_node_t));
213  // move the remaining stuff at the table beginning
214  memmove(&nodes[0], &nodes[1], j * sizeof(struct _hash_map_node_t));
215  }
216 
217  // we clear the last moved slot (if we didn't move anything this clears the removed entry)
218  nodes[j].elem_i = UINT_MAX;
219 
220  // shrink the table if necessary
221  _hash_map_shrink(_i_hmap, cur_count - 1);
222 }
223 
224 size_t _hash_map_dump_size(struct _inner_hash_map_t *_i_hmap){
225  return sizeof(_i_hmap->capacity_mo) + (_i_hmap->capacity_mo + 1) * sizeof(*(_i_hmap->nodes));
226 }
227 
228 inline unsigned char * _hash_map_dump(struct _inner_hash_map_t *_i_hmap, unsigned char *_destination){
229  *((map_size_t *)_destination) = _i_hmap->capacity_mo;
230  _destination += sizeof(map_size_t);
231  size_t table_cpy_size = (_i_hmap->capacity_mo + 1) * sizeof(*(_i_hmap->nodes));
232  memcpy(_destination, _i_hmap->nodes, table_cpy_size);
233  _destination += table_cpy_size;
234  return _destination;
235 }
236 
237 inline unsigned char * _hash_map_load(struct _inner_hash_map_t *_i_hmap, unsigned char *_source){
238  _i_hmap->capacity_mo = *((map_size_t *)_source);
239  _source += sizeof(map_size_t);
240  size_t table_cpy_size = (_i_hmap->capacity_mo + 1) * sizeof(*(_i_hmap->nodes));
241  _i_hmap->nodes = rsalloc(table_cpy_size);
242  memcpy(_i_hmap->nodes, _source, table_cpy_size);
243  _source += table_cpy_size;
244  return _source;
245 }
Dynamic Memory Logger and Restorer (DyMeLoR)
This header implements a simple hash map data structure.