1
use std::{
2
    collections::hash_map::RandomState,
3
    hash::{BuildHasher, Hash, Hasher},
4
    slice,
5
};
6

            
7
/// A [`std::collections::HashMap`] alternative that provides some guarantees on
8
/// entry order.
9
///
10
/// This implementation preserves insert order until an operation that removes a
11
/// key occurs. To keep this implementation efficient, when a removal occurs,
12
/// the order is updated by moving the last key inserted to replace the entry
13
/// being removed.
14
///
15
/// The benefit of keeping the order is that an iterator can be made against
16
/// this collection that doesn't require borrowing the original value.
17
/// Additionally, methods can be used to retrieve entries by index instead of
18
/// just by key.
19
1
#[derive(Debug, Clone)]
20
pub struct BudMap<Key, Value, HashBuilder = RandomState> {
21
    /// A dense list of all entries in this map. This is where the actual
22
    /// key/value data is stored.
23
    entries: Vec<RawEntry<Key, Value>>,
24
    /// A list of bins that is at least as big as the bin count
25
    /// (`bin_mask.into_count() + 1`). All entries beyond the bin count are
26
    /// colliding entries.
27
    bins: Vec<Bin>,
28
    /// A bit mask that can be applied to a hash to produce an index into
29
    /// `bins`. This means all valid bin counts are powers of 2.
30
    bin_mask: BinMask,
31
    /// The index inside of `bins` of the last collision that was freed. This is
32
    /// the start of a singly-linked list that points to all currently free
33
    /// collision bins.
34
    free_collision_head: OptionalIndex,
35
    /// The [`BuildHasher`] implementor via which keys are hashed.
36
    hash_builder: HashBuilder,
37
}
38

            
39
impl<Key, Value> Default for BudMap<Key, Value, RandomState> {
40
38
    fn default() -> Self {
41
38
        Self {
42
38
            entries: Vec::default(),
43
38
            bins: Vec::default(),
44
38
            bin_mask: BinMask(0),
45
38
            free_collision_head: OptionalIndex::none(),
46
38
            hash_builder: RandomState::default(),
47
38
        }
48
38
    }
49
}
50

            
51
impl<Key, Value> BudMap<Key, Value, RandomState>
52
where
53
    Key: Eq + Hash + std::fmt::Debug,
54
{
55
    /// Returns an empty map with enough room for `minimum_capacity` elements to
56
    /// be inserted without allocations (assuming no hash collisions).
57
    #[must_use]
58
36
    pub fn with_capacity(minimum_capacity: usize) -> Self {
59
36
        let mut map = Self::default();
60
36
        map.grow(minimum_capacity);
61
36
        map
62
36
    }
63
}
64

            
65
impl<Key, Value, HashBuilder> BudMap<Key, Value, HashBuilder>
66
where
67
    Key: Eq + Hash + std::fmt::Debug,
68
    HashBuilder: BuildHasher,
69
{
70
    /// Returns an empty map with enough room for `minimum_capacity` elements to
71
    /// be inserted without allocations (assuming no hash collisions). Keys are
72
    /// hashed using `hash_builder`.
73
    #[must_use]
74
2
    pub fn with_capacity_and_hasher(minimum_capacity: usize, hash_builder: HashBuilder) -> Self {
75
2
        let mut map = Self::with_hasher(hash_builder);
76
2
        map.grow(minimum_capacity);
77
2
        map
78
2
    }
79

            
80
    /// Returns an empty map whose keys are hashed using `hash_builder`.
81
    #[must_use]
82
36
    pub const fn with_hasher(hash_builder: HashBuilder) -> Self {
83
36
        Self {
84
36
            entries: Vec::new(),
85
36
            bins: Vec::new(),
86
36
            bin_mask: BinMask(0),
87
36
            free_collision_head: OptionalIndex::none(),
88
36
            hash_builder,
89
36
        }
90
36
    }
91

            
92
13584
    fn get_entry(&self, hash: u64, key: &Key) -> Option<(usize, Option<usize>, usize)> {
93
13584
        if self.entries.is_empty() {
94
3
            None
95
        } else {
96
13581
            let bin_index = hash_to_bin(hash, self.bin_mask);
97
13581
            let mut existing_entry = None;
98
13581
            let mut previous_bin_id = None;
99
13581
            let mut bin_index = Some(bin_index);
100
25036
            while let Some(bin_id) = bin_index {
101
25034
                let bin = self.bins[bin_id];
102
25034
                let entry_index = bin.entry_index.as_opt()?;
103
25034
                let entry = &self.entries[entry_index];
104
25034
                if entry.hash == hash && &entry.key == key {
105
13579
                    existing_entry = Some((bin_id, previous_bin_id, entry_index));
106
13579
                    break;
107
11455
                }
108
11455
                previous_bin_id = Some(bin_id);
109
11455
                bin_index = bin.collision_index.as_opt();
110
            }
111
13581
            existing_entry
112
        }
113
13584
    }
114

            
115
    /// Looks up an entry for `key`. If one is found, [`Entry::Occupied`] will
116
    /// be returned. If one is not found, [`Entry::Vacant`] will be returned.
117
5
    pub fn entry(&mut self, key: Key) -> Entry<'_, Key, Value, HashBuilder> {
118
5
        let hash = self.hash(&key);
119
5

            
120
5
        // Try to find the existing value
121
5
        let existing_entry = self.get_entry(hash, &key);
122

            
123
5
        if let Some((bin_index, pointee, entry_index)) = existing_entry {
124
2
            Entry::Occupied(OccupiedEntry {
125
2
                map: self,
126
2
                entry_index,
127
2
                bin_index,
128
2
                pointee,
129
2
            })
130
        } else {
131
3
            Entry::Vacant(VacantEntry {
132
3
                map: self,
133
3
                key,
134
3
                hash,
135
3
            })
136
        }
137
5
    }
138

            
139
5015487
    fn grow_for_insert(&mut self) {
140
5015487
        // If we have don't have any free entriues, check that we shouldn't
141
5015487
        // grow to prevent collisions.
142
5015487
        let current_length = self.entries.len();
143
5015487
        let current_capacity = self.bin_mask.into_count();
144
5015487
        let should_grow = match (current_length, current_capacity) {
145
            // No capacity, always grow
146
62
            (0, _) => true,
147
            // Map with 8 bins, reallocate on the 6th
148
248
            (current_length, 8) => current_length >= 6,
149
            // Map with 16 bins, reallocate on the 13th
150
241
            (current_length, 16) => current_length >= 13,
151
            // Now the capacity is large enough that / 8 * 7 doesn't produce
152
            // values that don't make sense. This ratio of 7/8 comes from
153
            // hashbrown. It reduces memory waste as it prefers more densely
154
            // filled bins, but it might make sense to change the scaling rate
155
            // with our implementation.
156
5014936
            _ if current_length > current_capacity / 8 * 7
157
279
                && current_length < (usize::MAX << 1) =>
158
279
            {
159
279
                true
160
            }
161
5014657
            _ => false,
162
        };
163
5015487
        if should_grow {
164
406
            self.grow(current_capacity + 1);
165
5015081
        }
166
5015487
    }
167

            
168
66761782
    fn hash(&self, key: &Key) -> u64 {
169
66761782
        let mut hasher = self.hash_builder.build_hasher();
170
66761782
        key.hash(&mut hasher);
171
66761782
        hasher.finish()
172
66761782
    }
173

            
174
444
    fn grow(&mut self, minimum_capacity: usize) {
175
444
        let old_bin_count = self.bin_mask.into_count();
176
444
        if old_bin_count < minimum_capacity {
177
418
            if let Some(new_bin_count) = next_bucket_size(minimum_capacity) {
178
418
                let capacity_growth = new_bin_count - self.entries.len();
179
418
                self.entries.reserve_exact(capacity_growth);
180
418

            
181
418
                // Clear and extend the bins.
182
418
                // Trying to reuse the existing vec here will always
183
418
                // cause extra data IO than necessary, beacuse we are
184
418
                // clearing the existing bins. If we clear before we
185
418
                // extend, the data written for the clear is an extra
186
418
                // write that could be avoided. If we clear after we
187
418
                // extend, the underlying data copy when the vec is
188
418
                // resized is wasted.
189
418
                self.bins.clear();
190
418
                self.bins.resize(new_bin_count, Bin::default());
191
418

            
192
418
                self.bin_mask = BinMask::from_count(new_bin_count);
193
418
                self.free_collision_head = OptionalIndex::none();
194

            
195
8860931
                for (entry_index, slot) in self.entries.iter().enumerate() {
196
8860898
                    let bin = hash_to_bin(slot.hash, self.bin_mask);
197
8860898

            
198
8860898
                    insert_into_bin(
199
8860898
                        &mut self.bins,
200
8860898
                        &mut self.free_collision_head,
201
8860898
                        bin,
202
8860898
                        entry_index,
203
8860898
                    );
204
8860898
                }
205
            }
206
26
        }
207
444
    }
208

            
209
    /// Inserts the key-value pair into the map. If an existing value was stored
210
    /// for the given key, it will be returned.
211
5015486
    pub fn insert(&mut self, key: Key, value: Value) -> Option<Value> {
212
5015486
        self.grow_for_insert();
213
5015486

            
214
5015486
        let entry_index_if_pushed = self.entries.len();
215
5015486
        let hash = self.hash(&key);
216
5015486

            
217
5015486
        // Check to see if we need to overwrite.
218
5015486
        let mut bin_index = hash_to_bin(hash, self.bin_mask);
219
5015486

            
220
5015486
        let mut bin = &self.bins[bin_index];
221
5015486
        if bin.entry_index.is_none() {
222
            // Vacant entry
223
2670568
            let entry_index = self.push_entry(hash, key, value);
224
2670568
            self.bins[bin_index].entry_index = OptionalIndex(entry_index);
225
2670568
            None
226
        } else {
227
            // Occupied entry -- insert or replace into this bin's linked list.
228
3218118
            loop {
229
3218118
                let next_bin_index = bin.collision_index;
230
3218118

            
231
3218118
                // Check if the current bin contains our key
232
3218118
                let entry_index = bin.entry_index.0;
233
3218118
                let entry = &mut self.entries[entry_index];
234
3218118
                if entry.hash == hash && entry.key == key {
235
                    // Replace an existing entry
236
441
                    let stored_value = std::mem::replace(&mut entry.value, value);
237
441
                    return Some(stored_value);
238
3217677
                }
239

            
240
3217677
                if let Some(next_bin) = next_bin_index.as_opt() {
241
873200
                    bin_index = next_bin;
242
873200
                    bin = &self.bins[bin_index];
243
873200
                } else {
244
                    // New entry that collides with another key.
245

            
246
2344477
                    let free_collision_index =
247
2344477
                        free_collision_index(&mut self.bins, &mut self.free_collision_head);
248
2344477
                    let collision_index = free_collision_index.unwrap_or(self.bins.len());
249
2344477
                    self.bins[bin_index].collision_index = OptionalIndex(collision_index);
250
                    // Create our new bin.
251
2344477
                    if let Some(index) = free_collision_index {
252
1416
                        // Reuse a collision bin that has been previously removed.
253
1416
                        self.bins[index].entry_index = OptionalIndex(entry_index_if_pushed);
254
2343061
                    } else {
255
2343061
                        // Create a new collision bin
256
2343061
                        self.bins.push(Bin {
257
2343061
                            entry_index: OptionalIndex(entry_index_if_pushed),
258
2343061
                            collision_index: OptionalIndex::none(),
259
2343061
                        });
260
2343061
                    };
261

            
262
2344477
                    self.push_entry(hash, key, value);
263
2344477
                    return None;
264
                }
265
            }
266
        }
267
5015486
    }
268

            
269
5015046
    fn push_entry(&mut self, hash: u64, key: Key, value: Value) -> usize {
270
5015046
        let entry_index = self.entries.len();
271
5015046
        if entry_index == self.bin_mask.0 {
272
            // This should only trigger when we don't grow after our upper
273
            // limit.
274
            assert_eq!(self.bin_mask.0, usize::MAX << 1);
275
            panic!("map too large for insert");
276
        } else {
277
5015046
            self.entries.push(RawEntry { hash, key, value });
278
5015046
            entry_index
279
5015046
        }
280
5015046
    }
281

            
282
    /// Removes a key from the map. If the key was found, the value stored will
283
    /// be returned.
284
13579
    pub fn remove(&mut self, key: &Key) -> Option<Value> {
285
13579
        let hash = self.hash(key);
286
13579
        if let Some((bin_index, pointee, entry_index)) = self.get_entry(hash, key) {
287
13577
            return Some(self.remove_inner(entry_index, bin_index, pointee).value);
288
2
        }
289
2

            
290
2
        None
291
13579
    }
292

            
293
13578
    fn remove_inner(
294
13578
        &mut self,
295
13578
        entry_index: usize,
296
13578
        bin_index: usize,
297
13578
        pointee: Option<usize>,
298
13578
    ) -> RawEntry<Key, Value> {
299
13578
        // Remove the entry itself. First, we pop the top entry, even though
300
13578
        // it may not be the one we are looking for. If `entry_index` isn't the
301
13578
        // top entry, we will move the entry we popped into its place and update
302
13578
        // the bin that points to it.
303
13578
        let mut removed = self.entries.pop().expect("called on empty map");
304
13578
        let removed_index = self.entries.len();
305
13578
        if entry_index < removed_index {
306
            // The slot removed wasn't actually the one we needed to remove, so
307
            // we need to place it back into the slots.
308
13552
            let removed_hash = removed.hash;
309
13552
            std::mem::swap(&mut self.entries[entry_index], &mut removed);
310
13552

            
311
13552
            // Fix the bin that pointed to this slot.
312
13552
            let mut bin_index = Some(hash_to_bin(removed_hash, self.bin_mask));
313
29193
            while let Some(bin) = bin_index {
314
29193
                let bin = &mut self.bins[bin];
315
29193
                if bin.entry_index.0 == removed_index {
316
13552
                    bin.entry_index.0 = entry_index;
317
13552
                    break;
318
15641
                }
319
15641
                bin_index = bin.collision_index.as_opt();
320
            }
321
26
        }
322

            
323
        // Remove the bin entry.
324
13578
        let removed_bin = std::mem::take(&mut self.bins[bin_index]);
325

            
326
13578
        if let Some(pointee) = pointee {
327
4016
            self.bins[pointee].collision_index = removed_bin.collision_index;
328
4016
            // Add the removed bin to the head of the collision list
329
4016
            self.bins[bin_index].collision_index = self.free_collision_head;
330
4016
            self.free_collision_head = OptionalIndex(bin_index);
331
9562
        } else if let Some(collision_index) = removed_bin.collision_index.as_opt() {
332
1976
            self.bins[bin_index] = std::mem::take(&mut self.bins[collision_index]);
333
7587
        }
334

            
335
13578
        removed
336
13578
    }
337

            
338
    /// Returns the number of entries contained in this map.
339
40783
    pub fn len(&self) -> usize {
340
40783
        self.entries.len()
341
40783
    }
342

            
343
    /// Returns true if no entries are contained in this map.
344
13481
    pub fn is_empty(&self) -> bool {
345
13481
        self.entries.is_empty()
346
13481
    }
347

            
348
    /// Returns a value for a given key.
349
61732626
    pub fn get(&self, key: &Key) -> Option<&Value> {
350
61732626
        if !self.entries.is_empty() {
351
61732624
            let hash = self.hash(key);
352
61732624
            let mut bin_index = hash_to_bin(hash, self.bin_mask);
353
            loop {
354
110015170
                let bin = &self.bins[bin_index];
355
110015170
                let entry_index = bin.entry_index.as_opt()?;
356
110015169
                let entry = &self.entries[entry_index];
357
110015169
                if entry.hash == hash && &entry.key == key {
358
61732621
                    return Some(&entry.value);
359
48282548
                }
360
48282548
                if bin.collision_index.is_none() {
361
2
                    break;
362
48282546
                }
363
48282546
                bin_index = bin.collision_index.0;
364
            }
365
2
        }
366

            
367
4
        None
368
61732626
    }
369

            
370
    /// Returns a value for a given 0-based index.
371
2011
    pub fn get_by_index(&self, index: usize) -> Option<&Value> {
372
2011
        self.entries.get(index).map(|slot| &slot.value)
373
2011
    }
374

            
375
    /// Returns an interator for this map.
376
27131
    pub fn iter(&self) -> Iter<'_, Key, Value> {
377
27131
        Iter {
378
27131
            order: self.entries.iter(),
379
27131
        }
380
27131
    }
381
}
382

            
383
#[inline]
384
4028959
fn free_collision_index(
385
4028959
    bins: &mut [Bin],
386
4028959
    free_collision_marker: &mut OptionalIndex,
387
4028959
) -> Option<usize> {
388
4028959
    let free_index = free_collision_marker.as_opt()?;
389
1416
    *free_collision_marker = std::mem::take(&mut bins[free_index].collision_index);
390
1416

            
391
1416
    Some(free_index)
392
4028959
}
393

            
394
#[inline]
395
8860899
fn insert_into_bin(
396
8860899
    bins: &mut Vec<Bin>,
397
8860899
    free_collision_marker: &mut OptionalIndex,
398
8860899
    bin_index: usize,
399
8860899
    entry_index: usize,
400
8860899
) {
401
8860899
    let existing_bin = std::mem::replace(
402
8860899
        &mut bins[bin_index],
403
8860899
        Bin {
404
8860899
            entry_index: OptionalIndex(entry_index),
405
8860899
            collision_index: OptionalIndex::none(),
406
8860899
        },
407
8860899
    );
408
8860899
    if existing_bin.entry_index.is_some() {
409
        // Move the removed bin to the collision list
410
1684482
        let free_collision_index = free_collision_index(bins, free_collision_marker);
411
1684482
        bins[bin_index].collision_index = OptionalIndex(free_collision_index.unwrap_or(bins.len()));
412

            
413
1684482
        if let Some(index) = free_collision_index {
414
            bins[index] = existing_bin;
415
1684482
        } else {
416
1684482
            bins.push(existing_bin);
417
1684482
        };
418
7176417
    }
419
8860899
}
420

            
421
/// A bitmask for a 2^n quantity of bins.
422
12
#[derive(Clone, Copy, Debug)]
423
struct BinMask(usize);
424

            
425
impl BinMask {
426
4147
    pub fn from_count(count: usize) -> Self {
427
4147
        Self(count - 1)
428
4147
    }
429

            
430
60017900
    pub fn into_count(self) -> usize {
431
60017900
        self.0 + 1
432
60017900
    }
433
}
434

            
435
/// A hash-map bin.
436
242675341
#[derive(Default, Debug, Clone, Copy)]
437
struct Bin {
438
    /// The index inside of the [`BudMap::entries`] vec, if present.
439
    entry_index: OptionalIndex,
440
    /// If present, an index inside of [`BudMap::entries`] for the next bin that
441
    /// collides with this bin. This forms a singly-linked list of bins.
442
    collision_index: OptionalIndex,
443
}
444

            
445
/// A wrapper for a `usize` which reserves `usize::MAX` as a marker indicating
446
/// None. `std::mem::size_of::<Option<usize>>()` is 2x usize, while
447
/// `size_of::<OptionalIndex>()` remains 1x usize.
448
228
#[derive(Clone, Copy, Debug)]
449
struct OptionalIndex(usize);
450

            
451
impl OptionalIndex {
452
134219465
    pub const fn none() -> Self {
453
134219465
        Self(usize::MAX)
454
134219465
    }
455

            
456
    #[inline]
457
355357663
    pub const fn as_opt(self) -> Option<usize> {
458
355357663
        if self.0 == usize::MAX {
459
172225054
            None
460
        } else {
461
183132609
            Some(self.0)
462
        }
463
355357663
    }
464

            
465
    #[inline]
466
53042923
    pub const fn is_none(self) -> bool {
467
53042923
        self.0 == usize::MAX
468
53042923
    }
469

            
470
    #[inline]
471
8860899
    pub const fn is_some(self) -> bool {
472
8860899
        self.0 != usize::MAX
473
8860899
    }
474
}
475

            
476
impl Default for OptionalIndex {
477
41137
    fn default() -> Self {
478
41137
        Self(usize::MAX)
479
41137
    }
480
}
481

            
482
/// A single key/value entry in a [`BudMap`].
483
3
#[derive(Clone, Debug)]
484
struct RawEntry<Key, Value> {
485
    /// The computed hash of `key`.
486
    hash: u64,
487
    /// The entry's key.
488
    key: Key,
489
    /// The entry's value.
490
    value: Value,
491
}
492

            
493
/// A possible entry for a key in a [`BudMap`].
494
pub enum Entry<'a, Key, Value, HashBuilder> {
495
    /// There is an entry for this key that contains a value.
496
    Occupied(OccupiedEntry<'a, Key, Value, HashBuilder>),
497
    /// There is not currently an entry for this key.
498
    Vacant(VacantEntry<'a, Key, Value, HashBuilder>),
499
}
500

            
501
/// An occupied entry for a key in a [`BudMap`].
502
pub struct OccupiedEntry<'a, Key, Value, HashBuilder> {
503
    map: &'a mut BudMap<Key, Value, HashBuilder>,
504
    entry_index: usize,
505
    bin_index: usize,
506
    pointee: Option<usize>,
507
}
508

            
509
impl<'a, Key, Value, HashBuilder> OccupiedEntry<'a, Key, Value, HashBuilder>
510
where
511
    Key: Eq + Hash + std::fmt::Debug,
512
    HashBuilder: BuildHasher,
513
{
514
2
    fn slot(&self) -> &RawEntry<Key, Value> {
515
2
        &self.map.entries[self.entry_index]
516
2
    }
517

            
518
1
    fn slot_mut(&mut self) -> &mut RawEntry<Key, Value> {
519
1
        &mut self.map.entries[self.entry_index]
520
1
    }
521

            
522
    /// Returns the key of this entry.
523
    #[must_use]
524
1
    pub fn key(&self) -> &Key {
525
1
        &self.slot().key
526
1
    }
527

            
528
    /// Returns the value of this entry.
529
    #[must_use]
530
1
    pub fn value(&self) -> &Value {
531
1
        &self.slot().value
532
1
    }
533

            
534
    /// Replaces the contained value, returning the existing value.
535
    #[must_use]
536
1
    pub fn replace(mut self, value: Value) -> Value {
537
1
        std::mem::replace(&mut self.slot_mut().value, value)
538
1
    }
539

            
540
    /// Removes the entry from the containing map, returning the existing value.
541
    #[must_use]
542
1
    pub fn remove(self) -> Value {
543
1
        self.map
544
1
            .remove_inner(self.entry_index, self.bin_index, self.pointee)
545
1
            .value
546
1
    }
547
}
548

            
549
/// An entry for a key that is is not currently part of a [`BudMap`].
550
///
551
/// Because the map has not been modified to create this record, dropping a
552
/// vacant entry will leave the original map untouched.
553
pub struct VacantEntry<'a, Key, Value, HashBuilder> {
554
    map: &'a mut BudMap<Key, Value, HashBuilder>,
555
    key: Key,
556
    hash: u64,
557
}
558

            
559
impl<'a, Key, Value, HashBuilder> VacantEntry<'a, Key, Value, HashBuilder>
560
where
561
    Key: Eq + Hash + std::fmt::Debug,
562
    HashBuilder: BuildHasher,
563
{
564
    /// Inserts `value` into the map for this entry's key.
565
1
    pub fn insert(self, value: Value) {
566
1
        self.map.grow_for_insert();
567
1

            
568
1
        let entry_index = self.map.push_entry(self.hash, self.key, value);
569
1

            
570
1
        insert_into_bin(
571
1
            &mut self.map.bins,
572
1
            &mut self.map.free_collision_head,
573
1
            hash_to_bin(self.hash, self.map.bin_mask),
574
1
            entry_index,
575
1
        );
576
1
    }
577
}
578

            
579
/// A [`BudMap`] iterator that produces borrowed key-value pairs.
580
pub struct Iter<'a, Key, Value> {
581
    order: slice::Iter<'a, RawEntry<Key, Value>>,
582
}
583

            
584
impl<'a, Key, Value> Iterator for Iter<'a, Key, Value> {
585
    type Item = (&'a Key, &'a Value);
586

            
587
61719997
    fn next(&mut self) -> Option<Self::Item> {
588
61719997
        self.order.next().map(|slot| (&slot.key, &slot.value))
589
61719997
    }
590
}
591

            
592
#[inline]
593
#[allow(clippy::cast_possible_truncation)] // Intential truncation
594
75611618
const fn hash_to_bin(hash: u64, bins: BinMask) -> usize {
595
75611618
    hash as usize & bins.0
596
75611618
}
597

            
598
4147
fn next_bucket_size(current_size: usize) -> Option<usize> {
599
4147
    Some(current_size.checked_next_power_of_two()?.max(8))
600
4147
}
601

            
602
#[cfg(test)]
603
mod tests;