1 //! An index-accessed table implementation that avoids duplicate entries.
2 use std::collections::HashMap;
3 use std::hash::Hash;
4 use std::slice;
5
6 /// Collect items into the `table` list, removing duplicates.
7 pub(crate) struct UniqueTable<'entries, T: Eq + Hash> {
8 table: Vec<&'entries T>,
9 map: HashMap<&'entries T, usize>,
10 }
11
12 impl<'entries, T: Eq + Hash> UniqueTable<'entries, T> {
new() -> Self13 pub fn new() -> Self {
14 Self {
15 table: Vec::new(),
16 map: HashMap::new(),
17 }
18 }
19
add(&mut self, entry: &'entries T) -> usize20 pub fn add(&mut self, entry: &'entries T) -> usize {
21 match self.map.get(&entry) {
22 None => {
23 let i = self.table.len();
24 self.table.push(entry);
25 self.map.insert(entry, i);
26 i
27 }
28 Some(&i) => i,
29 }
30 }
31
len(&self) -> usize32 pub fn len(&self) -> usize {
33 self.table.len()
34 }
iter(&self) -> slice::Iter<'_, &'entries T>35 pub fn iter(&self) -> slice::Iter<'_, &'entries T> {
36 self.table.iter()
37 }
38 }
39
40 /// A table of sequences which tries to avoid common subsequences.
41 pub(crate) struct UniqueSeqTable<T: PartialEq + Clone> {
42 table: Vec<T>,
43 }
44
45 impl<T: PartialEq + Clone> UniqueSeqTable<T> {
new() -> Self46 pub fn new() -> Self {
47 Self { table: Vec::new() }
48 }
add(&mut self, values: &[T]) -> usize49 pub fn add(&mut self, values: &[T]) -> usize {
50 if values.is_empty() {
51 return 0;
52 }
53 if let Some(offset) = find_subsequence(values, &self.table) {
54 offset
55 } else {
56 let table_len = self.table.len();
57
58 // Try to put in common the last elements of the table if they're a prefix of the new
59 // sequence.
60 //
61 // We know there wasn't a full match, so the best prefix we can hope to find contains
62 // all the values but the last one.
63 let mut start_from = usize::min(table_len, values.len() - 1);
64 while start_from != 0 {
65 // Loop invariant: start_from <= table_len, so table_len - start_from >= 0.
66 if values[0..start_from] == self.table[table_len - start_from..table_len] {
67 break;
68 }
69 start_from -= 1;
70 }
71
72 self.table
73 .extend(values[start_from..values.len()].iter().cloned());
74 table_len - start_from
75 }
76 }
len(&self) -> usize77 pub fn len(&self) -> usize {
78 self.table.len()
79 }
iter(&self) -> slice::Iter<'_, T>80 pub fn iter(&self) -> slice::Iter<'_, T> {
81 self.table.iter()
82 }
83 }
84
85 /// Try to find the subsequence `sub` in the `whole` sequence. Returns None if
86 /// it's not been found, or Some(index) if it has been. Naive implementation
87 /// until proven we need something better.
find_subsequence<T: PartialEq>(sub: &[T], whole: &[T]) -> Option<usize>88 fn find_subsequence<T: PartialEq>(sub: &[T], whole: &[T]) -> Option<usize> {
89 assert!(!sub.is_empty());
90 // We want i + sub.len() <= whole.len(), i.e. i < whole.len() + 1 - sub.len().
91 if whole.len() < sub.len() {
92 return None;
93 }
94 let max = whole.len() - sub.len();
95 for i in 0..=max {
96 if whole[i..i + sub.len()] == sub[..] {
97 return Some(i);
98 }
99 }
100 None
101 }
102
103 #[test]
test_find_subsequence()104 fn test_find_subsequence() {
105 assert_eq!(find_subsequence(&vec![1], &vec![4]), None);
106 assert_eq!(find_subsequence(&vec![1], &vec![1]), Some(0));
107 assert_eq!(find_subsequence(&vec![1, 2], &vec![1]), None);
108 assert_eq!(find_subsequence(&vec![1, 2], &vec![1, 2]), Some(0));
109 assert_eq!(find_subsequence(&vec![1, 2], &vec![1, 3]), None);
110 assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 2]), Some(1));
111 assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 3, 1]), None);
112 assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 3, 1, 2]), Some(3));
113 assert_eq!(
114 find_subsequence(&vec![1, 1, 3], &vec![1, 1, 1, 3, 3]),
115 Some(1)
116 );
117 }
118
119 #[test]
test_optimal_add()120 fn test_optimal_add() {
121 let mut seq_table = UniqueSeqTable::new();
122 // [0, 1, 2, 3]
123 assert_eq!(seq_table.add(&vec![0, 1, 2, 3]), 0);
124 assert_eq!(seq_table.add(&vec![0, 1, 2, 3]), 0);
125 assert_eq!(seq_table.add(&vec![1, 2, 3]), 1);
126 assert_eq!(seq_table.add(&vec![2, 3]), 2);
127 assert_eq!(seq_table.len(), 4);
128 // [0, 1, 2, 3, 4]
129 assert_eq!(seq_table.add(&vec![2, 3, 4]), 2);
130 assert_eq!(seq_table.len(), 5);
131 // [0, 1, 2, 3, 4, 6, 5, 7]
132 assert_eq!(seq_table.add(&vec![4, 6, 5, 7]), 4);
133 assert_eq!(seq_table.len(), 8);
134 // [0, 1, 2, 3, 4, 6, 5, 7, 8, 2, 3, 4]
135 assert_eq!(seq_table.add(&vec![8, 2, 3, 4]), 8);
136 assert_eq!(seq_table.add(&vec![8]), 8);
137 assert_eq!(seq_table.len(), 12);
138 }
139