0%

算法基础 2 - 集合 Set Interface

定义

集合用于维护数据的内部属性,通常每个项目都具有唯一的键值(key)。集合接口是字典和数据库的一般化表示。

接口

数据结构


数组(Array)

  • 面向序列的数据结构在集合上的应用
  • 只能一个一个元素的查询再操作,效率较低
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def Set_from_Seq(seq):
class set_from_seq:
def __init__(self): self.S = seq()
def __len__(self): return len(self.S)
def __iter__(self): yield from self.S

def build(self, A):
self.S.build(A)

def insert(self, x):
for i in range(len(self.S)):
if self.S.get_at(i+1).key == x.key:
self.S.set_at(i+1, x)
return False
self.S.insert_last(x)
return True

def delete(self, k):
for i in range(len(self.S)):
if self.S.get_at(i+1).key == k:
return self.S.delete_at(i+1)
return None

def find(self, k):
for x in self:
if x.key == k:
return x
return None

def find_min(self):
out = None
for x in self:
if (out is None) or (x.key < out .key):
out = x
return out

def find_max(self):
out = None
for x in self:
if (out is None) or (x.key > out.key):
out = x
return out

def find_next(self, k):
out = None
for x in self:
if x.key > k:
if (out is None) or (x.key < out.key):
out = x
return out

def find_prev(self, k):
out = None
for x in self:
if x.key < k:
if (out is None) or (x.key > out.key):
out = x
return out

def iter_ord(self):
x = self.find_min()
while x:
yield x
x = self.find_next(x.key)

return set_from_seq

排序数组(Sorted Array)

  • 构建数组时根据键值排序,查找时根据排序定位(二分查找)
  • 插入和删除操作效率较低(需要重新分配空间)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
class Set_Item:
def __init__(self):
self.index = None
self.key = None

class Sorted_Array_Set:
def __init__(self): self.A = Array_Seq() # O(1)
def __len__(self): return len(self.A) # O(1)
def __iter__(self): yield from self.A # O(n)
def iter_order(self): yield from self

def build(self, X):
self.A.build(X)
self._sort()

def _sort(self): # bubble sort
for i in range(1, len(self.A)):
j = i
while j > 0 and self.A.get_at(j).key < self.A.get_at(j-1).key:
temp = self.A.get_at(j-1)
self.A.set_at(j-1, self.A.get_at(j))
self.A.set_at(j, temp)
j -= 1

def _binary_search(self, k, i, j):
if i >= j:
return i
m = (i + j)//2
x = self.A.get_at(m)
if x.key > k:
return self._binary_search(k, i, m-1)
if x.key < k:
return self._binary_search(k, m+1, j)
return m

def find_min(self):
if len(self.A) > 0:
return self.A.get_at(0)
else:
return None

def find_max(self):
if len(self.A) > 0:
return self.A.get_at(len(self.A)-1)
else:
return None

def find(self, k):
if len(self.A) == 0:
return None
i = self._binary_search(k, 0, len(self.A)-1)
x = self.A.get_at(i)
if x.key == k:
return x
else:
return None

def find_next(self, k):
if len(self.A) == 0:
return None
i = self._binary_search(k, 0, len(self.A)-1)
x = self.A.get_at(i)
if x.key > k:
return x
if i+1 < len(self.A):
return self.A.get_at(i+1)
else:
return None

def find_prev(self, k):
if len(self.A) == 0:
return None
i = self._binary_search(k, 0, len(self.A)-1)
x = self.A.get_at(i)
if x.key < k:
return x
if i > 0:
return self.A.get_at(i-1)
else:
return None

def insert(self, x):
if len(self.A) == 0:
self.A.insert_first(x)
else:
i = self._binary_search(x.key, 0, len(self.A)-1)
k = self.A.get_at(i).key
if k == x.key:
self.A.set_at(i, x)
return False
if k > x.key:
self.A.insert_at(i+1, x)
else:
self.A.insert_at(i+2, x)
return True

def delete(self, k):
i = self._binary_search(k, 0, len(self.A)-1)
if self.A.get_at(i).key == k:
return self.A.delete_at(i+1)
else:
return False

直接寻址表(Direct Access Arrays)

  • 将元素的键值映射到计算机的内存地址,直接地址来访问键值
  • 该数据结构用空间换取快速的查询时间,但在键值取值范围较大时浪费存储空间,同时find_prevfind_next等操作效率较低
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
class DirectAccessArray:
def __init__(self, u): self.A = [None] * u
def find(self, k): return self.A[k]
def insert(self, x): self.A[x.key] = x
def delete(self, k): self.A[k] = None
def find_next(self, k):
for i in range(k, len(self.A)):
if self.A[i] is not None:
return self.A[i]
def find_max(self):
for i in range(len(self.A)-1, -1, -1):
if self.A[i] is not None:
return self.A[i]
def delete_max(self):
for i in range(len(self.A)-1, -1, -1):
if self.A[i] is not None:
x = self.A[i]
self.A[i] = None
return x

哈希表(Hashing)

  • 将元素的键值映射一个更小的空间上,再映射到直接访问数组
  • 前一个映射函数称为哈希函数,这个更小的空间称为哈希表
  • 当两个键值映射到相同的哈希值时,会发生冲突(collide),此时可以将冲突存储在直接访问数组的其他地方(开放寻址,open addressing),或另找一些地方来存储(链接,chaining)
  • 通用哈希函数:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
class Hash_Table_Set:
def __init__(self, r=200):
self.chain_set = Set_from_Seq(Linked_list_Seq)
self.A = []
self.size = 0
self.r = r
self.p = 2**31 - 1
self.a = random.randint(1, self.p - 1)
self._compute_bounds()
self._resize(0)

def __len__(self): return self.size

def __iter__(self):
for X in self.A:
yield from X

def build(self, X):
for x in X:
self.insert(x)

def _hash(self, k, m):
return ((self.a * k) % self.p) % m

def _compute_bounds(self):
self.upper = len(self.A)
self.lower = len(self.A) * 100*100 // (self.r * self.r)

def _resize(self, n):
if (self.lower >= n) or (n >= self.upper):
f = self.r // 100
if self.r % 100:
f += 1
m = max(n, 1) * f
A = [self.chain_set() for _ in range(m)]
for x in self:
h = self._hash(x.key, m)
A[h].insert(x)
self.A = A
self._compute_bounds()

def find(self, k):
h = self._hash(k, len(self.A))
return self.A[h].find(k)

def insert(self, x):
self._resize(self.size + 1)
h = self._hash(x.key, len(self.A))
added = self.A[h].insert(x)
if added:
self.size += 1
return added

def delete(self, k):
if len(self) <= 0:
return False
h = self._hash(k, len(self.A))
x = self.A[h].delete(k)
if x is not None:
self.size -= 1
self._resize(self.size)
return x

def find_min(self):
out = None
for x in self:
if (out is None) or (x.key < out.key):
out = x
return out

def find_max(self):
out = None
for x in self:
if (out is None) or (x.key > out.key):
out = x
return out

def find_next(self, k):
out = None
for x in self:
if x.key > k:
if (out is None) or (x.key < out.key):
out = x
return out

def find_prev(self, k):
out = None
for x in self:
if x.key < k:
if (out is None) or (x.key > out.key):
out = x
return out

def iter_order(self):
x = self.find_min()
while x:
yield x
x = self.find_next(x.key)

二叉树(AVL树)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
def height(A):
if A:
return A.height
else:
return -1

class Binary_Node:
def __init__(A, x):
A.item = x
A.left = None
A.right = None
A.parent = None
A.subtree_update()

def subtree_update(A):
A.height = 1 + max(height(A.left), height(A.right))

def skew(A):
return height(A.right) - height(A.left)

def subtree_iter(A):
if A.left:
yield from A.left.subtree_iter()
yield A
if A.right:
yield from A.right.subtree_iter()

def subtree_first(A):
if A.left:
return A.left.subtree_first()
else:
return A

def subtree_last(A):
if A.right:
return A.right.subtree_last()
else:
return A

def successor(A):
if A.right:
return A.right.subtree_first()
while A.parent and (A is A.parent.right):
A = A.parent
return A.parent

def predecessor(A):
if A.left:
return A.left.subtree_last()
while A.parent and (A is A.parent.left):
A = A.parent
return A.parent

def subtree_rotate_right(D):
if D.left:
B, E = D.left, D.right
A, C = B.left, B.right
D, B = B, D
D.item, B.item = B.item, D.item
B.left, B.right = A, D
D.left, D.right = C, E
if A:
A.parent = B
if E:
E.parent = D

def subtree_rotate_left(B):
if B.right:
A, D = B.left, B.right
C, E = D.left, D.right
B, D = D, B
B.item, D.item = D.item, B.item
D.left, D.right = B, E
B.left, B.right = A, C
if A:
A.parent = B
if E:
E.parent = D

def rebalance(A):
if A.skew == 2:
if A.right.skew() < 0:
A.right.subtree_rotate_right()
A.subtree_rotate_left()
elif A.skew() == -2:
if A.left.skew() > 0:
A.left.subtree_rotate_left()
A.subtree_rotate_right()

def maintain(A):
A.rebalance()
A.subtree_update()
if A.parent:
A.parent.maintain()

def subtree_insert_before(A, B):
if A.left:
A = A.left.subtree_last()
A.right, B.parent = B, A
else:
A.left, B.parent = B, A
A.maintain()

def subtree_insert_after(A, B):
if A.right:
A = A.right.subtree_first()
A.left, B.parent = B, A
else:
A.right, B.parent = B, A
A.maintain()

def subtree_delete(A):
if A.left or A.right:
if A.left:
B = A.predecessor()
else:
B = A.successor()
A.item, B.item = B.item, A.item
return B.subtree_delete()
if A.parent:
if A is A.parent.left:
A.parent.left = None
else:
A.parent.right = None
A.maintain()
return A

class BST_Node(Binary_Node):
def subtree_find(A, k):
if k < A.item.key:
if A.left:
return A.left.subtree_find(k)
elif k > A.item.key:
if A.right:
return A.right.subtree_find(k)
else:
return A
return None

def subtree_find_next(A, k):
if A.item <= k:
if A.right:
return A.right.subtree_find_next(k)
else:
return None
elif A.left:
B = A.left.subtree_find_next(k)
if B:
return B
return A

def subtree_find_prev(A, k):
if A.item >= k:
if A.left:
return A.left.subtree_find_prev(k)
else:
return None
elif A.right:
B = A.right.subtree_find_prev(k)
if B:
return B
return A

def subtree_insert(A, B):
if B.item < A.item:
if A.left:
A.left.subtree_insert(B)
else:
A.subtree_insert_before(B)
elif B.item > A.item:
if A.right:
A.right.subtree_insert(B)
else:
A.subtree_insert_after(B)
else:
A.item = B.item

class Binary_Tree:
def __init__(T, Node_Type=Binary_Node):
T.root = None
T.size = 0
T.Node_Type = Node_Type

def __len__(T): return T.size

def __iter__(T):
if T.root:
for A in T.root.subtree_iter():
yield A.item

class Set_Binary_Set(Binary_Tree):
def __init__(self):
super().__init__(BST_Node)

def iter_order(self): yield from self

def build(self, X):
for x in X:
self.insert(x)

def find_min(self):
if self.root:
return self.root.subtree_first().item

def find_max(self):
if self.root:
return self.root.subtree_last().item

def find(self, k):
if self.root:
node = self.root.subtree_find(k)
if node:
return node.item

def find_next(self, k):
if self.root:
node = self.root.subtree_find_next(k)
if node:
return node.item

def find_prev(self, k):
if self.root:
node = self.root.subtree_find_prev(k)
if node:
return node.item

def insert(self, x):
new_node = self.Node_Type(x)
if self.root:
self.root.subtree_insert(new_node)
if new_node.parent is None:
return False
else:
self.root = new_node
self.size += 1
return True

def delete(self, k):
if self.root:
node = self.root.subtree_find(k)
if node:
ext = node.subtree_delete()
if ext.parent is None:
self.root = None
self.size -= 1
return ext.item