5. Sets and Maps

In the last chapter we studied sequences which are used to keep track of lists of things where duplicate values are allowed. For instance, there can be two sixes in a sequence or list of integers. In this chapter we look at sets where duplicate values are not allowed. After examining sets we’ll move on to talk about maps. Maps may also be called dictionaries or hash tables.

The term hash table actually suggests an implementation of a set or map. The primary focus of this chapter is on understanding hashing. Hashing is a very important concept in Computer Science because it is a very efficient method of searching for a value. To begin the chapter we’ll motivate our interest in hashing, then we’ll develop a hashing algorithm for finding values in a set. We’ll also apply hashing to the building of sets and maps. Then we’ll look at an important technique that uses hashing, called memoization, and we’ll apply that technique to a couple of problems.

5.1. The HashSet Datatype

You can download the HashSet datatype implementation here. The implementation is partial. The remainder of the set implementation is left as an exercise for the reader.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
#################################################################################
# HashSet.py - A hashtable implementation of a set datatype. 
#################################################################################
# To complete this implementation you must complete the code for all methods that 
# currently have a "pass" in them. Consult the documentation for these methods
# to see what they should return. Many of these methods can be implemented
# by calling other supporting methods, so make sure you don't write more 
# code than necessary. Many methods take exactly one line of code to implement.
# The test main function at the bottom should completely run passing all tests
# once you have implemented the methods here. The test main function is not
# intended to completely test the entire class though. You have to write
# additional tests to thoroughly test the HashSet class. 

class HashSet:
    """A set of hashable items stored in a hash table with linear probing.

    ``self.items`` is the slot list and ``self.numItems`` counts the real
    items in it.  A slot holds one of three things:

    * ``None`` - the slot is empty and ends a probe chain,
    * a ``__Placeholder`` - an item was removed here while the probe chain
      continued past it, so lookups must keep probing,
    * a stored item.

    Because ``None`` marks an empty slot, ``None`` itself cannot be stored
    as an item.

    Methods whose body is ``pass`` are exercise stubs: they are not yet
    implemented and return ``None``.  Their names follow the built-in
    ``set`` type, whose documentation describes the intended contract.
    """

    class __Placeholder:
        """Marker left in a slot when an item is removed mid-chain."""

        def __init__(self):
            pass

        def __eq__(self, other):
            # A placeholder never equals anything, so a probe comparing a
            # slot against a real item always steps over it.
            return False

    @staticmethod
    def __add(item, items):
        """Insert item into the slot list items.

        Returns True if the item was inserted and False if an equal item
        was already present.  The first placeholder met along the probe
        chain is reused; otherwise the item goes into the first empty slot.
        Raises RuntimeError if the list has neither an empty slot nor a
        placeholder.
        """
        size = len(items)
        idx = hash(item) % size
        loc = -1

        # Probe at most once around the table.  Placeholders are never None,
        # so a table holding only items and placeholders has no None slot to
        # stop an unbounded probe.
        for _ in range(size):
            slot = items[idx]
            if slot is None:
                if loc < 0:
                    loc = idx
                break

            if slot == item:
                # item already in set
                return False

            if loc < 0 and isinstance(slot, HashSet.__Placeholder):
                loc = idx

            idx = (idx + 1) % size

        if loc < 0:
            raise RuntimeError("HashSet table is full")

        items[loc] = item
        return True

    @staticmethod
    def __remove(item, items):
        """Remove item from the slot list items.

        Returns True if the item was found and removed, False otherwise.
        """
        size = len(items)
        idx = hash(item) % size

        # Bounded for the same reason as in __add.
        for _ in range(size):
            slot = items[idx]
            if slot is None:
                return False

            if slot == item:
                if items[(idx + 1) % size] is None:
                    # Nothing follows in the chain, so the slot can simply
                    # become empty again.
                    items[idx] = None
                else:
                    # Later slots may belong to this chain; keep it intact.
                    items[idx] = HashSet.__Placeholder()
                return True

            idx = (idx + 1) % size

        return False

    @staticmethod
    def __rehash(oldList, newList):
        """Add every real item of oldList to newList and return newList.

        Empty slots and placeholders are not carried over.
        """
        for x in oldList:
            if x is not None and not isinstance(x, HashSet.__Placeholder):
                HashSet.__add(x, newList)

        return newList

    def __init__(self, contents=()):
        """Create a set holding the items of the iterable contents."""
        self.items = [None] * 10
        self.numItems = 0

        for item in contents:
            self.add(item)

    def __str__(self):
        """Exercise stub: string form of the set."""
        pass

    def __iter__(self):
        """Yield the stored items in slot order."""
        for slot in self.items:
            if slot is not None and not isinstance(slot, HashSet.__Placeholder):
                yield slot

    # Following are the mutator set methods
    def add(self, item):
        """Add item to the set; do nothing if an equal item is present.

        The table is doubled once it is at least 75% full.
        """
        if HashSet.__add(item, self.items):
            self.numItems += 1
            load = self.numItems / len(self.items)
            if load >= 0.75:
                self.items = HashSet.__rehash(
                    self.items, [None] * (2 * len(self.items)))

    def remove(self, item):
        """Remove item from the set.

        Raises KeyError if the item is not in the set.
        """
        if HashSet.__remove(item, self.items):
            self.numItems -= 1
            # Counting at least 10 items means the 25% threshold can only be
            # reached by a table of 40 or more slots, so small tables are
            # never halved.
            load = max(self.numItems, 10) / len(self.items)
            if load <= 0.25:
                self.items = HashSet.__rehash(
                    self.items, [None] * (len(self.items) // 2))
        else:
            raise KeyError("Item not in HashSet")

    def discard(self, item):
        """Exercise stub: see set.discard."""
        pass

    def pop(self):
        """Exercise stub: see set.pop."""
        pass

    def clear(self):
        """Exercise stub: see set.clear."""
        pass

    def update(self, other):
        """Exercise stub: see set.update."""
        pass

    def intersection_update(self, other):
        """Exercise stub: see set.intersection_update."""
        pass

    def difference_update(self, other):
        """Discard every item of other from this set.

        Relies on discard, which is still a stub, so this has no effect
        until discard is implemented.
        """
        for item in other:
            self.discard(item)

    def symmetric_difference_update(self, other):
        """Exercise stub: see set.symmetric_difference_update."""
        pass

    # Following are the accessor methods for the HashSet
    def __len__(self):
        """Exercise stub: number of items in the set."""
        pass

    def __contains__(self, item):
        """Return True if an item equal to item is in the set."""
        size = len(self.items)
        idx = hash(item) % size

        # Bounded for the same reason as in __add.
        for _ in range(size):
            slot = self.items[idx]
            if slot is None:
                return False

            if slot == item:
                return True

            idx = (idx + 1) % size

        return False

    # One extra method for use with the HashMap class. This method is not needed in the
    # HashSet implementation, but it is used by the HashMap implementation.
    def __getitem__(self, item):
        """Exercise stub: HashMap uses the result as the stored element for item."""
        pass

    def not__contains__(self, item):
        """Exercise stub: presumably the negation of __contains__ (confirm)."""
        pass

    def isdisjoint(self, other):
        """Exercise stub: see set.isdisjoint."""
        pass

    def issubset(self, other):
        """Exercise stub: see set.issubset."""
        pass

    def issuperset(self, other):
        """Exercise stub: see set.issuperset."""
        pass

    def union(self, other):
        """Exercise stub: see set.union."""
        pass

    def intersection(self, other):
        """Exercise stub: see set.intersection."""
        pass

    def difference(self, other):
        """Exercise stub: see set.difference."""
        pass

    def symmetric_difference(self, other):
        """Exercise stub: see set.symmetric_difference."""
        pass

    def copy(self):
        """Exercise stub: see set.copy."""
        pass

    # Operator Definitions
    def __or__(self, other):
        """Exercise stub: the | operator."""
        pass

    def __and__(self, other):
        """Exercise stub: the & operator."""
        pass

    def __sub__(self, other):
        """Exercise stub: the - operator."""
        pass

    def __xor__(self, other):
        """Exercise stub: the ^ operator."""
        pass

    def __ior__(self, other):
        """Exercise stub: the |= operator."""
        pass

    def __iand__(self, other):
        """Exercise stub: the &= operator."""
        pass

    def __ixor__(self, other):
        """Exercise stub: the ^= operator."""
        pass

    def __le__(self, other):
        """Exercise stub: the <= operator."""
        pass

    def __lt__(self, other):
        """Exercise stub: the < operator."""
        pass

    def __ge__(self, other):
        """Exercise stub: the >= operator."""
        pass

    def __gt__(self, other):
        """Exercise stub: the > operator."""
        pass

    def __eq__(self, other):
        """Exercise stub: the == operator."""
        pass
    
    

def main():
    """Run the HashSet self-tests, printing a Passed/Failed line per test.

    Tests 1 to 17 call len, intersection_update, update, clear,
    difference_update, membership, difference, issubset, issuperset, ==,
    copy, discard and remove.  Most of these depend on HashSet methods
    that are left as exercises, so the tests only all pass once those
    methods are implemented.
    """
    def report(number, passed):
        # Prints "Test <number> Passed" or "Test <number> Failed".
        print("Test", number, "Passed" if passed else "Failed")

    def check_present(hset, values, in_test, not_in_test):
        # Every value must be in hset.  The check is made once with "in"
        # and once with "not in", each reported under its own test number.
        in_passed = True
        not_in_passed = True

        for value in values:
            if not (value in hset):
                in_passed = False
                print("Test", in_test, "Failed on", value)

            if value not in hset:
                not_in_passed = False
                print("Test", not_in_test, "Failed on", value)

        if in_passed:
            print("Test", in_test, "Passed")

        if not_in_passed:
            print("Test", not_in_test, "Passed")

    s = HashSet(list(range(100)))
    t = HashSet(list(range(10, 20)))
    u = HashSet(list(range(10, 20)))

    report(1, len(t) == len(u) and len(t) == 10)

    s.intersection_update(t)
    report(2, len(s) == 10)

    s = HashSet(list(range(100)))
    t.update(s)
    report(3, len(s) == len(t))

    t.clear()
    t.update(u)
    report(4, len(t) == len(u))

    # After removing 10..19, the values on either side must still be there.
    s.difference_update(t)
    check_present(s, range(1, 10), 5, 6)
    check_present(s, range(20, 100), 7, 8)

    x = HashSet(["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"])
    y = HashSet(["c", "d", "e", "l", "m", "n"])
    z = x.difference(y)

    report(9, len(z) == 8)

    test10Passed = True

    for item in z:
        if item not in ["a", "b", "f", "g", "h", "i", "j", "k"]:
            test10Passed = False
            # Report the offending element, not the whole source set.
            print("Test 10 Failed on", item)

    if test10Passed:
        print("Test 10 Passed")

    report(11, z.issubset(x))
    report(12, x.issuperset(z))
    report(13, not (z == y))
    report(14, z == z)

    r = z.copy()
    report(15, r == z)

    # Adding and then discarding the same items must leave z unchanged.
    for item in range(50):
        z.add(item)

    for item in range(50):
        z.discard(item)

    report(16, r == z)

    # Same round trip, this time through remove.
    for item in range(50):
        z.add(item)

    for item in range(50):
        z.remove(item)

    report(17, r == z)
   
    
# Run the self-tests only when this file is executed as a script.
if __name__ == "__main__":
    main()
    
    
                
        
        
        
        

5.2. The HashMap Datatype

You can download the HashMap datatype implementation here. The implementation is partial. The remainder of the map implementation is left as an exercise for the reader. The HashMap datatype requires a module called hashset.py containing the HashSet datatype.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import hashset

class HashMap:
    """A dictionary-like map stored in a hashset.HashSet.

    Every entry is kept in ``self.hSet`` as a key/value pair object that
    hashes and compares by its key only.  A pair built from just a key
    (with value None) therefore matches the stored pair for that key, and
    is used as the probe for lookups and removals.

    Methods whose body is ``pass`` are exercise stubs and return None.
    """

    class __KVPair:
        """A key/value pair whose identity is determined by the key alone."""

        def __init__(self, key, value):
            self.key = key
            self.value = value

        def __eq__(self, other):
            # Only another pair can be equal; a bare key never is.
            if type(self) is not type(other):
                return False

            return self.key == other.key

        def getKey(self):
            return self.key

        def getValue(self):
            return self.value

        def __hash__(self):
            return hash(self.key)

    def __init__(self):
        """Create an empty map."""
        self.hSet = hashset.HashSet()

    def __len__(self):
        """Return the length of the underlying set."""
        return len(self.hSet)

    def __contains__(self, item):
        """Return True if item is a key of the map."""
        return HashMap.__KVPair(item, None) in self.hSet

    def not__contains__(self, item):
        """Return True if item is not a key of the map."""
        # Probe with a pair: a bare key never equals a stored pair.
        return HashMap.__KVPair(item, None) not in self.hSet

    def __setitem__(self, key, value):
        """Map key to value, replacing any value already stored for key."""
        pair = HashMap.__KVPair(key, value)
        # The set ignores an add of an equal element, and pairs are equal
        # by key, so an existing pair must be removed before the new value
        # can be stored.
        if pair in self.hSet:
            self.hSet.remove(pair)
        self.hSet.add(pair)

    def __getitem__(self, key):
        """Return the value stored for key.

        Raises KeyError if key is not in the map.
        """
        probe = HashMap.__KVPair(key, None)
        if probe in self.hSet:
            return self.hSet[probe].getValue()

        raise KeyError("Key " + str(key) + " not in HashMap")

    def get(self, key, default=None):
        """Return the value stored for key, or default if key is absent."""
        probe = HashMap.__KVPair(key, None)
        if probe in self.hSet:
            return self.hSet[probe].getValue()
        else:
            return default

    def __delitem__(self, key):
        """Remove key and its value from the map.

        Raises KeyError if key is not in the map.
        """
        probe = HashMap.__KVPair(key, None)
        if probe in self.hSet:
            # Remove by pair: the set holds pairs, not bare keys.
            self.hSet.remove(probe)
        else:
            raise KeyError("Key " + str(key) + " not in HashMap")

    def items(self):
        """Return a list of (key, value) tuples."""
        result = []
        for x in self.hSet:
            result.append((x.getKey(), x.getValue()))
        return result

    def keys(self):
        """Return a list of the keys."""
        result = []
        for x in self.hSet:
            result.append(x.getKey())
        return result

    def values(self):
        """Return a list of the values."""
        result = []
        for x in self.hSet:
            result.append(x.getValue())
        return result

    def pop(self, key):
        """Remove key from the map and return its value.

        Raises KeyError if key is not in the map.
        """
        probe = HashMap.__KVPair(key, None)
        if probe in self.hSet:
            item = self.hSet[probe]
            self.hSet.remove(probe)
            return item.getValue()
        else:
            raise KeyError("Key " + str(key) + " not in HashMap")

    def popitem(self):
        """Pop one pair from the underlying set and return it as (key, value)."""
        item = self.hSet.pop()
        return (item.getKey(), item.getValue())

    def setdefault(self):
        """Exercise stub."""
        pass

    def update(self, other):
        """Exercise stub."""
        pass

    def clear(self):
        """Exercise stub."""
        pass

    def copy(self):
        """Exercise stub."""
        pass

    def __iter__(self):
        """Yield the keys of the map."""
        for x in self.hSet:
            yield x.getKey()
    
def main():
    """Demonstrate HashMap: print its length, keys and key/value pairs.

    Three entries are stored, the keys and then the key/value pairs are
    printed, "dog" is assigned a second time, one entry is popped with
    popitem and printed, and the remaining pairs are printed again.
    """
    table = HashMap()
    print(len(table))

    initial = (
        ("dog", "cat"),
        ("batman", "joker"),
        ("superman", "lex luther"),
    )
    for name, partner in initial:
        table[name] = partner

    def show_pairs():
        # One line per key: the key followed by its value.
        for k in table:
            print(k, table[k])

    for k in table:
        print(k)

    show_pairs()

    table["dog"] = "skunk"

    print(table.popitem())

    show_pairs()
        
# Run the demonstration only when this file is executed as a script.
if __name__ == "__main__":
    main()

5.3. Sudoku Puzzles

Here are six sudoku puzzles that can be solved using the sudoku solver rules that are discussed in this chapter.