Issues (718)

shards/mishards/hash_ring.py (1 issue)

Severity
1
import math
2
import sys
3
from bisect import bisect
4
5
if sys.version_info >= (2, 5):
6
    import hashlib
7
    md5_constructor = hashlib.md5
8
else:
9
    import md5
10
    md5_constructor = md5.new
11
12
13
class HashRing(object):
14
    def __init__(self, nodes=None, weights=None):
15
        """`nodes` is a list of objects that have a proper __str__ representation.
16
        `weights` is dictionary that sets weights to the nodes.  The default
17
        weight is that all nodes are equal.
18
        """
19
        self.ring = dict()
20
        self._sorted_keys = []
21
22
        self.nodes = nodes
23
24
        if not weights:
25
            weights = {}
26
        self.weights = weights
27
28
        self._generate_circle()
29
30
    def _generate_circle(self):
31
        """Generates the circle.
32
        """
33
        total_weight = 0
34
        for node in self.nodes:
35
            total_weight += self.weights.get(node, 1)
36
37
        for node in self.nodes:
38
            weight = 1
39
40
            if node in self.weights:
41
                weight = self.weights.get(node)
42
43
            factor = math.floor((40 * len(self.nodes) * weight) / total_weight)
44
45
            for j in range(0, int(factor)):
46
                b_key = self._hash_digest('%s-%s' % (node, j))
47
48
                for i in range(0, 3):
49
                    key = self._hash_val(b_key, lambda x: x + i * 4)
0 ignored issues
show
The variable i does not seem to be defined for all execution paths.
Loading history...
50
                    self.ring[key] = node
51
                    self._sorted_keys.append(key)
52
53
        self._sorted_keys.sort()
54
55
    def get_node(self, string_key):
56
        """Given a string key a corresponding node in the hash ring is returned.
57
58
        If the hash ring is empty, `None` is returned.
59
        """
60
        pos = self.get_node_pos(string_key)
61
        if pos is None:
62
            return None
63
        return self.ring[self._sorted_keys[pos]]
64
65
    def get_node_pos(self, string_key):
66
        """Given a string key a corresponding node in the hash ring is returned
67
        along with it's position in the ring.
68
69
        If the hash ring is empty, (`None`, `None`) is returned.
70
        """
71
        if not self.ring:
72
            return None
73
74
        key = self.gen_key(string_key)
75
76
        nodes = self._sorted_keys
77
        pos = bisect(nodes, key)
78
79
        if pos == len(nodes):
80
            return 0
81
        else:
82
            return pos
83
84
    def iterate_nodes(self, string_key, distinct=True):
85
        """Given a string key it returns the nodes as a generator that can hold the key.
86
87
        The generator iterates one time through the ring
88
        starting at the correct position.
89
90
        if `distinct` is set, then the nodes returned will be unique,
91
        i.e. no virtual copies will be returned.
92
        """
93
        if not self.ring:
94
            yield None, None
95
96
        returned_values = set()
97
98
        def distinct_filter(value):
99
            if str(value) not in returned_values:
100
                returned_values.add(str(value))
101
                return value
102
103
        pos = self.get_node_pos(string_key)
104
        for key in self._sorted_keys[pos:]:
105
            val = distinct_filter(self.ring[key])
106
            if val:
107
                yield val
108
109
        for i, key in enumerate(self._sorted_keys):
110
            if i < pos:
111
                val = distinct_filter(self.ring[key])
112
                if val:
113
                    yield val
114
115
    def gen_key(self, key):
116
        """Given a string key it returns a long value,
117
        this long value represents a place on the hash ring.
118
119
        md5 is currently used because it mixes well.
120
        """
121
        b_key = self._hash_digest(key)
122
        return self._hash_val(b_key, lambda x: x)
123
124
    def _hash_val(self, b_key, entry_fn):
125
        return (b_key[entry_fn(3)] << 24) | (b_key[entry_fn(2)] << 16) | (
126
            b_key[entry_fn(1)] << 8) | b_key[entry_fn(0)]
127
128
    def _hash_digest(self, key):
129
        m = md5_constructor()
130
        key = key.encode()
131
        m.update(key)
132
        return m.digest()
133
134
135
if __name__ == '__main__':
136
    from collections import defaultdict
137
    servers = [
138
        '192.168.0.246:11212', '192.168.0.247:11212', '192.168.0.248:11212',
139
        '192.168.0.249:11212'
140
    ]
141
142
    ring = HashRing(servers)
143
    keys = ['{}'.format(i) for i in range(100)]
144
    mapped = defaultdict(list)
145
    for k in keys:
146
        server = ring.get_node(k)
147
        mapped[server].append(k)
148
149
    for k, v in mapped.items():
150
        print(k, v)
151