Completed
Push — master ( 8f71e0...468202 )
by Roy
01:20
created

pyspider.libs.CounterManager.to_dict()   A

Complexity

Conditions 4

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 4
dl 0
loc 12
rs 9.2
1
#!/usr/bin/env python
2
# -*- encoding: utf-8 -*-
3
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
4
# Author: Binux<[email protected]>
5
#         http://binux.me
6
# Created on 2012-11-14 17:09:50
7
8
from __future__ import unicode_literals, division, absolute_import
9
10
import time
11
import logging
12
from collections import deque
13
try:
14
    from UserDict import DictMixin
15
except ImportError:
16
    from collections import Mapping as DictMixin
17
18
import six
19
from six import iteritems
20
from six.moves import cPickle
21
22
23
class BaseCounter(object):
    """Abstract interface for counters.

    Every member raises ``NotImplementedError``; subclasses are expected to
    override all of them, including ``__init__``.
    """

    def __init__(self):
        raise NotImplementedError

    def event(self, value=1):
        """Fire an event."""
        raise NotImplementedError

    def value(self, value):
        """Set counter value."""
        raise NotImplementedError

    @property
    def avg(self):
        """Get average value."""
        raise NotImplementedError

    @property
    def sum(self):
        """Get sum of counter."""
        raise NotImplementedError

    def empty(self):
        """Return a truthy value when the counter holds no data."""
        raise NotImplementedError
49
50
51
class TotalCounter(BaseCounter):
    """Counter that keeps a single running total in ``cnt``."""

    def __init__(self):
        self.cnt = 0

    def event(self, value=1):
        """Add ``value`` to the running total."""
        self.cnt += value

    def value(self, value):
        """Overwrite the running total with ``value``."""
        self.cnt = value

    @property
    def sum(self):
        """The running total."""
        return self.cnt

    # The average of a plain total is reported as the total itself.
    avg = sum

    def empty(self):
        """Return True when the running total equals zero."""
        return self.cnt == 0
73
74
75
class AverageWindowCounter(BaseCounter):
    """
    Record the last N (``window_size``) values.
    """

    def __init__(self, window_size=300):
        self.window_size = window_size
        # A bounded deque drops its oldest entry once ``window_size`` is hit.
        self.values = deque(maxlen=window_size)

    def event(self, value=1):
        """Append ``value`` to the window."""
        self.values.append(value)

    # Setting a value and firing an event are the same operation here.
    value = event

    @property
    def avg(self):
        """Mean of the recorded values, or 0 when nothing is recorded."""
        if not self.values:
            # Guard against dividing by zero on an empty window.
            return 0
        return self.sum / len(self.values)

    @property
    def sum(self):
        """Sum of the recorded values."""
        return sum(self.values)

    def empty(self):
        """Return True when no value is recorded, False otherwise."""
        return not self.values
100
101
102
class TimebaseAverageEventCounter(BaseCounter):
    """
    Record the events of the last window_size * window_interval seconds.

    Events are first accumulated in a cache bucket.  Once the bucket is older
    than ``window_interval`` seconds it is appended to the window, and
    records older than the window are trimmed.  ``avg`` is the mean value
    per event.
    """

    def __init__(self, window_size=30, window_interval=10):
        self.max_window_size = window_size
        # Effective window length in intervals; grows from 0 towards
        # ``max_window_size`` as time passes since the first event.
        self.window_size = 0
        self.window_interval = window_interval
        self.values = deque(maxlen=window_size)
        self.events = deque(maxlen=window_size)
        self.times = deque(maxlen=window_size)

        self.cache_value = 0
        self.cache_event = 0
        self.cache_start = None
        self._first_data_time = None

    def _flush_cache(self):
        """Append the current cache bucket to the window and notify on_append."""
        self.values.append(self.cache_value)
        self.events.append(self.cache_event)
        self.times.append(self.cache_start)
        self.on_append(self.cache_value, self.cache_start)

    def event(self, value=1):
        """Record one event carrying ``value``; returns ``self`` for chaining."""
        now = time.time()
        if self._first_data_time is None:
            self._first_data_time = now

        bucket_open = (self.cache_start is not None
                       and now - self.cache_start <= self.window_interval)
        if bucket_open:
            self.cache_value += value
            self.cache_event += 1
        else:
            # The previous bucket (if any) has expired: archive it and start
            # a fresh one with this event.
            if self.cache_start is not None:
                self._flush_cache()
            self.cache_value = value
            self.cache_event = 1
            self.cache_start = now
        return self

    def value(self, value):
        """Overwrite the value of the current cache bucket."""
        self.cache_value = value

    def _trim_window(self):
        """Archive an expired cache bucket and drop records outside the window."""
        now = time.time()
        if self.cache_start is not None and now - self.cache_start > self.window_interval:
            self._flush_cache()
            self.cache_value = 0
            # Reset the event count too, otherwise ``avg`` would count the
            # flushed events twice (once in ``events``, once in the cache).
            self.cache_event = 0
            self.cache_start = None

        if self.window_size != self.max_window_size and self._first_data_time is not None:
            time_passed = now - self._first_data_time
            self.window_size = min(self.max_window_size, time_passed / self.window_interval)
        window_limit = now - self.window_size * self.window_interval
        while self.times and self.times[0] < window_limit:
            self.times.popleft()
            self.events.popleft()
            self.values.popleft()

    @property
    def avg(self):
        """Mean value per event inside the window, or 0 without events."""
        # Read ``sum`` first: it trims the window, so the event count below
        # is taken over exactly the same records as the total.
        total = self.sum
        events = sum(self.events) + self.cache_event
        if not events:
            return 0
        return float(total) / events

    @property
    def sum(self):
        """Sum of all values inside the window, including the cache bucket."""
        self._trim_window()
        return sum(self.values) + self.cache_value

    def empty(self):
        """Return True when neither the window nor the cache holds data."""
        self._trim_window()
        return not self.values and self.cache_start is None

    def on_append(self, value, time):
        """Hook called whenever a cache bucket is appended to the window."""
        pass
185
186
187
class TimebaseAverageWindowCounter(BaseCounter):
    """
    Record the values of the last window_size * window_interval seconds.

    Values are first accumulated in a cache bucket.  Once the bucket is older
    than ``window_interval`` seconds it is appended to the window, and
    records older than the window are trimmed.  ``avg`` is the sum divided by
    the effective window duration.
    """

    def __init__(self, window_size=30, window_interval=10):
        self.max_window_size = window_size
        # Effective window length in intervals; grows from 0 towards
        # ``max_window_size`` as time passes since the first event.
        self.window_size = 0
        self.window_interval = window_interval
        self.values = deque(maxlen=window_size)
        self.times = deque(maxlen=window_size)

        self.cache_value = 0
        self.cache_start = None
        self._first_data_time = None

    def _flush_cache(self):
        """Append the current cache bucket to the window and notify on_append."""
        self.values.append(self.cache_value)
        self.times.append(self.cache_start)
        self.on_append(self.cache_value, self.cache_start)

    def event(self, value=1):
        """Add ``value`` to the current bucket; returns ``self`` for chaining."""
        now = time.time()
        if self._first_data_time is None:
            self._first_data_time = now

        bucket_open = (self.cache_start is not None
                       and now - self.cache_start <= self.window_interval)
        if bucket_open:
            self.cache_value += value
        else:
            # The previous bucket (if any) has expired: archive it and start
            # a fresh one with this value.
            if self.cache_start is not None:
                self._flush_cache()
            self.cache_value = value
            self.cache_start = now
        return self

    def value(self, value):
        """Overwrite the value of the current cache bucket."""
        self.cache_value = value

    def _trim_window(self):
        """Archive an expired cache bucket and drop records outside the window."""
        now = time.time()
        if self.cache_start is not None and now - self.cache_start > self.window_interval:
            self._flush_cache()
            self.cache_value = 0
            self.cache_start = None

        if self.window_size != self.max_window_size and self._first_data_time is not None:
            time_passed = now - self._first_data_time
            self.window_size = min(self.max_window_size, time_passed / self.window_interval)
        window_limit = now - self.window_size * self.window_interval
        while self.times and self.times[0] < window_limit:
            self.times.popleft()
            self.values.popleft()

    @property
    def avg(self):
        """Sum divided by window_size * window_interval, or 0 for a zero window."""
        # ``self.sum`` trims first, which also refreshes ``window_size``.
        total = float(self.sum)
        if not self.window_size:
            return 0
        return total / self.window_size / self.window_interval

    @property
    def sum(self):
        """Sum of all values inside the window, including the cache bucket."""
        self._trim_window()
        return sum(self.values) + self.cache_value

    def empty(self):
        """Return True when neither the window nor the cache holds data."""
        self._trim_window()
        return not self.values and self.cache_start is None

    def on_append(self, value, time):
        """Hook called whenever a cache bucket is appended to the window."""
        pass
262
263
264
class CounterValue(DictMixin):
    """
    A dict-like value item for CounterManager.

    It is a view over every counter in ``manager.counters`` whose tuple key
    starts with ``keys``.  A counter stored under exactly ``keys`` is exposed
    under the special name ``'__value__'``.
    """

    def __init__(self, manager, keys):
        self.manager = manager
        self._keys = keys

    def __getitem__(self, key):
        """Return the counter at ``key`` or a nested view for that prefix.

        Raises KeyError when no counter key starts with the extended prefix.
        """
        if key == '__value__':
            return self.manager.counters[self._keys]

        full_key = self._keys + (key, )
        matches = [k for k in self.manager.counters
                   if k[:len(full_key)] == full_key]
        if not matches:
            raise KeyError(key)
        if matches == [full_key]:
            # Exactly one counter, stored under the full key itself.
            return self.manager.counters[full_key]
        # Deeper keys exist below this prefix: hand out a nested view.
        return CounterValue(self.manager, full_key)

    def __len__(self):
        return len(self.keys())

    def __iter__(self):
        return iter(self.keys())

    def __contains__(self, key):
        return key in self.keys()

    def keys(self):
        """Return the set of next-level key components under this prefix."""
        prefix = self._keys
        depth = len(prefix)
        result = set()
        for key in self.manager.counters:
            if key[:depth] == prefix:
                result.add(key[depth] if len(key) > depth else '__value__')
        return result

    def to_dict(self, get_value=None):
        """Dump counters as a (nested) dict.

        With ``get_value`` set, each counter is replaced by the attribute of
        that name (e.g. ``'sum'`` or ``'avg'``); otherwise the counter object
        itself is stored.
        """
        result = {}
        for key in self.keys():
            value = self[key]
            if isinstance(value, CounterValue):
                # __getitem__ yields either a nested view or a counter; only
                # views are recursed into, so any counter class works.
                result[key] = value.to_dict(get_value)
            elif get_value is not None:
                result[key] = getattr(value, get_value)
            else:
                result[key] = value
        return result
323
324
325
class CounterManager(DictMixin):
    """
    A dict-like counter manager.

    When using a tuple as event key, say ('foo', 'bar'), you can visit the
    counter with manager['foo']['bar'], or get all counters whose first
    element is 'foo' with manager['foo'].

    It's useful for a group of counters.
    """

    def __init__(self, cls=TimebaseAverageWindowCounter):
        """Init manager with the counter class used for new keys."""
        self.cls = cls
        self.counters = {}

    def _counter_for(self, key):
        """Return the counter stored under ``key``, creating it if missing."""
        if isinstance(key, six.string_types):
            key = (key, )
        assert isinstance(key, tuple), "event key type error"
        if key not in self.counters:
            self.counters[key] = self.cls()
        return self.counters[key]

    def _matching_keys(self, prefix):
        """Return every counter key that starts with the tuple ``prefix``."""
        return [k for k in self.counters if k[:len(prefix)] == prefix]

    def event(self, key, value=1):
        """Fire an event of a counter by counter key."""
        self._counter_for(key).event(value)
        return self

    def value(self, key, value=1):
        """Set value of a counter by counter key."""
        self._counter_for(key).value(value)
        return self

    def trim(self):
        """Remove counters that report themselves as empty."""
        # Iterate over a snapshot of the keys: entries are deleted in the loop.
        for key in list(self.counters):
            if self.counters[key].empty():
                del self.counters[key]

    def __getitem__(self, key):
        """Return the counter at ``(key,)`` or a CounterValue view for it.

        Raises KeyError when no counter key starts with ``key``.
        """
        prefix = (key, )
        matches = self._matching_keys(prefix)
        if not matches:
            raise KeyError(key)
        if matches == [prefix]:
            return self.counters[prefix]
        return CounterValue(self, prefix)

    def __delitem__(self, key):
        """Delete every counter whose key starts with ``key``."""
        for matched in self._matching_keys((key, )):
            del self.counters[matched]

    def __iter__(self):
        return iter(self.keys())

    def __len__(self):
        return len(self.keys())

    def keys(self):
        """Return the set of first components of all counter keys."""
        return {key[0] if key else () for key in self.counters}

    def to_dict(self, get_value=None):
        """Dump counters as a (nested) dict, after trimming empty counters.

        With ``get_value`` set, each counter is replaced by the attribute of
        that name (e.g. ``'sum'`` or ``'avg'``); otherwise the counter object
        itself is stored.
        """
        self.trim()
        result = {}
        for key in self.keys():
            value = self[key]
            if isinstance(value, CounterValue):
                result[key] = value.to_dict(get_value)
            elif get_value is not None:
                result[key] = getattr(value, get_value)
            else:
                result[key] = value
        return result

    def dump(self, filename):
        """Dump counters to file; return True on success, False on failure."""
        try:
            with open(filename, 'wb') as fp:
                cPickle.dump(self.counters, fp)
        except Exception:
            logging.error("can't dump counter to file: %s", filename)
            return False
        return True

    def load(self, filename):
        """Load counters from file; return True on success, False on failure.

        NOTE(review): unpickling can execute arbitrary code, so ``filename``
        must only ever point at a trusted file written by ``dump``.
        """
        try:
            # Pickle data is bytes, so the file has to be read in binary mode.
            with open(filename, 'rb') as fp:
                self.counters = cPickle.load(fp)
        except Exception:
            logging.debug("can't load counter from file: %s", filename)
            return False
        return True
437