Completed
Push — master ( 39eece...c8d455 )
by Roy
01:11
created

pyspider.libs.PrettyPrinter._format()   F

Complexity

Conditions 32

Size

Total Lines 99

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 32
dl 0
loc 99
rs 2

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

Complexity

Complex classes like pyspider.libs.PrettyPrinter._format() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#  Author:      Fred L. Drake, Jr.
2
#               fdrake@...
3
#
4
#  This is a simple little module I wrote to make life easier.  I didn't
5
#  see anything quite like it in the library, though I may have overlooked
6
#  something.  I wrote this when I was trying to read some heavily nested
7
#  tuples with fairly non-descriptive content.  This is modeled very much
8
#  after Lisp/Scheme - style pretty-printing of lists.  If you find it
9
#  useful, thank small children who sleep at night.
10
11
"""Support to pretty-print lists, tuples, & dictionaries recursively.
12
13
Very simple, but useful, especially in debugging data structures.
14
15
Classes
16
-------
17
18
PrettyPrinter()
19
    Handle pretty-printing operations onto a stream using a configured
20
    set of formatting parameters.
21
22
Functions
23
---------
24
25
pformat()
26
    Format a Python object into a pretty-printed representation.
27
28
pprint()
29
    Pretty-print a Python object to a stream [default is sys.stdout].
30
31
saferepr()
32
    Generate a 'standard' repr()-like value, but protect against recursive
33
    data structures.
34
35
"""
36
37
from __future__ import print_function
38
39
import six
40
import sys as _sys
41
42
from io import BytesIO, StringIO
43
44
__all__ = ["pprint", "pformat", "isreadable", "isrecursive", "saferepr",
45
           "PrettyPrinter"]
46
47
# cache these for faster access:
48
_commajoin = ", ".join
49
_id = id
50
_len = len
51
_type = type
52
53
54
def pprint(object, stream=None, indent=1, width=80, depth=None):
    """Pretty-print a Python object to a stream [default is sys.stdout].

    Convenience wrapper: builds a one-off ``PrettyPrinter`` from the given
    ``stream``, ``indent``, ``width`` and ``depth`` settings and asks it to
    print ``object``.
    """
    settings = dict(stream=stream, indent=indent, width=width, depth=depth)
    PrettyPrinter(**settings).pprint(object)
59
60
61
def pformat(object, indent=1, width=80, depth=None):
    """Format a Python object into a pretty-printed representation.

    Convenience wrapper: builds a one-off ``PrettyPrinter`` from ``indent``,
    ``width`` and ``depth`` and returns what its ``pformat()`` produces.
    """
    printer = PrettyPrinter(indent=indent, width=width, depth=depth)
    formatted = printer.pformat(object)
    return formatted
64
65
66
def saferepr(object):
    """Version of repr() which can handle recursive data structures.

    Returns only the text component of the triple computed by
    ``_safe_repr``; the readable/recursive flags are discarded.
    """
    text, _readable, _recursive = _safe_repr(object, {}, None, 0)
    return text
69
70
71
def isreadable(object):
    """Determine if saferepr(object) is readable by eval().

    Returns the "readable" flag of the triple computed by ``_safe_repr``.
    """
    _text, readable, _recursive = _safe_repr(object, {}, None, 0)
    return readable
74
75
76
def isrecursive(object):
    """Determine if object requires a recursive representation.

    Returns the "recursive" flag of the triple computed by ``_safe_repr``.
    """
    _text, _readable, recursive = _safe_repr(object, {}, None, 0)
    return recursive
79
80
81
def _sorted(iterable):
82
    return sorted(iterable)
83
84
85
class PrettyPrinter:
    """Write Python objects to a stream, wrapping nested containers.

    dict, list, tuple, set and frozenset instances whose type still uses the
    built-in ``__repr__`` are laid out element by element; any other object
    is written using the text returned by ``format()``.
    """

    def __init__(self, indent=1, width=80, depth=None, stream=None):
        """Handle pretty printing operations onto a stream using a set of
        configured parameters.

        indent
            Number of spaces to indent for each level of nesting.

        width
            Attempted maximum number of columns in the output.

        depth
            The maximum depth to print out nested structures.

        stream
            The desired output stream.  If omitted (or None), the standard
            output stream available at construction will be used.

        """
        indent = int(indent)
        width = int(width)
        # Kept as assertions so callers that expect AssertionError for bad
        # arguments keep working.
        assert indent >= 0, "indent must be >= 0"
        assert depth is None or depth > 0, "depth must be > 0"
        assert width, "width must be != 0"
        self._depth = depth
        self._indent_per_level = indent
        self._width = width
        if stream is not None:
            self._stream = stream
        else:
            self._stream = _sys.stdout

    def pprint(self, object):
        """Write the formatted *object* and a newline to the stream."""
        self._format(object, self._stream, 0, 0, {}, 0)
        self._stream.write("\n")

    def pformat(self, object):
        """Return the formatted representation of *object* as a string."""
        # Everything _format() writes is a native ``str``.  On Python 2 that
        # is a byte string, which io.StringIO rejects; on Python 3 it is
        # text, which io.BytesIO rejects.  Pick the buffer that accepts it.
        if _sys.version_info[0] < 3:
            sio = BytesIO()
        else:
            sio = StringIO()
        self._format(object, sio, 0, 0, {}, 0)
        return sio.getvalue()

    def isrecursive(self, object):
        """Return the "recursive" flag reported by ``format()``."""
        return self.format(object, {}, 0, 0)[2]

    def isreadable(self, object):
        """Return true if ``format()`` reports readable and not recursive."""
        s, readable, recursive = self.format(object, {}, 0, 0)
        return readable and not recursive

    def _format(self, object, stream, indent, allowance, context, level):
        """Write *object* to *stream*, dispatching on its container type.

        *indent* is the column at which continuation lines start,
        *allowance* the number of columns reserved for closing delimiters,
        *context* maps the ids of the containers currently being written
        (used to detect recursion) and *level* is the nesting depth of the
        caller.
        """
        level = level + 1
        if id(object) in context:
            # Already being written further up the call chain.
            stream.write(_recursion(object))
            self._recursive = True
            self._readable = False
            return
        rep = self._repr(object, context, level - 1)
        if self._depth and level > self._depth:
            # Beyond the configured depth: emit the one-line form unchanged.
            stream.write(rep)
            return

        # Break the container over several lines only when its one-line
        # form does not fit into the remaining width.
        sep_lines = len(rep) > (self._width - 1 - indent - allowance)
        typ = type(object)
        # Types that override __repr__ are written using their own text.
        r = getattr(typ, "__repr__", None)
        if issubclass(typ, dict) and r is dict.__repr__:
            self._format_dict(object, stream, indent, allowance, context,
                              level, sep_lines)
        elif ((issubclass(typ, list) and r is list.__repr__) or
              (issubclass(typ, tuple) and r is tuple.__repr__) or
              (issubclass(typ, set) and r is set.__repr__) or
              (issubclass(typ, frozenset) and r is frozenset.__repr__)):
            self._format_sequence(object, stream, indent, allowance, context,
                                  level, sep_lines)
        else:
            stream.write(rep)

    def _format_dict(self, object, stream, indent, allowance, context, level,
                     sep_lines):
        """Write a dict as ``{key: value, ...}`` with its items sorted."""
        write = stream.write
        write('{')
        if self._indent_per_level > 1:
            write((self._indent_per_level - 1) * ' ')
        if len(object):
            objid = id(object)
            context[objid] = 1
            indent = indent + self._indent_per_level
            for position, (key, ent) in enumerate(_sorted(object.items())):
                rep = self._repr(key, context, level)
                if position == 0:
                    write(rep)
                    write(': ')
                elif sep_lines:
                    write(',\n%s%s: ' % (' ' * indent, rep))
                else:
                    write(', %s: ' % rep)
                # The value starts after the key text and the ": " separator.
                self._format(ent, stream, indent + len(rep) + 2,
                             allowance + 1, context, level)
            del context[objid]
        write('}')

    def _format_sequence(self, object, stream, indent, allowance, context,
                         level, sep_lines):
        """Write a list, tuple, set or frozenset element by element."""
        write = stream.write
        typ = type(object)
        # Taken before sets are replaced by their sorted list, so recursion
        # tracking keeps referring to the original container.
        objid = id(object)
        length = len(object)
        if issubclass(typ, list):
            write('[')
            endchar = ']'
        elif issubclass(typ, set):
            if not length:
                write('set()')
                return
            write('set([')
            endchar = '])'
            object = _sorted(object)
            indent += 4
        elif issubclass(typ, frozenset):
            if not length:
                write('frozenset()')
                return
            write('frozenset([')
            endchar = '])'
            object = _sorted(object)
            indent += 10
        else:
            write('(')
            endchar = ')'
        if self._indent_per_level > 1 and sep_lines:
            write((self._indent_per_level - 1) * ' ')
        if length:
            context[objid] = 1
            indent = indent + self._indent_per_level
            for position, ent in enumerate(object):
                if position:
                    if sep_lines:
                        write(',\n' + ' ' * indent)
                    else:
                        write(', ')
                self._format(ent, stream, indent, allowance + 1,
                             context, level)
            del context[objid]
        if issubclass(typ, tuple) and length == 1:
            # A one-element tuple needs its trailing comma.
            write(',')
        write(endchar)

    def _repr(self, object, context, level):
        """Return the text from ``format()`` and record its two flags.

        ``format()`` receives a copy of *context*, so the caller's recursion
        bookkeeping is not modified by it.
        """
        repr, readable, recursive = self.format(object, context.copy(),
                                                self._depth, level)
        if not readable:
            self._readable = False
        if recursive:
            self._recursive = True
        return repr

    def format(self, object, context, maxlevels, level):
        """Format object for a specific context, returning a string
        and flags indicating whether the representation is 'readable'
        and whether the object represents a recursive construct.
        """
        return _safe_repr(object, context, maxlevels, level)
249
250
251
# Return triple (repr_string, isreadable, isrecursive).
252
253
def _safe_repr(object, context, maxlevels, level):
254
    typ = _type(object)
255
    if typ is str:
256
        string = object
257
        string = string.replace('\n', '\\n').replace('\r', '\\r').replace('\t', '\\t')
258
        if 'locale' not in _sys.modules:
259
            return repr(object), True, False
260
        if "'" in object and '"' not in object:
261
            closure = '"'
262
            quotes = {'"': '\\"'}
263
            string = string.replace('"', '\\"')
264
        else:
265
            closure = "'"
266
            quotes = {"'": "\\'"}
267
            string = string.replace("'", "\\'")
268
        try:
269
            string.decode('utf8').encode('gbk', 'replace')
270
            return ("%s%s%s" % (closure, string, closure)), True, False
271
        except:
272
            pass
273
        qget = quotes.get
274
        sio = StringIO()
275
        write = sio.write
276
        for char in object:
277
            if char.isalpha():
278
                write(char)
279
            else:
280
                write(qget(char, repr(char)[1:-1]))
281
        return ("%s%s%s" % (closure, sio.getvalue(), closure)), True, False
282
283
    if typ is six.text_type:
284
        string = object.encode("utf8", 'replace')
285
        string = string.replace('\n', '\\n').replace('\r', '\\r').replace('\t', '\\t')
286
        if "'" in object and '"' not in object:
287
            closure = '"'
288
            quotes = {'"': '\\"'}
289
            string = string.replace('"', '\\"')
290
        else:
291
            closure = "'"
292
            quotes = {"'": "\\'"}
293
            string = string.replace("'", "\\'")
294
        return ("u%s%s%s" % (closure, string, closure)), True, False
295
296
    r = getattr(typ, "__repr__", None)
297
    if issubclass(typ, dict) and r is dict.__repr__:
298
        if not object:
299
            return "{}", True, False
300
        objid = _id(object)
301
        if maxlevels and level >= maxlevels:
302
            return "{...}", False, objid in context
303
        if objid in context:
304
            return _recursion(object), False, True
305
        context[objid] = 1
306
        readable = True
307
        recursive = False
308
        components = []
309
        append = components.append
310
        level += 1
311
        saferepr = _safe_repr
312
        for k, v in _sorted(object.items()):
313
            krepr, kreadable, krecur = saferepr(k, context, maxlevels, level)
314
            vrepr, vreadable, vrecur = saferepr(v, context, maxlevels, level)
315
            append("%s: %s" % (krepr, vrepr))
316
            readable = readable and kreadable and vreadable
317
            if krecur or vrecur:
318
                recursive = True
319
        del context[objid]
320
        return "{%s}" % _commajoin(components), readable, recursive
321
322
    if (issubclass(typ, list) and r is list.__repr__) or \
323
            (issubclass(typ, tuple) and r is tuple.__repr__):
324
        if issubclass(typ, list):
325
            if not object:
326
                return "[]", True, False
327
            format = "[%s]"
328
        elif _len(object) == 1:
329
            format = "(%s,)"
330
        else:
331
            if not object:
332
                return "()", True, False
333
            format = "(%s)"
334
        objid = _id(object)
335
        if maxlevels and level >= maxlevels:
336
            return format % "...", False, objid in context
337
        if objid in context:
338
            return _recursion(object), False, True
339
        context[objid] = 1
340
        readable = True
341
        recursive = False
342
        components = []
343
        append = components.append
344
        level += 1
345
        for o in object:
346
            orepr, oreadable, orecur = _safe_repr(o, context, maxlevels, level)
347
            append(orepr)
348
            if not oreadable:
349
                readable = False
350
            if orecur:
351
                recursive = True
352
        del context[objid]
353
        return format % _commajoin(components), readable, recursive
354
355
    rep = repr(object)
356
    return rep, (rep and not rep.startswith('<')), False
357
358
359
def _recursion(object):
    """Build the placeholder written in place of a recursive reference.

    The text names the type of ``object`` and carries its ``id()``.
    """
    kind = _type(object).__name__
    ident = _id(object)
    return "<Recursion on %s with id=%s>" % (kind, ident)
362
363
364
def _perfcheck(object=None):
    """Time ``_safe_repr`` against ``PrettyPrinter.pformat`` and print both.

    When ``object`` is None a list of 100000 references to one small mixed
    tuple is used as the sample.  The two wall-clock durations, measured
    with ``time.time()``, are printed in seconds.
    """
    import time

    if object is None:
        object = [("string", (1, 2), [3, 4], {5: 6, 7: 8})] * 100000
    printer = PrettyPrinter()
    clock = time.time

    started = clock()
    _safe_repr(object, {}, None, 0)
    after_safe_repr = clock()
    printer.pformat(object)
    after_pformat = clock()

    safe_repr_seconds = after_safe_repr - started
    pformat_seconds = after_pformat - after_safe_repr
    print("_safe_repr:", safe_repr_seconds)
    print("pformat:", pformat_seconds)
376
377
if __name__ == "__main__":
378
    _perfcheck()
379