Total Complexity | 45 |
Total Lines | 174 |
Duplicated Lines | 0 % |
Complex classes like pyspider.libs.Response often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | #!/usr/bin/env python |
||
class Response(object):
    """Container for the outcome of a fetch.

    Holds the status code, URLs, headers, raw ``content`` and an optional
    ``error``, and exposes lazily computed, cached views of the body:
    ``encoding``, ``text``, ``json``, ``doc`` (PyQuery) and ``etree`` (lxml).
    """

    def __init__(self):
        self.status_code = None
        self.url = None
        self.orig_url = None
        self.headers = CaseInsensitiveDict()
        # Raw body; may be bytes or text (both cases are handled below).
        self.content = ''
        self.cookies = {}
        # Truthy when the fetch failed; turned into HTTPError by
        # raise_for_status().
        self.error = None
        self.save = None
        self.js_script_result = None
        self.time = 0

    def __repr__(self):
        # %s instead of %d: status_code stays None until it is filled in,
        # and %d would raise TypeError for such a response.
        return u'<Response [%s]>' % self.status_code

    def __bool__(self):
        """Return True when `raise_for_status()` would not raise (see `ok`)."""
        return self.ok

    def __nonzero__(self):
        """Python 2 spelling of `__bool__`; same result as `ok`."""
        return self.ok

    @property
    def ok(self):
        """Return True if `raise_for_status()` completes without raising.

        With the default ``allow_redirects=True`` this means: no stored
        error and a status code outside 400-599 (304 always counts as ok).
        """
        try:
            self.raise_for_status()
        except Exception:
            # Deliberately broad (any failure means "not ok"), but no longer
            # a bare except, so KeyboardInterrupt/SystemExit propagate.
            return False
        return True

    @property
    def encoding(self):
        """
        Encoding of Response.content.

        If no encoding was set explicitly, it is guessed, in order, from the
        headers, from the content itself, and from chardet if available,
        falling back to 'utf-8'. The result is cached in `_encoding`.
        """
        if hasattr(self, '_encoding'):
            return self._encoding

        # content is unicode
        if isinstance(self.content, six.text_type):
            return 'unicode'

        # Try charset from content-type
        encoding = get_encoding_from_headers(self.headers)
        if encoding == 'ISO-8859-1':
            # Treated as "not specified" so the content-based guesses run.
            encoding = None

        # Try charset from content
        if not encoding and get_encodings_from_content:
            if six.PY3:
                # Only the first 100 characters/bytes are inspected here.
                candidates = get_encodings_from_content(
                    utils.pretty_unicode(self.content[:100]))
            else:
                candidates = get_encodings_from_content(self.content)
            encoding = candidates[0] if candidates else None

        # Fallback to auto-detected encoding.
        if not encoding and chardet is not None:
            encoding = chardet.detect(self.content)['encoding']

        # Decode declared gb2312 with gb18030 instead.
        if encoding and encoding.lower() == 'gb2312':
            encoding = 'gb18030'

        self._encoding = encoding or 'utf-8'
        return self._encoding

    @encoding.setter
    def encoding(self, value):
        """
        Set the encoding of content manually.

        This overrides the guessed encoding and drops the cached `text`.
        """
        self._encoding = value
        self._text = None

    @property
    def text(self):
        """
        Content of the response, in unicode.

        Bytes content is decoded with `encoding` (undecodable bytes are
        replaced); if that codec is unknown, utf-8 is used instead. The
        decoded text is cached.
        """
        cached = getattr(self, '_text', None)
        if cached:
            return cached
        if not self.content:
            return u''
        if isinstance(self.content, six.text_type):
            return self.content

        encoding = self.encoding

        # Decode unicode from given encoding.
        try:
            content = self.content.decode(encoding, 'replace')
        except LookupError:
            # A LookupError is raised if the encoding was not found which
            # could indicate a misspelling or similar mistake.
            #
            # So we blindly try decoding as utf-8.
            content = self.content.decode('utf-8', 'replace')

        self._text = content
        return content

    @property
    def json(self):
        """Return the body parsed as JSON, or None if it does not parse.

        The result (including None) is cached in `_json`.
        """
        if hasattr(self, '_json'):
            return self._json
        try:
            self._json = json.loads(self.text or self.content)
        except ValueError:
            self._json = None
        return self._json

    @property
    def doc(self):
        """Return a PyQuery object of the response's content.

        Built from `etree`, with links made absolute against `url`; cached.
        """
        if hasattr(self, '_doc'):
            return self._doc
        elements = self.etree
        doc = self._doc = PyQuery(elements)
        doc.make_links_absolute(self.url)
        return doc

    @property
    def etree(self):
        """Return a lxml object of the response's content that can be selected by xpath."""
        if not hasattr(self, '_elements'):
            try:
                parser = lxml.html.HTMLParser(encoding=self.encoding)
                self._elements = lxml.html.fromstring(self.content, parser=parser)
            except LookupError:
                # lxml would raise LookupError when encoding not supported
                # try fromstring without encoding instead.
                # on windows, unicode is not available as encoding for lxml
                self._elements = lxml.html.fromstring(self.content)
        if isinstance(self._elements, lxml.etree._ElementTree):
            self._elements = self._elements.getroot()
        return self._elements

    def raise_for_status(self, allow_redirects=True):
        """Raise :class:`HTTPError` if the response represents a failure.

        A 304 status never raises. Otherwise an error is raised when `error`
        is set, when the status is 4xx or 5xx, or when the status is 3xx and
        `allow_redirects` is false. The raised exception carries this
        response as its `response` attribute.
        """
        if self.status_code == 304:
            return
        elif self.error:
            http_error = HTTPError(self.error)
        elif (self.status_code >= 300) and (self.status_code < 400) and not allow_redirects:
            http_error = HTTPError('%s Redirection' % (self.status_code))
        elif (self.status_code >= 400) and (self.status_code < 500):
            http_error = HTTPError('%s Client Error' % (self.status_code))
        elif (self.status_code >= 500) and (self.status_code < 600):
            http_error = HTTPError('%s Server Error' % (self.status_code))
        else:
            return

        http_error.response = self
        raise http_error

    def isok(self):
        """Method form of the `ok` property; returns the same value."""
        return self.ok
||
198 | |||
213 |