| Total Complexity | 45 |
| Total Lines | 174 |
| Duplicated Lines | 0 % |
Complex classes like pyspider.libs.Response often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | #!/usr/bin/env python |
||
class Response(object):
    """Response object exposing status, headers and body, plus lazily
    computed views of the body (`text`, `json`, `doc`, `etree`).

    The lazily computed views are cached on the instance in private
    attributes (`_encoding`, `_text`, `_json`, `_doc`, `_elements`).
    """

    def __init__(self):
        self.status_code = None
        self.url = None
        self.orig_url = None
        self.headers = CaseInsensitiveDict()
        self.content = ''
        self.cookies = {}
        # When truthy, raise_for_status() raises an HTTPError built from it.
        self.error = None
        self.save = None
        self.js_script_result = None
        self.time = 0

    def __repr__(self):
        # %s instead of %d: status_code is None until it is assigned, and
        # %d would raise TypeError for None. Integers render the same.
        return u'<Response [%s]>' % self.status_code

    def __bool__(self):
        """Return True when `raise_for_status()` would not raise."""
        return self.ok

    # Python 2 looks up __nonzero__ for truth testing.
    __nonzero__ = __bool__

    @property
    def ok(self):
        """Return True when `raise_for_status()` completes without raising."""
        try:
            self.raise_for_status()
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            return False
        return True

    @property
    def encoding(self):
        """
        encoding of Response.content.

        if Response.encoding is None (or was never set), encoding will be
        guessed by header or content or chardet if available, and the guess
        is cached for later calls.
        """
        explicit = getattr(self, '_encoding', None)
        if explicit is not None:
            return explicit

        # content is unicode
        if isinstance(self.content, six.text_type):
            return 'unicode'

        # Try charset from content-type
        encoding = get_encoding_from_headers(self.headers)
        # NOTE(review): ISO-8859-1 is discarded, presumably because the
        # header helper reports it as a default rather than a declared
        # charset -- confirm against get_encoding_from_headers.
        if encoding == 'ISO-8859-1':
            encoding = None

        # Try charset from content
        # (the helper is truth-tested, so it is presumably optional)
        if not encoding and get_encodings_from_content:
            if six.PY3:
                candidates = get_encodings_from_content(
                    utils.pretty_unicode(self.content[:100]))
            else:
                candidates = get_encodings_from_content(self.content)
            encoding = candidates[0] if candidates else None

        # Fallback to auto-detected encoding.
        if not encoding and chardet is not None:
            encoding = chardet.detect(self.content)['encoding']

        # Decode pages labelled gb2312 with gb18030 instead.
        if encoding and encoding.lower() == 'gb2312':
            encoding = 'gb18030'

        self._encoding = encoding or 'utf-8'
        return self._encoding

    @encoding.setter
    def encoding(self, value):
        """
        set encoding of content manually
        it will overwrite the guessed encoding

        Setting it to None makes the next read of `encoding` guess again.
        Cached values derived from the previous encoding are dropped.
        """
        self._encoding = value
        self._text = None
        # json/doc/etree are all computed from the decoded content, so they
        # must not outlive an encoding change.
        for cache_name in ('_json', '_doc', '_elements'):
            self.__dict__.pop(cache_name, None)

    @property
    def text(self):
        """
        Content of the response, in unicode.

        Bytes content is decoded with `self.encoding` (undecodable bytes are
        replaced); an unknown encoding name falls back to utf-8.
        """
        cached = getattr(self, '_text', None)
        if cached:
            return cached
        if not self.content:
            return u''
        if isinstance(self.content, six.text_type):
            return self.content

        # Decode unicode from given encoding.
        try:
            decoded = self.content.decode(self.encoding, 'replace')
        except LookupError:
            # A LookupError is raised if the encoding was not found which
            # could indicate a misspelling or similar mistake.
            #
            # So we try blindly decoding as utf-8.
            decoded = self.content.decode('utf-8', 'replace')

        self._text = decoded
        return decoded

    @property
    def json(self):
        """Returns the parsed JSON content of the response, or None when the
        content is not valid JSON. The result is cached."""
        if hasattr(self, '_json'):
            return self._json
        try:
            self._json = json.loads(self.text or self.content)
        except ValueError:
            self._json = None
        return self._json

    @property
    def doc(self):
        """Returns a PyQuery object of the response's content"""
        if hasattr(self, '_doc'):
            return self._doc
        doc = self._doc = PyQuery(self.etree)
        doc.make_links_absolute(self.url)
        return doc

    @property
    def etree(self):
        """Returns a lxml object of the response's content that can be selected by xpath"""
        if not hasattr(self, '_elements'):
            try:
                parser = lxml.html.HTMLParser(encoding=self.encoding)
                self._elements = lxml.html.fromstring(self.content, parser=parser)
            except LookupError:
                # lxml would raise LookupError when encoding not supported
                # try fromstring without encoding instead.
                # on windows, unicode is not available as encoding for lxml
                self._elements = lxml.html.fromstring(self.content)
        if isinstance(self._elements, lxml.etree._ElementTree):
            self._elements = self._elements.getroot()
        return self._elements

    def raise_for_status(self, allow_redirects=True):
        """Raises :class:`HTTPError` if the response carries an error or an
        error status code.

        A 304 status never raises. Otherwise a truthy `error` raises first,
        then 3xx (only when `allow_redirects` is false), 4xx and 5xx status
        codes. The raised exception has its `response` attribute set to
        this object.
        """
        if self.status_code == 304:
            return
        elif self.error:
            http_error = HTTPError(self.error)
        elif 300 <= self.status_code < 400 and not allow_redirects:
            http_error = HTTPError('%s Redirection' % (self.status_code))
        elif 400 <= self.status_code < 500:
            http_error = HTTPError('%s Client Error' % (self.status_code))
        elif 500 <= self.status_code < 600:
            http_error = HTTPError('%s Server Error' % (self.status_code))
        else:
            return

        http_error.response = self
        raise http_error

    def isok(self):
        """Method form of the `ok` property; returns the same value."""
        return self.ok
||
| 198 | |||
| 213 |