Total Complexity | 99 |
Total Lines | 434 |
Duplicated Lines | 0 % |
Complex classes like postal_address.Address often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # -*- coding: utf-8 -*- |
||
class Address(object):

    """ Define a postal address.

    All addresses share the following fields:

    * ``line1`` (required): a non-constrained string.
    * ``line2``: a non-constrained string.
    * ``postal_code`` (required): a non-constrained string (see issue #2).
    * ``city_name`` (required): a non-constrained string.
    * ``country_code`` (required): an ISO 3166-1 alpha-2 code.
    * ``subdivision_code``: an ISO 3166-2 code.

    At instantiation, the ``normalize()`` method is called. The latter tries
    to clean-up the data and populate empty fields that can be derived from
    others. As such, ``city_name`` can be overridden by ``subdivision_code``.
    See the internal ``SUBDIVISION_METADATA_WHITELIST`` constant.

    If inconsistencies are found at the normalization step, they are left as-is
    to give a chance to the ``validate()`` method to catch them. Which means
    that, after each normalization (including the one at initialization), it is
    your job to call the ``validate()`` method manually to check that the
    address is good.

    Fields are readable and writable both as attributes (``address.line1``)
    and as dictionary items (``address['line1']``).
    """

    # All normalized field IDs and values of the address are stored in the
    # per-instance ``_fields`` dict, which is created by ``__init__()``.

    # Fields common to any postal address. Those are free-form fields, allowed
    # to be set directly by the user, although their values might be normalized
    # and cleaned-up automatically by the normalization method.
    BASE_FIELD_IDS = frozenset([
        'line1', 'line2', 'postal_code', 'city_name', 'country_code',
        'subdivision_code'])

    # List of subdivision-derived metadata IDs which are allowed to collide
    # with base field IDs.
    SUBDIVISION_METADATA_WHITELIST = frozenset(['city_name'])
    assert SUBDIVISION_METADATA_WHITELIST.issubset(BASE_FIELD_IDS)

    # Fields tested on validate().
    REQUIRED_FIELDS = frozenset([
        'line1', 'postal_code', 'city_name', 'country_code'])
    assert REQUIRED_FIELDS.issubset(BASE_FIELD_IDS)

    def __init__(self, strict=True, **kwargs):
        """ Set address' individual fields and normalize them.

        Only IDs listed in ``BASE_FIELD_IDS`` are accepted as keyword
        arguments; anything else raises a ``KeyError``.

        By default, normalization is ``strict``.
        """
        # Only common fields are allowed to be set directly.
        unknown_fields = set(kwargs).difference(self.BASE_FIELD_IDS)
        if unknown_fields:
            raise KeyError(
                "{!r} fields are not allowed to be set freely.".format(
                    unknown_fields))
        # Initialize base fields values to None.
        self._fields = dict.fromkeys(self.BASE_FIELD_IDS)
        # Load provided fields through __setitem__ so they get type-checked.
        for field_id, field_value in kwargs.items():
            self[field_id] = field_value
        # Normalize addresses fields.
        self.normalize(strict=strict)

    def __repr__(self):
        """ Print all fields available from the address.

        Also include internal fields disguised as properties.
        """
        # Repr all plain fields.
        fields_repr = ['{}={!r}'.format(k, v) for k, v in self.items()]
        # Repr all internal properties.
        for internal_id in [
                'valid', 'empty', 'country_name', 'subdivision_name',
                'subdivision_type_name', 'subdivision_type_id']:
            fields_repr.append(
                '{}={!r}'.format(internal_id, getattr(self, internal_id)))
        return '{}({})'.format(
            self.__class__.__name__, ', '.join(sorted(fields_repr)))

    def __unicode__(self):
        """ Return a simple unicode string representation of the address block.
        """
        return self.render()

    def __str__(self):
        """ Return the rendered address block as a native string.

        On Python 2 ``str`` is a byte string, so the text is encoded to UTF-8.
        On Python 3 the text is returned untouched: there, ``__str__`` must
        return text and the ``unicode`` builtin does not exist.
        """
        text = self.__unicode__()
        if str is bytes:
            # Python 2 only.
            return text.encode('utf-8')
        return text

    def __getattr__(self, name):
        """ Expose fields as attributes.

        Only called when regular attribute lookup fails. Raises
        ``AttributeError`` if ``name`` is not a known field.
        """
        # Fetch the storage from the instance dict directly: going through
        # ``self._fields`` would re-enter this method, and recurse forever, on
        # instances whose ``__init__()`` has not run (copy, unpickling, ...).
        fields = self.__dict__.get('_fields')
        if fields is not None and name in fields:
            return fields[name]
        raise AttributeError(
            "{!r} object has no attribute {!r}".format(
                self.__class__.__name__, name))

    def __setattr__(self, name, value):
        """ Allow update of address fields as attributes.

        Base fields are routed to ``__setitem__()``; any other name is set as
        a regular instance attribute.
        """
        if name in self.BASE_FIELD_IDS:
            self[name] = value
            return
        super(Address, self).__setattr__(name, value)

    # Let an address be accessed like a dict of its fields IDs & values.
    # This is a proxy to the internal _fields dict.

    def __len__(self):
        """ Return the number of fields. """
        return len(self._fields)

    def __getitem__(self, key):
        """ Return the value of a field.

        Raises ``TypeError`` if the key is not a string and ``KeyError`` if no
        field is stored under that ID.
        """
        if not isinstance(key, basestring):
            raise TypeError(
                "Field ID must be a string, not {!r}.".format(key))
        return self._fields[key]

    def __setitem__(self, key, value):
        """ Set a field's value.

        Only base fields are allowed to be set explicitly. Raises
        ``TypeError`` if the key is not a string or if the value is neither a
        string nor ``None``, and ``KeyError`` if the key is not a base field.
        """
        if not isinstance(key, basestring):
            raise TypeError(
                "Field ID must be a string, not {!r}.".format(key))
        if not (isinstance(value, basestring) or value is None):
            raise TypeError(
                "Field value must be a string or None, not {!r}.".format(
                    value))
        if key not in self.BASE_FIELD_IDS:
            raise KeyError(key)
        self._fields[key] = value

    def __delitem__(self, key):
        """ Remove a field.

        Base fields are never removed from the storage: they are reset to
        ``None`` instead. Any other field is deleted.
        """
        if key in self.BASE_FIELD_IDS:
            self._fields[key] = None
        else:
            del self._fields[key]

    def __iter__(self):
        """ Iterate over field IDs. """
        for field_id in self._fields:
            yield field_id

    def keys(self):
        """ Return field IDs, as produced by the internal dict. """
        return self._fields.keys()

    def values(self):
        """ Return field values, as produced by the internal dict. """
        return self._fields.values()

    def items(self):
        """ Return field IDs & values pairs, as produced by the internal dict.
        """
        return self._fields.items()

    def render(self, separator='\n'):
        """ Render a human-friendly address block.

        The block is composed of:

        * The ``line1`` field rendered as-is if not empty.
        * The ``line2`` field rendered as-is if not empty.
        * A third line made of the postal code, the city name and state name
          if any is set.
        * A fourth optional line with the subdivision name if its value does
          not overlap with the city, state or country name.
        * The last line features the country's common name.

        Lines are joined with ``separator`` and the result is returned.
        """
        lines = []

        if self.line1:
            lines.append(self.line1)

        if self.line2:
            lines.append(self.line2)

        # Build the third line, starting with city and state separated by a
        # comma.
        locality_elements = []
        if self.city_name:
            locality_elements.append(self.city_name)
        # ``state_name`` is not a base field: it only exists once populated by
        # normalize().
        state_name = getattr(self, 'state_name', None)
        if state_name:
            locality_elements.append(state_name)
        locality = ', '.join(locality_elements)

        # Separate the leading zip code and the rest by a dash. Blank parts
        # are skipped so that a lone postal code gets no dangling separator.
        line3_elements = []
        if self.postal_code:
            line3_elements.append(self.postal_code)
        if locality:
            line3_elements.append(locality)
        line3 = ' - '.join(line3_elements)
        if line3:
            lines.append(line3)

        # Compare the vanilla subdivision name to properties that are based on
        # it and used in the current ``render()`` method to produce a printable
        # address. If none overlap, then print an additional line with the
        # subdivision name as-is to provide extra, non-redundant, territory
        # precision.
        subdiv_based_properties = [
            'city_name', 'state_name', 'country_name']
        subdiv_based_values = [
            getattr(self, prop_id) for prop_id in subdiv_based_properties
            if hasattr(self, prop_id)]
        if self.subdivision_name and \
                self.subdivision_name not in subdiv_based_values:
            lines.append(self.subdivision_name)

        # Place the country line at the end.
        if self.country_name:
            lines.append(self.country_name)

        # Render the address block with the provided separator.
        return separator.join(lines)

    def normalize(self, strict=True):
        """ Normalize address fields.

        If values are unrecognized or invalid, they will be set to None.

        By default, the normalization is ``strict``: metadata derived from
        territory's parents are not allowed to overwrite valid address fields
        entered by the user, and an ``InvalidAddress`` exception is raised on
        such a conflict. If set to ``False``, territory-derived values take
        precedence over user's.

        You need to call back the ``validate()`` method afterwards to properly
        check that the fully-qualified address is ready for consumption.
        """
        # Strip postal codes of any characters but alphanumerics, spaces and
        # hyphens.
        if self.postal_code:
            postal_code = self.postal_code.upper()
            # Remove unrecognized characters.
            postal_code = re.sub(r'[^A-Z0-9 -]', '', postal_code)
            # Reduce sequences of mixed hyphens and spaces to single hyphen.
            postal_code = re.sub(
                r'[^A-Z0-9]*-+[^A-Z0-9]*', '-', postal_code)
            # Edge case: remove leading and trailing hyphens. Surrounding
            # spaces are taken care of by the space normalization below.
            self.postal_code = postal_code.strip('-')

        # Normalize spaces: trim, and collapse any whitespace run to one space.
        for field_id, field_value in self.items():
            if isinstance(field_value, basestring):
                self[field_id] = ' '.join(field_value.split())

        # Reset empty and blank strings. IDs are collected first as deleting
        # a non-base field changes the size of the underlying dict.
        empty_fields = [f_id for f_id, f_value in self.items() if not f_value]
        for field_id in empty_fields:
            del self[field_id]

        # Swap lines if the first is empty.
        if self.line2 and not self.line1:
            self.line1, self.line2 = self.line2, self.line1

        # Normalize territory codes. Unrecognized territory codes are reset
        # to None.
        for territory_id in ['country_code', 'subdivision_code']:
            territory_code = getattr(self, territory_id)
            if territory_code:
                try:
                    code = normalize_territory_code(
                        territory_code, resolve_aliases=False)
                except ValueError:
                    code = None
                setattr(self, territory_id, code)

        # Try to set default subdivision from country if not set.
        if self.country_code and not self.subdivision_code:
            self.subdivision_code = default_subdivision_code(self.country_code)
            # If the country set its own subdivision, reset it. It will be
            # properly re-guessed below.
            if self.subdivision_code:
                self.country_code = None

        # Automatically populate address fields with metadata extracted from
        # all subdivision parents.
        if self.subdivision_code:
            parent_metadata = {
                # All subdivisions have a parent country.
                'country_code': country_from_subdivision(
                    self.subdivision_code)}

            # Add metadata of each subdivision parent.
            for parent_subdiv in territory_parents(
                    self.subdivision_code, include_country=False):
                parent_metadata.update(subdivision_metadata(parent_subdiv))

            # Parent metadata are not allowed to overwrite address fields
            # if not blank, unless strict mode is de-activated.
            if strict:
                for field_id, new_value in parent_metadata.items():
                    # New metadata are not allowed to be blank.
                    assert new_value
                    current_value = self._fields.get(field_id)
                    if current_value and field_id in self.BASE_FIELD_IDS:

                        # Build the list of substitute values that are
                        # equivalent to our new normalized target.
                        alias_values = set([new_value])
                        if field_id == 'country_code':
                            # Allow normalization if the current country code
                            # is the direct parent of a subdivision which also
                            # have its own country code.
                            alias_values.add(subdivisions.get(
                                code=self.subdivision_code).country_code)

                        # Change of current value is allowed if it is a direct
                        # substitute to our new normalized value.
                        if current_value not in alias_values:
                            raise InvalidAddress(
                                inconsistent_fields=set([
                                    tuple(sorted((
                                        field_id, 'subdivision_code')))]),
                                extra_msg="{} subdivision is trying to replace"
                                " {}={!r} field by {}={!r}".format(
                                    self.subdivision_code,
                                    field_id, current_value,
                                    field_id, new_value))

            # Written straight to the storage: derived metadata may carry IDs
            # that are not base fields, which __setitem__() would reject.
            self._fields.update(parent_metadata)

    def validate(self):
        """ Check fields consistency and requirements in one go.

        Properly check that fields are consistent between themselves, and only
        raise an exception at the end, for the whole address object. Our custom
        ``InvalidAddress`` exception will provide a detailed status of bad
        fields.
        """
        # Keep a classification of bad fields along the validation process.
        required_fields = set()
        invalid_fields = set()
        inconsistent_fields = set()

        # Check that all required fields are set.
        for field_id in self.REQUIRED_FIELDS:
            if not getattr(self, field_id):
                required_fields.add(field_id)

        # Check all fields for invalidity, only if not previously flagged as
        # required.
        if 'country_code' not in required_fields:
            # Check that the country code exists.
            try:
                countries.get(alpha2=self.country_code)
            except KeyError:
                invalid_fields.add('country_code')
        if self.subdivision_code and 'subdivision_code' not in required_fields:
            # Check that the subdivision code exists.
            try:
                subdivisions.get(code=self.subdivision_code)
            except KeyError:
                invalid_fields.add('subdivision_code')

        # Check country consistency against subdivision, only if none of the
        # two fields were previously flagged as required or invalid.
        if self.subdivision_code and not set(
                ['country_code', 'subdivision_code']).intersection(
                    required_fields.union(invalid_fields)) and \
                country_from_subdivision(
                    self.subdivision_code) != self.country_code:
            inconsistent_fields.add(
                tuple(sorted(('country_code', 'subdivision_code'))))

        # Raise our custom exception at last.
        if required_fields or invalid_fields or inconsistent_fields:
            raise InvalidAddress(
                required_fields, invalid_fields, inconsistent_fields)

    @property
    def valid(self):
        """ Return a boolean indicating if the address is valid. """
        try:
            self.validate()
        except InvalidAddress:
            return False
        return True

    @property
    def empty(self):
        """ Return True only if all fields are empty. """
        return not any(self.values())

    def __bool__(self):
        """ Consider the instance to be True if not empty. """
        return not self.empty

    def __nonzero__(self):
        """ Python2 retro-compatibility of ``__bool__()``. """
        return self.__bool__()

    @property
    def country(self):
        """ Return country object, or None if no country code is set. """
        if self.country_code:
            return countries.get(alpha2=self.country_code)
        return None

    @property
    def country_name(self):
        """ Return country's name, or None if no country is set.

        Common name always takes precedence over the default name, as the
        latter is often pompous, and sometimes false (i.e. not in sync with
        current political situation).
        """
        if self.country:
            if hasattr(self.country, 'common_name'):
                return self.country.common_name
            return self.country.name
        return None

    @property
    def subdivision(self):
        """ Return subdivision object, or None if no subdivision code is set.
        """
        if self.subdivision_code:
            return subdivisions.get(code=self.subdivision_code)
        return None

    @property
    def subdivision_name(self):
        """ Return subdivision's name, or None if no subdivision is set. """
        if self.subdivision:
            return self.subdivision.name
        return None

    @property
    def subdivision_type_name(self):
        """ Return subdivision's type human-readable name, or None if no
        subdivision is set.
        """
        if self.subdivision:
            return self.subdivision.type
        return None

    @property
    def subdivision_type_id(self):
        """ Return subdivision's type as a Python-friendly ID string, or None
        if no subdivision is set.
        """
        if self.subdivision:
            return subdivision_type_id(self.subdivision)
        return None
516 | |||
730 |