InvalidAddress.__str__()   C
last analyzed

Complexity

Conditions 9

Size

Total Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 9
c 1
b 0
f 0
dl 0
loc 20
rs 6.4615
1
# -*- coding: utf-8 -*-
2
#
3
# Copyright (c) 2013-2017 Scaleway and Contributors. All Rights Reserved.
4
#                         Kevin Deldycke <[email protected]>
5
#                         Gilles Dartiguelongue <[email protected]>
6
#
7
# Licensed under the BSD 2-Clause License (the "License"); you may not use this
8
# file except in compliance with the License. You may obtain a copy of the
9
# License at http://opensource.org/licenses/BSD-2-Clause
10
11
u""" Utilities for address parsing and rendering.
12
13
Only provides address validation for the moment, but may be used in the future
14
for localized rendering (see issue #4).
15
"""
16
17
from __future__ import (
18
    absolute_import,
19
    division,
20
    print_function,
21
    unicode_literals
22
)
23
24
import random
25
import re
26
27
import faker
28
from boltons.strutils import slugify
29
from pycountry import countries, subdivisions
30
31
from . import PY2, PY3
32
from .territory import (
33
    country_from_subdivision,
34
    default_subdivision_code,
35
    normalize_territory_code,
36
    territory_children_codes,
37
    territory_parents
38
)
39
40
# Python 3 has no ``basestring`` builtin: alias the name to the string types so
# the ``isinstance()`` checks of this module behave the same on both versions.
if PY3:
    basestring = str, bytes
42
43
44
class InvalidAddress(ValueError):
    """ Raised when an address fails validation, with details on why.

    Offending fields are sorted into three buckets:
    * ``required_fields``: IDs of the fields that are missing.
    * ``invalid_fields``: mapping of field IDs to their rejected value.
    * ``inconsistent_fields``: pairs of field IDs contradicting each other.

    ``extra_msg`` is an optional free-form text appended to the message.
    """

    def __init__(self, required_fields=None, invalid_fields=None,
                 inconsistent_fields=None, extra_msg=None):
        """ Store the classification, defaulting to empty containers. """
        super(InvalidAddress, self).__init__()
        self.required_fields = required_fields or set()
        self.invalid_fields = invalid_fields or dict()
        self.inconsistent_fields = inconsistent_fields or set()
        self.extra_msg = extra_msg

    def __str__(self):
        """ Build a one-line message: reasons joined by ``; `` plus a dot. """
        parts = []

        if self.required_fields:
            verb = 'is' if len(self.required_fields) == 1 else 'are'
            missing = ', '.join(sorted(self.required_fields))
            parts.append('{} {} required'.format(missing, verb))

        if self.invalid_fields:
            verb = 'is' if len(self.invalid_fields) == 1 else 'are'
            rejected = sorted(
                '{}={!r}'.format(field_id, value)
                for field_id, value in self.invalid_fields.items())
            parts.append('{} {} invalid'.format(', '.join(rejected), verb))

        if self.inconsistent_fields:
            parts.extend(
                '{} is inconsistent with {}'.format(left_id, right_id)
                for left_id, right_id in sorted(self.inconsistent_fields))

        if self.extra_msg:
            parts.append(self.extra_msg)

        return '; '.join(parts) + '.'
78
79
80
class Address(object):

    """ Define a postal address.

    All addresses share the following fields:
    * ``line1`` (required): a non-constrained string.
    * ``line2``: a non-constrained string.
    * ``postal_code`` (required): a non-constrained string (see issue #2).
    * ``city_name`` (required): a non-constrained string.
    * ``country_code`` (required): an ISO 3166-1 alpha-2 code.
    * ``subdivision_code``: an ISO 3166-2 code.

    At instantiation, the ``normalize()`` method is called. The latter tries
    to clean-up the data and populate empty fields that can be derived from
    others. As such, ``city_name`` can be overridden by ``subdivision_code``.
    See the internal ``SUBDIVISION_METADATA_WHITELIST`` constant.

    If inconsistencies are found at the normalization step, they are left as-is
    to give a chance to the ``validate()`` method to catch them. Which means
    that, after each normalization (including the one at initialization), it is
    your job to call the ``validate()`` method manually to check that the
    address is good.

    Fields are reachable both as attributes (``address.line1``) and as items
    (``address['line1']``); both are proxies to the internal ``_fields`` dict.
    """

    # Fields common to any postal address. Those are free-form fields, allowed
    # to be set directly by the user, although their values might be normalized
    # and cleaned-up automatically by the normalization method.
    BASE_FIELD_IDS = frozenset([
        'line1', 'line2', 'postal_code', 'city_name', 'country_code',
        'subdivision_code'])

    # List of subdivision-derived metadata IDs which are allowed to collide
    # with base field IDs.
    SUBDIVISION_METADATA_WHITELIST = frozenset(['city_name'])
    assert SUBDIVISION_METADATA_WHITELIST.issubset(BASE_FIELD_IDS)

    # Fields tested on validate().
    REQUIRED_FIELDS = frozenset([
        'line1', 'postal_code', 'city_name', 'country_code'])
    assert REQUIRED_FIELDS.issubset(BASE_FIELD_IDS)

    def __init__(self, strict=True, **kwargs):
        """ Set address' individual fields and normalize them.

        Only IDs listed in ``BASE_FIELD_IDS`` are accepted as keyword
        arguments; any other raises a ``KeyError``.

        By default, normalization is ``strict``.
        """
        # Only common fields are allowed to be set directly.
        unknown_fields = set(kwargs).difference(self.BASE_FIELD_IDS)
        if unknown_fields:
            raise KeyError(
                "{!r} fields are not allowed to be set freely.".format(
                    unknown_fields))

        # Normalized field's IDs and values of the address are stored here.
        self._fields = dict.fromkeys(self.BASE_FIELD_IDS)

        # Load provided fields.
        for field_id, field_value in kwargs.items():
            self[field_id] = field_value

        # Normalize addresses fields.
        self.normalize(strict=strict)

    def __repr__(self):
        """ Print all fields available from the address.

        Also include internal fields disguised as properties.
        """
        # Repr all plain fields.
        fields_repr = ['{}={!r}'.format(k, v) for k, v in self.items()]
        # Repr all internal properties.
        for internal_id in [
                'valid', 'empty', 'country_name', 'subdivision_name',
                'subdivision_type_name', 'subdivision_type_id']:
            fields_repr.append(
                '{}={!r}'.format(internal_id, getattr(self, internal_id)))
        return '{}({})'.format(
            self.__class__.__name__, ', '.join(sorted(fields_repr)))

    def __unicode__(self):
        """ Return a simple unicode string representation of the address block.
        """
        return self.render()

    def __str__(self):
        """ Same as __unicode__ but with Python 2 compatibility. """
        string = self.__unicode__()
        if PY2:
            string = string.encode('utf-8')
        return string

    def __getattr__(self, name):
        """ Expose fields as attributes.

        Only called when the regular attribute lookup fails. Raises
        ``AttributeError`` if ``name`` is not a known field.
        """
        # Read the store straight from the instance ``__dict__``: going
        # through ``self._fields`` would re-enter this method forever when
        # the instance has not been initialized yet (e.g. while being rebuilt
        # by ``copy`` or ``pickle``).
        fields = self.__dict__.get('_fields', {})
        if name in fields:
            return fields[name]
        raise AttributeError(
            "{!r} object has no attribute {!r}".format(
                self.__class__.__name__, name))

    def __setattr__(self, name, value):
        """ Allow update of address fields as attributes. """
        if name in self.BASE_FIELD_IDS:
            self[name] = value
            return
        super(Address, self).__setattr__(name, value)

    # Let an address be accessed like a dict of its fields IDs & values.
    # This is a proxy to the internal _fields dict.

    def __len__(self):
        """ Return the number of fields. """
        return len(self._fields)

    def __getitem__(self, key):
        """ Return the value of a field.

        Raises ``TypeError`` if the key is not a string and ``KeyError`` if
        the field is unknown.
        """
        if not isinstance(key, basestring):
            raise TypeError(
                "Field ID must be a string, not {!r}.".format(key))
        return self._fields[key]

    def __setitem__(self, key, value):
        """ Set a field's value.

        Only base fields are allowed to be set explicitly, and only to a
        string or ``None``.
        """
        if not isinstance(key, basestring):
            raise TypeError(
                "Field ID must be a string, not {!r}.".format(key))
        if not (isinstance(value, basestring) or value is None):
            raise TypeError(
                "{} field value must be a string or None, not {!r}.".format(
                    key, value))
        if key not in self.BASE_FIELD_IDS:
            raise KeyError(
                "{!r} field is not allowed to be set freely.".format(key))
        self._fields[key] = value

    def __delitem__(self, key):
        """ Remove a field.

        Base fields are never dropped from the store, only reset to ``None``.
        """
        if key in self.BASE_FIELD_IDS:
            self._fields[key] = None
        else:
            del self._fields[key]

    def __iter__(self):
        """ Iterate over field IDs. """
        for field_id in self._fields:
            yield field_id

    def keys(self):
        """ Return field IDs, as provided by the internal dict. """
        return self._fields.keys()

    def values(self):
        """ Return field values, as provided by the internal dict. """
        return self._fields.values()

    def items(self):
        """ Return field IDs & values, as provided by the internal dict. """
        return self._fields.items()

    def render(self, separator='\n'):
        """ Render a human-friendly address block.

        The block is composed of:
        * The ``line1`` field rendered as-is if not empty.
        * The ``line2`` field rendered as-is if not empty.
        * A third line made of the postal code, the city name and state name if
          any is set.
        * A fourth optional line with the subdivision name if its value does
          not overlap with the city, state or country name.
        * The last line feature country's common name.

        Lines are joined with ``separator``.
        """
        lines = []

        if self.line1:
            lines.append(self.line1)

        if self.line2:
            lines.append(self.line2)

        # Build the third line.
        line3_elements = []
        if self.city_name:
            line3_elements.append(self.city_name)
        if hasattr(self, 'state_name'):
            # XXX It might not be a good idea to deduplicate state and city.
            # See: https://en.wikipedia.org/wiki
            # /List_of_U.S._cities_named_after_their_state
            line3_elements.append(self.state_name)
        # Separate city and state by a comma.
        line3_elements = [', '.join(line3_elements)]
        if self.postal_code:
            line3_elements.insert(0, self.postal_code)
        # Separate the leading zip code and the rest by a dash.
        line3 = ' - '.join(line3_elements)
        if line3:
            lines.append(line3)

        # Compare the vanilla subdivision name to properties that are based on
        # it and used in the current ``render()`` method to produce a printable
        # address. If none overlap, then print an additional line with the
        # subdivision name as-is to provide extra, non-redundant, territory
        # precision.
        subdiv_based_properties = [
            'city_name', 'state_name', 'country_name']
        subdiv_based_values = [
            getattr(self, prop_id) for prop_id in subdiv_based_properties
            if hasattr(self, prop_id)]
        if self.subdivision_name and \
                self.subdivision_name not in subdiv_based_values:
            lines.append(self.subdivision_name)

        # Place the country line at the end.
        if self.country_name:
            lines.append(self.country_name)

        # Render the address block with the provided separator.
        return separator.join(lines)

    def normalize(self, strict=True):
        """ Normalize address fields.

        If values are unrecognized or invalid, they will be set to None.

        By default, the normalization is ``strict``: metadata derived from
        territory's parents are not allowed to overwrite valid address fields
        entered by the user, and an ``InvalidAddress`` exception is raised on
        conflict. If set to ``False``, territory-derived values take
        precedence over user's.

        You need to call back the ``validate()`` method afterwards to properly
        check that the fully-qualified address is ready for consumption.
        """
        # Strip postal codes of any characters but alphanumerics, spaces and
        # hyphens.
        if self.postal_code:
            self.postal_code = self.postal_code.upper()
            # Remove unrecognized characters.
            self.postal_code = re.compile(
                r'[^A-Z0-9 -]').sub('', self.postal_code)
            # Reduce sequences of mixed hyphens and spaces to single hyphen.
            self.postal_code = re.compile(
                r'[^A-Z0-9]*-+[^A-Z0-9]*').sub('-', self.postal_code)
            # Edge case: remove leading and trailing hyphens. Remaining
            # leading and trailing spaces are removed just below.
            self.postal_code = self.postal_code.strip('-')

        # Normalize spaces. Only base fields are processed: metadata derived
        # by a previous normalization are also stored in ``_fields`` but are
        # rejected by ``__setitem__``, which used to make any second call to
        # this method fail with a ``KeyError``.
        for field_id in self.BASE_FIELD_IDS:
            field_value = self._fields.get(field_id)
            if isinstance(field_value, basestring):
                self[field_id] = ' '.join(field_value.split())

        # Reset empty and blank strings.
        empty_fields = [f_id for f_id, f_value in self.items() if not f_value]
        for field_id in empty_fields:
            del self[field_id]

        # Swap lines if the first is empty.
        if self.line2 and not self.line1:
            self.line1, self.line2 = self.line2, self.line1

        # Normalize territory codes. Unrecognized territory codes are reset
        # to None.
        for territory_id in ['country_code', 'subdivision_code']:
            territory_code = getattr(self, territory_id)
            if territory_code:
                try:
                    code = normalize_territory_code(
                        territory_code, resolve_aliases=False)
                except ValueError:
                    code = None
                setattr(self, territory_id, code)

        # Try to set default subdivision from country if not set.
        if self.country_code and not self.subdivision_code:
            self.subdivision_code = default_subdivision_code(self.country_code)
            # If the country set its own subdivision, reset it. It will be
            # properly re-guessed below.
            if self.subdivision_code:
                self.country_code = None

        # Automatically populate address fields with metadata extracted from
        # all subdivision parents.
        if self.subdivision_code:
            parent_metadata = {
                # All subdivisions have a parent country.
                'country_code': country_from_subdivision(
                    self.subdivision_code)}

            # Add metadata of each subdivision parent.
            for parent_subdiv in territory_parents(
                    self.subdivision_code, include_country=False):
                parent_metadata.update(subdivision_metadata(parent_subdiv))

            # Parent metadata are not allowed to overwrite address fields
            # if not blank, unless strict mode is de-activated.
            if strict:
                for field_id, new_value in parent_metadata.items():
                    # New metadata are not allowed to be blank.
                    assert new_value
                    current_value = self._fields.get(field_id)
                    if current_value and field_id in self.BASE_FIELD_IDS:

                        # Build the list of substitute values that are
                        # equivalent to our new normalized target.
                        alias_values = set([new_value])
                        if field_id == 'country_code':
                            # Allow normalization if the current country code
                            # is the direct parent of a subdivision which also
                            # have its own country code.
                            alias_values.add(subdivisions.get(
                                code=self.subdivision_code).country_code)

                        # Change of current value is allowed if it is a direct
                        # substitute to our new normalized value.
                        if current_value not in alias_values:
                            raise InvalidAddress(
                                inconsistent_fields=set([
                                    tuple(sorted((
                                        field_id, 'subdivision_code')))]),
                                extra_msg="{} subdivision is trying to replace"
                                " {}={!r} field by {}={!r}".format(
                                    self.subdivision_code,
                                    field_id, current_value,
                                    field_id, new_value))

            # Written straight to the store: derived metadata IDs are not base
            # fields and would be refused by ``__setitem__``.
            self._fields.update(parent_metadata)

    def validate(self):
        """ Check fields consistency and requirements in one go.

        Properly check that fields are consistent between themselves, and only
        raise an exception at the end, for the whole address object. Our custom
        ``InvalidAddress`` exception will provide a detailed status of bad
        fields.
        """
        # Keep a classification of bad fields along the validation process.
        required_fields = set()
        invalid_fields = dict()
        inconsistent_fields = set()

        # Check that all required fields are set.
        for field_id in self.REQUIRED_FIELDS:
            if not getattr(self, field_id):
                required_fields.add(field_id)

        # Check all fields for invalidity, only if not previously flagged as
        # required. An unknown code is reported either by a ``KeyError`` or by
        # a ``None`` result, depending on the lookup implementation: both are
        # treated as invalid.
        if 'country_code' not in required_fields:
            # Check that the country code exists.
            try:
                country = countries.get(alpha_2=self.country_code)
            except KeyError:
                country = None
            if country is None:
                invalid_fields['country_code'] = self.country_code
        if self.subdivision_code and 'subdivision_code' not in required_fields:
            # Check that the subdivision code exists.
            try:
                subdivision = subdivisions.get(code=self.subdivision_code)
            except KeyError:
                subdivision = None
            if subdivision is None:
                invalid_fields['subdivision_code'] = self.subdivision_code

        # Check country consistency against subdivision, only if none of the
        # two fields were previously flagged as required or invalid.
        if self.subdivision_code and not set(
                ['country_code', 'subdivision_code']).intersection(
                    required_fields.union(invalid_fields)) and \
                country_from_subdivision(
                    self.subdivision_code) != self.country_code:
            inconsistent_fields.add(
                tuple(sorted(('country_code', 'subdivision_code'))))

        # Raise our custom exception at last.
        if required_fields or invalid_fields or inconsistent_fields:
            raise InvalidAddress(
                required_fields, invalid_fields, inconsistent_fields)

    @property
    def valid(self):
        """ Return a boolean indicating if the address is valid. """
        try:
            self.validate()
        except InvalidAddress:
            return False
        return True

    @property
    def empty(self):
        """ Return True only if all fields are empty. """
        return not any(self.values())

    def __bool__(self):
        """ Consider the instance to be True if not empty. """
        return not self.empty

    def __nonzero__(self):
        """ Python2 retro-compatibility of ``__bool__()``. """
        return self.__bool__()

    @property
    def country(self):
        """ Return country object, or None if no country code is set. """
        if self.country_code:
            return countries.get(alpha_2=self.country_code)
        return None

    @property
    def country_name(self):
        """ Return country's name, or None if there is no country.

        Common name always takes precedence over the default name, as the
        latter is often pompous, and sometimes false (i.e. not in sync with
        current political situation).
        """
        if self.country:
            if hasattr(self.country, 'common_name'):
                return self.country.common_name
            return self.country.name
        return None

    @property
    def subdivision(self):
        """ Return subdivision object, or None if no code is set. """
        if self.subdivision_code:
            return subdivisions.get(code=self.subdivision_code)
        return None

    @property
    def subdivision_name(self):
        """ Return subdivision's name, or None if there is no subdivision. """
        if self.subdivision:
            return self.subdivision.name
        return None

    @property
    def subdivision_type_name(self):
        """ Return subdivision's type human-readable name, or None. """
        if self.subdivision:
            return self.subdivision.type
        return None

    @property
    def subdivision_type_id(self):
        """ Return subdivision's type as a Python-friendly ID string, or None.
        """
        if self.subdivision:
            return subdivision_type_id(self.subdivision)
        return None
520
521
522
# Address utils.
523
524
def random_address(locale=None):
    """ Generate a random address from fake data.

    When ``locale`` is provided, the fake data are produced for that locale.
    When it is missing, a locale is drawn at random among those known to
    Faker. The address is built with a non-strict normalization.
    """
    # XXX Exclude 'ar_PS' that doesn't work currently (it's defined in Faker
    # but not in pycountry).
    # See: https://github.com/scaleway/postal-address/issues/20
    unusable_locales = [None, 'ar_PS']
    while locale in unusable_locales:
        locale = random.choice(list(faker.config.AVAILABLE_LOCALES))
    generator = faker.Faker(locale=locale)

    # Fake values are drawn in the same order as the fields are listed.
    components = {
        'line1': generator.street_address(),
        'line2': generator.sentence(),
        'postal_code': generator.postcode(),
        'city_name': generator.city(),
        'country_code': generator.country_code()}

    # Attach one of the country's subdivisions, if it has any.
    candidate_codes = list(
        territory_children_codes(components['country_code']))
    if candidate_codes:
        components['subdivision_code'] = random.choice(candidate_codes)

    return Address(strict=False, **components)
549
550
551
# Subdivisions utils.
552
553
def subdivision_type_id(subdivision):
    """ Turn the type name of a subdivision into a Python-friendly ID.

    The type name is slugified. If one of the words of the resulting slug is
    ``city`` or ``municipality``, the ID is forced to ``city``.

    For reference, here is the list of all subdivision types defined by
    ``pycountry`` v1.8::

        >>> print '\n'.join(sorted(set([x.type for x in subdivisions])))
        Administration
        Administrative Region
        Administrative Territory
        Administrative atoll
        Administrative region
        Arctic Region
        Area
        Autonomous City
        Autonomous District
        Autonomous Province
        Autonomous Region
        Autonomous city
        Autonomous community
        Autonomous municipality
        Autonomous province
        Autonomous region
        Autonomous republic
        Autonomous sector
        Autonomous territorial unit
        Borough
        Canton
        Capital District
        Capital Metropolitan City
        Capital Territory
        Capital city
        Capital district
        Capital territory
        Chains (of islands)
        City
        City corporation
        City with county rights
        Commune
        Constitutional province
        Council area
        Country
        County
        Department
        Dependency
        Development region
        District
        District council area
        Division
        Economic Prefecture
        Economic region
        Emirate
        Entity
        Federal Dependency
        Federal District
        Federal Territories
        Federal district
        Geographical Entity
        Geographical region
        Geographical unit
        Governorate
        Included for completeness
        Indigenous region
        Island
        Island council
        Island group
        Local council
        London borough
        Metropolitan cities
        Metropolitan department
        Metropolitan district
        Metropolitan region
        Municipalities
        Municipality
        Oblast
        Outlying area
        Overseas region/department
        Overseas territorial collectivity
        Parish
        Popularates
        Prefecture
        Province
        Quarter
        Rayon
        Region
        Regional council
        Republic
        Republican City
        Self-governed part
        Special District
        Special Municipality
        Special Region
        Special administrative region
        Special city
        Special island authority
        Special municipality
        Special zone
        State
        Territorial unit
        Territory
        Town council
        Two-tier county
        Union territory
        Unitary authority
        Unitary authority (England)
        Unitary authority (Wales)
        district
        state
        zone
    """
    slug = slugify(subdivision.type)

    # A type mentioning a city or a municipality is always classified as a
    # city, whatever the other words of its name.
    city_words = set(['city', 'municipality'])
    if city_words.intersection(slug.split('_')):
        return 'city'

    return slug
672
673
674
def subdivision_metadata(subdivision):
    """ Serialize a subdivision into a dict of metadata.

    All metadata IDs are prefixed by the type ID of the subdivision, as
    computed by ``subdivision_type_id()``. The dict carries the subdivision
    object itself, its code, its name and its type name.
    """
    type_id = subdivision_type_id(subdivision)

    metadata = dict([
        ('{}'.format(type_id), subdivision),
        ('{}_code'.format(type_id), subdivision.code),
        ('{}_name'.format(type_id), subdivision.name),
        ('{}_type_name'.format(type_id), subdivision.type)])

    # Check that we are not producing metadata IDs colliding with address
    # fields.
    # NOTE(review): this only fails when *every* non-whitelisted ID is a base
    # field; a partial collision goes unnoticed. Confirm this is intended.
    unlisted_ids = set(metadata).difference(
        Address.SUBDIVISION_METADATA_WHITELIST)
    assert not unlisted_ids.issubset(Address.BASE_FIELD_IDS)

    return metadata
693