Completed
Push — develop ( 755dac...ae6a18 )
by A
01:11
created

postal_address.random_address()   A

Complexity

Conditions 3

Size

Total Lines 21

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 3
dl 0
loc 21
rs 9.3142
1
# -*- coding: utf-8 -*-
2
#
3
# Copyright (c) 2013-2016 Scaleway and Contributors. All Rights Reserved.
4
#                         Kevin Deldycke <[email protected]>
5
#                         Gilles Dartiguelongue <[email protected]>
6
#
7
# Licensed under the BSD 2-Clause License (the "License"); you may not use this
8
# file except in compliance with the License. You may obtain a copy of the
9
# License at http://opensource.org/licenses/BSD-2-Clause
10
11
u""" Utilities for address parsing and rendering.
12
13
Only provides address validation for the moment, but may be used in the future
14
for localized rendering (see issue #4).
15
"""
16
17
from __future__ import (
18
    absolute_import,
19
    division,
20
    print_function,
21
    unicode_literals
22
)
23
24
import random
25
import re
26
27
import faker
28
from boltons.strutils import slugify
29
30
from pycountry import countries, subdivisions
31
32
from . import PY2, PY3
33
from .territory import (
34
    country_from_subdivision,
35
    default_subdivision_code,
36
    normalize_territory_code,
37
    territory_children_codes,
38
    territory_parents
39
)
40
41
if PY3:
    # Python 3 has no ``basestring`` builtin: alias it to the tuple of string
    # types so the ``isinstance(..., basestring)`` checks below keep working.
    basestring = (str, bytes)
43
44
45
class InvalidAddress(ValueError):
    """ Validation error carrying a classification of the offending fields.

    Attributes set by the constructor:
    * ``required_fields``: IDs of fields that are missing.
    * ``invalid_fields``: IDs of fields holding an invalid value.
    * ``inconsistent_fields``: pairs of field IDs contradicting each other.
    * ``extra_msg``: optional free-form text appended to the message.
    """

    def __init__(self, required_fields=None, invalid_fields=None,
                 inconsistent_fields=None, extra_msg=None):
        """ Store each category of bad fields; falsy ones become empty sets.
        """
        super(InvalidAddress, self).__init__()
        self.required_fields = required_fields or set()
        self.invalid_fields = invalid_fields or set()
        self.inconsistent_fields = inconsistent_fields or set()
        self.extra_msg = extra_msg

    def __str__(self):
        """ Build the human-readable message, one clause per category. """
        clauses = []

        # Required and invalid fields share the same sentence structure, with
        # the verb agreeing with the number of listed fields.
        for field_ids, template in (
                (self.required_fields, '{} {} required'),
                (self.invalid_fields, '{} {} invalid')):
            if not field_ids:
                continue
            verb = 'is' if len(field_ids) == 1 else 'are'
            clauses.append(template.format(', '.join(sorted(field_ids)), verb))

        # One clause per inconsistent pair, in sorted order.
        clauses.extend(
            '{} is inconsistent with {}'.format(first_id, second_id)
            for first_id, second_id in sorted(self.inconsistent_fields))

        if self.extra_msg:
            clauses.append(self.extra_msg)

        return '{}.'.format('; '.join(clauses))
77
78
79
class Address(object):

    """ Define a postal address.

    All addresses share the following fields:
    * ``line1`` (required): a non-constrained string.
    * ``line2``: a non-constrained string.
    * ``postal_code`` (required): a non-constrained string (see issue #2).
    * ``city_name`` (required): a non-constrained string.
    * ``country_code`` (required): an ISO 3166-1 alpha-2 code.
    * ``subdivision_code``: an ISO 3166-2 code.

    At instantiation, the ``normalize()`` method is called. The latter tries
    to clean-up the data and populate empty fields that can be derived from
    others. As such, ``city_name`` can be overridden by ``subdivision_code``.
    See the internal ``SUBDIVISION_METADATA_WHITELIST`` constant.

    If inconsistencies are found at the normalization step, they are left as-is
    to give a chance to the ``validate()`` method to catch them. Which means
    that, after each normalization (including the one at initialization), it is
    your job to call the ``validate()`` method manually to check that the
    address is good.

    Fields are readable as attributes (``address.line1``) and as items
    (``address['line1']``); both are proxies to the internal ``_fields`` dict.
    """

    # All normalized field IDs and values of the address are stored in the
    # per-instance ``_fields`` dict, which is created by ``__init__()``.

    # Fields common to any postal address. Those are free-form fields, allowed
    # to be set directly by the user, although their values might be normalized
    # and cleaned-up automatically by the normalization method.
    BASE_FIELD_IDS = frozenset([
        'line1', 'line2', 'postal_code', 'city_name', 'country_code',
        'subdivision_code'])

    # List of subdivision-derived metadata IDs which are allowed to collide
    # with base field IDs.
    SUBDIVISION_METADATA_WHITELIST = frozenset(['city_name'])
    assert SUBDIVISION_METADATA_WHITELIST.issubset(BASE_FIELD_IDS)

    # Fields tested on validate().
    REQUIRED_FIELDS = frozenset([
        'line1', 'postal_code', 'city_name', 'country_code'])
    assert REQUIRED_FIELDS.issubset(BASE_FIELD_IDS)

    def __init__(self, strict=True, **kwargs):
        """ Set address' individual fields and normalize them.

        Only IDs listed in ``BASE_FIELD_IDS`` are accepted as keyword
        arguments; any other raises a ``KeyError``. By default, normalization
        is ``strict`` (see ``normalize()``).
        """
        # Only common fields are allowed to be set directly.
        unknown_fields = set(kwargs).difference(self.BASE_FIELD_IDS)
        if unknown_fields:
            raise KeyError(
                "{!r} fields are not allowed to be set freely.".format(
                    unknown_fields))
        # Initialize base fields values.
        self._fields = dict.fromkeys(self.BASE_FIELD_IDS)
        # Load provided fields.
        for field_id, field_value in kwargs.items():
            self[field_id] = field_value
        # Normalize addresses fields.
        self.normalize(strict=strict)

    def __repr__(self):
        """ Print all fields available from the address.

        Also include internal fields disguised as properties.
        """
        # Repr all plain fields.
        fields_repr = ['{}={!r}'.format(k, v) for k, v in self.items()]
        # Repr all internal properties.
        for internal_id in [
                'valid', 'empty', 'country_name', 'subdivision_name',
                'subdivision_type_name', 'subdivision_type_id']:
            fields_repr.append(
                '{}={!r}'.format(internal_id, getattr(self, internal_id)))
        return '{}({})'.format(
            self.__class__.__name__, ', '.join(sorted(fields_repr)))

    def __unicode__(self):
        """ Return a simple unicode string representation of the address block.
        """
        return self.render()

    if PY2:
        __str__ = lambda self: self.__unicode__().encode('utf-8')
    else:
        __str__ = __unicode__

    def __getattr__(self, name):
        """ Expose fields as attributes.

        Only called when regular attribute lookup fails. Raises
        ``AttributeError`` if ``name`` is not a known field.
        """
        # Fetch ``_fields`` straight from the instance ``__dict__``: going
        # through ``self._fields`` would re-enter ``__getattr__`` and recurse
        # without bound whenever the dict is not set yet (i.e. on an instance
        # on which ``__init__`` has not run).
        fields = self.__dict__.get('_fields')
        if fields is not None and name in fields:
            return fields[name]
        raise AttributeError(
            "{!r} object has no attribute {!r}".format(
                self.__class__.__name__, name))

    def __setattr__(self, name, value):
        """ Allow update of address fields as attributes. """
        if name in self.BASE_FIELD_IDS:
            self[name] = value
            return
        super(Address, self).__setattr__(name, value)

    # Let an address be accessed like a dict of its fields IDs & values.
    # This is a proxy to the internal _fields dict.

    def __len__(self):
        """ Return the number of fields. """
        return len(self._fields)

    def __getitem__(self, key):
        """ Return the value of a field.

        Raises ``TypeError`` if ``key`` is not a string and ``KeyError`` if
        the field is unknown.
        """
        if not isinstance(key, basestring):
            raise TypeError(
                "Field ID must be a string, not {!r}.".format(key))
        return self._fields[key]

    def __setitem__(self, key, value):
        """ Set a field's value.

        Only base fields are allowed to be set explicitly. Raises
        ``TypeError`` if ``key`` is not a string or if ``value`` is neither a
        string nor ``None``, and ``KeyError`` if ``key`` is not a base field.
        """
        if not isinstance(key, basestring):
            raise TypeError(
                "Field ID must be a string, not {!r}.".format(key))
        if not (isinstance(value, basestring) or value is None):
            raise TypeError(
                "Value of field {!r} must be a string or None, "
                "not {!r}.".format(key, value))
        if key not in self.BASE_FIELD_IDS:
            raise KeyError(key)
        self._fields[key] = value

    def __delitem__(self, key):
        """ Remove a field.

        Base fields are never removed, only reset to ``None``.
        """
        if key in self.BASE_FIELD_IDS:
            self._fields[key] = None
        else:
            del self._fields[key]

    def __iter__(self):
        """ Iterate over field IDs. """
        for field_id in self._fields:
            yield field_id

    def keys(self):
        """ Return the field IDs, as provided by the internal dict. """
        return self._fields.keys()

    def values(self):
        """ Return the field values, as provided by the internal dict. """
        return self._fields.values()

    def items(self):
        """ Return field IDs & values, as provided by the internal dict. """
        return self._fields.items()

    def render(self, separator='\n'):
        """ Render a human-friendly address block.

        The block is composed of:
        * The ``line1`` field rendered as-is if not empty.
        * The ``line2`` field rendered as-is if not empty.
        * A third line made of the postal code, the city name and state name if
          any is set.
        * A fourth optional line with the subdivision name if its value does
          not overlap with the city, state or country name.
        * The last line feature country's common name.

        Lines are joined with ``separator``.
        """
        lines = []

        if self.line1:
            lines.append(self.line1)

        if self.line2:
            lines.append(self.line2)

        # Build the third line. City and state are separated by a comma.
        locality_elements = []
        if self.city_name:
            locality_elements.append(self.city_name)
        # ``state_name`` is not a base field: it only exists when it was
        # derived from the subdivision during normalization.
        state_name = getattr(self, 'state_name', None)
        if state_name:
            locality_elements.append(state_name)
        locality = ', '.join(locality_elements)
        # Separate the leading zip code and the rest by a dash. Blank parts
        # are skipped so that a lone postal code is not followed by a
        # dangling separator.
        line3 = ' - '.join(
            [element for element in (self.postal_code, locality) if element])
        if line3:
            lines.append(line3)

        # Compare the vanilla subdivision name to properties that are based on
        # it and used in the current ``render()`` method to produce a printable
        # address. If none overlap, then print an additional line with the
        # subdivision name as-is to provide extra, non-redundant, territory
        # precision.
        subdiv_based_properties = [
            'city_name', 'state_name', 'country_name']
        subdiv_based_values = [
            getattr(self, prop_id) for prop_id in subdiv_based_properties
            if hasattr(self, prop_id)]
        subdivision_name = self.subdivision_name
        if subdivision_name and subdivision_name not in subdiv_based_values:
            lines.append(subdivision_name)

        # Place the country line at the end.
        country_name = self.country_name
        if country_name:
            lines.append(country_name)

        # Render the address block with the provided separator.
        return separator.join(lines)

    def normalize(self, strict=True):
        """ Normalize address fields.

        If values are unrecognized or invalid, they will be set to None.

        By default, the normalization is ``strict``: metadata derived from
        territory's parents are not allowed to overwrite valid address fields
        entered by the user, and an ``InvalidAddress`` exception is raised if
        they try to. If set to ``False``, territory-derived values take
        precedence over user's.

        You need to call back the ``validate()`` method afterwards to properly
        check that the fully-qualified address is ready for consumption.
        """
        # Strip postal codes of any characters but alphanumerics, spaces and
        # hyphens.
        if self.postal_code:
            postal_code = self.postal_code.upper()
            # Remove unrecognized characters.
            postal_code = re.sub(r'[^A-Z0-9 -]', '', postal_code)
            # Reduce sequences of mixed hyphens and spaces to single hyphen.
            postal_code = re.sub(
                r'[^A-Z0-9]*-+[^A-Z0-9]*', '-', postal_code)
            # Edge case: remove leading and trailing hyphens. Surrounding
            # spaces are taken care of by the space normalization below.
            self.postal_code = postal_code.strip('-')

        # Normalize spaces. Values are written straight into the internal
        # dict: ``__setitem__`` only accepts base fields, and would raise a
        # ``KeyError`` on the subdivision-derived metadata left by a previous
        # normalization.
        for field_id, field_value in self.items():
            if isinstance(field_value, basestring):
                self._fields[field_id] = ' '.join(field_value.split())

        # Reset empty and blank strings.
        empty_fields = [f_id for f_id, f_value in self.items() if not f_value]
        for field_id in empty_fields:
            del self[field_id]

        # Swap lines if the first is empty.
        if self.line2 and not self.line1:
            self.line1, self.line2 = self.line2, self.line1

        # Normalize territory codes. Unrecognized territory codes are reset
        # to None.
        for territory_id in ['country_code', 'subdivision_code']:
            territory_code = getattr(self, territory_id)
            if territory_code:
                try:
                    code = normalize_territory_code(
                        territory_code, resolve_aliases=False)
                except ValueError:
                    code = None
                setattr(self, territory_id, code)

        # Try to set default subdivision from country if not set.
        if self.country_code and not self.subdivision_code:
            self.subdivision_code = default_subdivision_code(self.country_code)
            # If the country set its own subdivision, reset it. It will be
            # properly re-guessed below.
            if self.subdivision_code:
                self.country_code = None

        # Automatically populate address fields with metadata extracted from
        # all subdivision parents.
        if self.subdivision_code:
            parent_metadata = {
                # All subdivisions have a parent country.
                'country_code': country_from_subdivision(
                    self.subdivision_code)}

            # Add metadata of each subdivision parent.
            for parent_subdiv in territory_parents(
                    self.subdivision_code, include_country=False):
                parent_metadata.update(subdivision_metadata(parent_subdiv))

            # Parent metadata are not allowed to overwrite address fields
            # if not blank, unless strict mode is de-activated.
            if strict:
                for field_id, new_value in parent_metadata.items():
                    # New metadata are not allowed to be blank.
                    assert new_value
                    current_value = self._fields.get(field_id)
                    if current_value and field_id in self.BASE_FIELD_IDS:

                        # Build the list of substitute values that are
                        # equivalent to our new normalized target.
                        alias_values = set([new_value])
                        if field_id == 'country_code':
                            # Allow normalization if the current country code
                            # is the direct parent of a subdivision which also
                            # have its own country code.
                            alias_values.add(subdivisions.get(
                                code=self.subdivision_code).country_code)

                        # Change of current value is allowed if it is a direct
                        # substitute to our new normalized value.
                        if current_value not in alias_values:
                            raise InvalidAddress(
                                inconsistent_fields=set([
                                    tuple(sorted((
                                        field_id, 'subdivision_code')))]),
                                extra_msg="{} subdivision is trying to replace"
                                " {}={!r} field by {}={!r}".format(
                                    self.subdivision_code,
                                    field_id, current_value,
                                    field_id, new_value))

            self._fields.update(parent_metadata)

    def validate(self):
        """ Check fields consistency and requirements in one go.

        Properly check that fields are consistent between themselves, and only
        raise an exception at the end, for the whole address object. Our custom
        ``InvalidAddress`` exception will provide a detailed status of bad
        fields.
        """
        # Keep a classification of bad fields along the validation process.
        required_fields = set()
        invalid_fields = set()
        inconsistent_fields = set()

        # Check that all required fields are set.
        for field_id in self.REQUIRED_FIELDS:
            if not getattr(self, field_id):
                required_fields.add(field_id)

        # Check all fields for invalidity, only if not previously flagged as
        # required.
        if 'country_code' not in required_fields:
            # Check that the country code exists.
            try:
                countries.get(alpha2=self.country_code)
            except KeyError:
                invalid_fields.add('country_code')
        if self.subdivision_code and 'subdivision_code' not in required_fields:
            # Check that the subdivision code exists.
            try:
                subdivisions.get(code=self.subdivision_code)
            except KeyError:
                invalid_fields.add('subdivision_code')

        # Check country consistency against subdivision, only if none of the
        # two fields were previously flagged as required or invalid.
        if self.subdivision_code and not set(
                ['country_code', 'subdivision_code']).intersection(
                    required_fields.union(invalid_fields)) and \
                country_from_subdivision(
                    self.subdivision_code) != self.country_code:
            inconsistent_fields.add(
                tuple(sorted(('country_code', 'subdivision_code'))))

        # Raise our custom exception at last.
        if required_fields or invalid_fields or inconsistent_fields:
            raise InvalidAddress(
                required_fields, invalid_fields, inconsistent_fields)

    @property
    def valid(self):
        """ Return a boolean indicating if the address is valid. """
        try:
            self.validate()
        except InvalidAddress:
            return False
        return True

    @property
    def empty(self):
        """ Return True only if all fields are empty. """
        return not any(self.values())

    def __bool__(self):
        """ Consider the instance to be True if not empty. """
        return not self.empty

    def __nonzero__(self):
        """ Python2 retro-compatibility of ``__bool__()``. """
        return self.__bool__()

    @property
    def country(self):
        """ Return country object, or None if no country code is set. """
        if self.country_code:
            return countries.get(alpha2=self.country_code)
        return None

    @property
    def country_name(self):
        """ Return country's name.

        Common name always takes precedence over the default name, as the
        latter is often pompous, and sometimes false (i.e. not in sync with
        current political situation).
        """
        country = self.country
        if country:
            if hasattr(country, 'common_name'):
                return country.common_name
            return country.name
        return None

    @property
    def subdivision(self):
        """ Return subdivision object, or None if no subdivision code is set.
        """
        if self.subdivision_code:
            return subdivisions.get(code=self.subdivision_code)
        return None

    @property
    def subdivision_name(self):
        """ Return subdivision's name. """
        subdivision = self.subdivision
        if subdivision:
            return subdivision.name
        return None

    @property
    def subdivision_type_name(self):
        """ Return subdivision's type human-readable name. """
        subdivision = self.subdivision
        if subdivision:
            return subdivision.type
        return None

    @property
    def subdivision_type_id(self):
        """ Return subdivision's type as a Python-friendly ID string. """
        subdivision = self.subdivision
        if subdivision:
            # Inside this method the bare name resolves to the module-level
            # ``subdivision_type_id()`` function, not to this property.
            return subdivision_type_id(subdivision)
        return None
514
515
516
# Address utils.
517
518
def random_address(locale=None):
    """ Return an ``Address`` built from randomly generated components.

    If ``locale`` is provided, it is handed to Faker to try to produce a
    localized-consistent address. Else a random one is picked-up.

    The address is instantiated in non-strict mode. Its validity is not
    checked here: call ``validate()`` on the result if you need a guarantee.
    """
    # No locale requested: let Faker's provider pick one for us.
    chosen_locale = locale
    if chosen_locale is None:
        chosen_locale = faker.providers.misc.Provider.language_code()
    fake = faker.Faker(locale=chosen_locale)

    # Generate each base component. The order of the calls is kept stable so
    # that a seeded generator yields the same address.
    components = {}
    components['line1'] = fake.street_address()
    components['line2'] = fake.sentence()
    components['postal_code'] = fake.postcode()
    components['city_name'] = fake.city()
    components['country_code'] = fake.country_code()

    # Pick a random subdivision among those attached to the country, if any.
    candidates = list(territory_children_codes(components['country_code']))
    if candidates:
        components['subdivision_code'] = random.choice(candidates)

    return Address(strict=False, **components)
539
540
541
# Subdivisions utils.
542
543
def subdivision_type_id(subdivision):
    r""" Normalize subdivision type name into a Python-friendly ID.

    The ID is the slug of ``subdivision.type``, as produced by ``boltons``'
    ``slugify()``. Any type whose slug contains the ``city`` or
    ``municipality`` word is classified as a plain ``city``.

    Here is the list of all subdivision types defined by ``pycountry`` v1.8::

        >>> print '\n'.join(sorted(set([x.type for x in subdivisions])))
        Administration
        Administrative Region
        Administrative Territory
        Administrative atoll
        Administrative region
        Arctic Region
        Area
        Autonomous City
        Autonomous District
        Autonomous Province
        Autonomous Region
        Autonomous city
        Autonomous community
        Autonomous municipality
        Autonomous province
        Autonomous region
        Autonomous republic
        Autonomous sector
        Autonomous territorial unit
        Borough
        Canton
        Capital District
        Capital Metropolitan City
        Capital Territory
        Capital city
        Capital district
        Capital territory
        Chains (of islands)
        City
        City corporation
        City with county rights
        Commune
        Constitutional province
        Council area
        Country
        County
        Department
        Dependency
        Development region
        District
        District council area
        Division
        Economic Prefecture
        Economic region
        Emirate
        Entity
        Federal Dependency
        Federal District
        Federal Territories
        Federal district
        Geographical Entity
        Geographical region
        Geographical unit
        Governorate
        Included for completeness
        Indigenous region
        Island
        Island council
        Island group
        Local council
        London borough
        Metropolitan cities
        Metropolitan department
        Metropolitan district
        Metropolitan region
        Municipalities
        Municipality
        Oblast
        Outlying area
        Overseas region/department
        Overseas territorial collectivity
        Parish
        Popularates
        Prefecture
        Province
        Quarter
        Rayon
        Region
        Regional council
        Republic
        Republican City
        Self-governed part
        Special District
        Special Municipality
        Special Region
        Special administrative region
        Special city
        Special island authority
        Special municipality
        Special zone
        State
        Territorial unit
        Territory
        Town council
        Two-tier county
        Union territory
        Unitary authority
        Unitary authority (England)
        Unitary authority (Wales)
        district
        state
        zone

    This method transforms and normalizes any of these into Python-friendly
    IDs.
    """
    slug = slugify(subdivision.type)

    # Any occurrence of the 'city' or 'municipality' word in the slugified
    # type overrides its classification to a city.
    slug_words = slug.split('_')
    if 'city' in slug_words or 'municipality' in slug_words:
        return 'city'

    return slug
662
663
664
def subdivision_metadata(subdivision):
    """ Return a dict of metadata describing the provided subdivision.

    Metadata IDs are derived from the subdivision type ID, as computed by
    ``subdivision_type_id()``. For a type ID ``<type>``, the dict features:
    * ``<type>``: the subdivision object itself.
    * ``<type>_code``: the subdivision's code.
    * ``<type>_name``: the subdivision's name.
    * ``<type>_type_name``: the subdivision's type name.
    """
    prefix = subdivision_type_id(subdivision)

    metadata = {}
    metadata['{}'.format(prefix)] = subdivision
    metadata['{}_code'.format(prefix)] = subdivision.code
    metadata['{}_name'.format(prefix)] = subdivision.name
    metadata['{}_type_name'.format(prefix)] = subdivision.type

    # Check that we are not producing metadata IDs colliding with address
    # fields.
    # NOTE(review): as written, this only fails when *all* the
    # non-whitelisted IDs are base fields; a partial collision (e.g. a lone
    # ``country_code`` for a ``country``-typed subdivision) goes through.
    # Confirm whether a strict disjoint check was intended.
    non_whitelisted_ids = set(metadata).difference(
        Address.SUBDIVISION_METADATA_WHITELIST)
    assert not non_whitelisted_ids.issubset(Address.BASE_FIELD_IDS)

    return metadata
683