Completed
Push — develop ( 481b38...838dfc )
by A
17:11
created

postal_address.Address.__setitem__()   B

Complexity

Conditions 5

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 5
dl 0
loc 12
rs 8.5454
1
# -*- coding: utf-8 -*-
2
#
3
# Copyright (c) 2013-2016 Scaleway and Contributors. All Rights Reserved.
4
#                         Kevin Deldycke <[email protected]>
5
#                         Gilles Dartiguelongue <[email protected]>
6
#
7
# Licensed under the BSD 2-Clause License (the "License"); you may not use this
8
# file except in compliance with the License. You may obtain a copy of the
9
# License at http://opensource.org/licenses/BSD-2-Clause
10
11
u""" Utilities for address parsing and rendering.
12
13
Only provides address validation for the moment, but may be used in the future
14
for localized rendering (see issue #4).
15
"""
16
17
from __future__ import (
18
    absolute_import,
19
    division,
20
    print_function,
21
    unicode_literals
22
)
23
24
import re
25
import string
26
import warnings
27
from random import choice, randint
28
29
import faker
30
31
from pycountry import countries, subdivisions
32
from slugify import slugify
33
34
from .territory import (
35
    country_from_subdivision,
36
    default_subdivision_code,
37
    normalize_territory_code,
38
    territory_children_codes,
39
    territory_parents
40
)
41
42
# Python 2/3 compatibility shim: ``basestring`` only exists on Python 2. On
# Python 3 the lookup raises ``NameError`` and the name is rebound to a tuple
# of types usable as the second argument of ``isinstance()``.
try:
    basestring
except NameError:  # pragma: no cover
    basestring = (str, bytes)
46
47
48
class InvalidAddress(ValueError):
    """ Custom exception providing details about address failing validation.

    Bad fields are classified in three sets kept as attributes:

    * ``required_fields``: IDs of fields that are required but missing.
    * ``invalid_fields``: IDs of fields whose value is invalid.
    * ``inconsistent_fields``: pairs (2-item tuples) of field IDs whose
      values contradict each other.

    An optional free-form ``extra_msg`` is appended to the rendered message.
    """

    def __init__(self, required_fields=None, invalid_fields=None,
                 inconsistent_fields=None, extra_msg=None):
        """ Exception keep internally a classification of bad fields.

        Any classification left empty (or ``None``) is stored as an empty
        set, so the attributes can always be iterated.
        """
        super(InvalidAddress, self).__init__()
        self.required_fields = required_fields if required_fields else set()
        self.invalid_fields = invalid_fields if invalid_fields else set()
        self.inconsistent_fields = inconsistent_fields if inconsistent_fields \
            else set()
        self.extra_msg = extra_msg

    def __str__(self):
        """ Human-readable error.

        Reasons are listed in a stable order (required, invalid, inconsistent
        then the extra message), separated by semicolons.
        """
        reasons = []
        if self.required_fields:
            reasons.append('{} {} required'.format(
                ', '.join(sorted(self.required_fields)),
                'is' if len(self.required_fields) == 1 else 'are'))
        if self.invalid_fields:
            reasons.append('{} {} invalid'.format(
                ', '.join(sorted(self.invalid_fields)),
                'is' if len(self.invalid_fields) == 1 else 'are'))
        if self.inconsistent_fields:
            for field_id_1, field_id_2 in sorted(self.inconsistent_fields):
                reasons.append('{} is inconsistent with {}'.format(
                    field_id_1, field_id_2))
        if self.extra_msg:
            reasons.append(self.extra_msg)
        # Without any recorded reason the message used to be a lone ".":
        # fall back on a generic but meaningful sentence instead.
        if not reasons:
            return 'Invalid address.'
        return '{}.'.format('; '.join(reasons))
80
81
82
class Address(object):

    """ Define a postal address.

    All addresses share the following fields:
    * ``line1`` (required): a non-constrained string.
    * ``line2``: a non-constrained string.
    * ``postal_code`` (required): a non-constrained string (see issue #2).
    * ``city_name`` (required): a non-constrained string.
    * ``country_code`` (required): an ISO 3166-1 alpha-2 code.
    * ``subdivision_code``: an ISO 3166-2 code.

    At instantiation, the ``normalize()`` method is called. The latter tries
    to clean-up the data and populate empty fields that can be derived from
    others. As such, ``city_name`` can be overridden by ``subdivision_code``.
    See the internal ``SUBDIVISION_METADATA_WHITELIST`` constant.

    If inconsistencies are found at the normalization step, they are left as-is
    to give a chance to the ``validate()`` method to catch them. Which means
    that, after each normalization (including the one at initialization), it is
    your job to call the ``validate()`` method manually to check that the
    address is good.
    """

    # All normalized field IDs and values of the address are stored in the
    # per-instance ``_fields`` dict, which is created by ``__init__()``.

    # Fields common to any postal address. Those are free-form fields, allowed
    # to be set directly by the user, although their values might be normalized
    # and cleaned-up automatically by the normalization method.
    BASE_FIELD_IDS = frozenset([
        'line1', 'line2', 'postal_code', 'city_name', 'country_code',
        'subdivision_code'])

    # List of subdivision-derived metadata IDs which are allowed to collide
    # with base field IDs.
    SUBDIVISION_METADATA_WHITELIST = frozenset(['city_name'])
    assert SUBDIVISION_METADATA_WHITELIST.issubset(BASE_FIELD_IDS)

    # Fields tested on validate().
    REQUIRED_FIELDS = frozenset([
        'line1', 'postal_code', 'city_name', 'country_code'])
    assert REQUIRED_FIELDS.issubset(BASE_FIELD_IDS)

    def __init__(self, strict=True, **kwargs):
        """ Set address' individual fields and normalize them.

        Only IDs listed in ``BASE_FIELD_IDS`` are accepted as keyword
        arguments; any other raises a ``KeyError``.

        By default, normalization is ``strict``.
        """
        # Only common fields are allowed to be set directly.
        unknown_fields = set(kwargs).difference(self.BASE_FIELD_IDS)
        if unknown_fields:
            raise KeyError(
                "{!r} fields are not allowed to be set freely.".format(
                    unknown_fields))
        # Initialize base fields values.
        self._fields = dict.fromkeys(self.BASE_FIELD_IDS)
        # Load provided fields.
        for field_id, field_value in kwargs.items():
            self[field_id] = field_value
        # Normalize addresses fields.
        self.normalize(strict=strict)

    def __repr__(self):
        """ Print all fields available from the address.

        Also include internal fields disguised as properties.
        """
        # Repr all plain fields.
        fields_repr = ['{}={!r}'.format(k, v) for k, v in self.items()]
        # Repr all internal properties.
        for internal_id in [
                'valid', 'empty', 'country_name', 'subdivision_name',
                'subdivision_type_name', 'subdivision_type_id']:
            fields_repr.append(
                '{}={!r}'.format(internal_id, getattr(self, internal_id)))
        return '{}({})'.format(
            self.__class__.__name__, ', '.join(sorted(fields_repr)))

    def __str__(self):
        """ Return a simple string representation of the address block. """
        return self.render()

    def __getattr__(self, name):
        """ Expose fields as attributes.

        Only called when the regular attribute lookup fails. Raises
        ``AttributeError`` for any name which is not a stored field.
        """
        # Fetch ``_fields`` straight from the instance dict: going through
        # ``self._fields`` would re-enter this method and recurse forever
        # whenever the dict is not set yet (bare ``__new__()``, copy or
        # unpickling machinery probing attributes, ...).
        fields = self.__dict__.get('_fields')
        if fields is not None and name in fields:
            return fields[name]
        raise AttributeError("{!r} object has no attribute {!r}".format(
            self.__class__.__name__, name))

    def __setattr__(self, name, value):
        """ Allow update of address fields as attributes.

        Base fields are routed to ``__setitem__()`` so they get validated;
        every other name is stored as a regular instance attribute.
        """
        if name in self.BASE_FIELD_IDS:
            self[name] = value
            return
        super(Address, self).__setattr__(name, value)

    # Let an address be accessed like a dict of its fields IDs & values.
    # This is a proxy to the internal _fields dict.

    def __len__(self):
        """ Return the number of fields. """
        return len(self._fields)

    def __getitem__(self, key):
        """ Return the value of a field.

        Raises ``TypeError`` if the key is not a string and ``KeyError`` if
        the field is unknown.
        """
        if not isinstance(key, basestring):
            raise TypeError(
                "Field ID must be a string, not {!r}.".format(key))
        return self._fields[key]

    def __setitem__(self, key, value):
        """ Set a field's value.

        Only base fields are allowed to be set explicitly, and only to a
        string or ``None``. Raises ``TypeError`` on a bad key or value type,
        and ``KeyError`` if the key is not a base field.
        """
        if not isinstance(key, basestring):
            raise TypeError(
                "Field ID must be a string, not {!r}.".format(key))
        if not (isinstance(value, basestring) or value is None):
            raise TypeError(
                "Value of field {!r} must be a string or None, "
                "not {!r}.".format(key, value))
        if key not in self.BASE_FIELD_IDS:
            raise KeyError(key)
        self._fields[key] = value

    def __delitem__(self, key):
        """ Remove a field.

        Base fields are always kept and are only reset to ``None``; other
        fields are really removed from the internal dict.
        """
        if key in self.BASE_FIELD_IDS:
            self._fields[key] = None
        else:
            del self._fields[key]

    def __iter__(self):
        """ Iterate over field IDs. """
        for field_id in self._fields:
            yield field_id

    def keys(self):
        """ Return field IDs, as provided by the internal dict. """
        return self._fields.keys()

    def values(self):
        """ Return field values, as provided by the internal dict. """
        return self._fields.values()

    def items(self):
        """ Return field IDs & values, as provided by the internal dict. """
        return self._fields.items()

    def render(self, separator='\n'):
        """ Render a human-friendly address block.

        The block is composed of:
        * The ``line1`` field rendered as-is if not empty.
        * The ``line2`` field rendered as-is if not empty.
        * A third line made of the postal code, the city name and state name if
          any is set.
        * A fourth optional line with the subdivision name if its value does
          not overlap with the city, state or country name.
        * The last line feature country's common name.

        Lines are joined with ``separator``.
        """
        lines = []

        if self.line1:
            lines.append(self.line1)

        if self.line2:
            lines.append(self.line2)

        # Build the third line. City and state are separated by a comma.
        locality_elements = []
        if self.city_name:
            locality_elements.append(self.city_name)
        # ``state_name`` is not a base field: it only exists once it has been
        # derived from the subdivision by ``normalize()``.
        state_name = getattr(self, 'state_name', None)
        if state_name:
            locality_elements.append(state_name)
        # Separate the leading zip code and the rest by a dash. The locality
        # part is only added when not empty, so that a lone postal code is
        # not followed by a dangling separator.
        line3_elements = []
        if self.postal_code:
            line3_elements.append(self.postal_code)
        if locality_elements:
            line3_elements.append(', '.join(locality_elements))
        line3 = ' - '.join(line3_elements)
        if line3:
            lines.append(line3)

        # Compare the vanilla subdivision name to properties that are based on
        # it and used in the current ``render()`` method to produce a printable
        # address. If none overlap, then print an additional line with the
        # subdivision name as-is to provide extra, non-redundant, territory
        # precision.
        subdiv_based_properties = [
            'city_name', 'state_name', 'country_name']
        subdiv_based_values = [
            getattr(self, prop_id) for prop_id in subdiv_based_properties
            if hasattr(self, prop_id)]
        if self.subdivision_name and \
                self.subdivision_name not in subdiv_based_values:
            lines.append(self.subdivision_name)

        # Place the country line at the end.
        if self.country_name:
            lines.append(self.country_name)

        # Render the address block with the provided separator.
        return separator.join(lines)

    def normalize(self, strict=True):
        """ Normalize address fields.

        If values are unrecognized or invalid, they will be set to None.

        By default, the normalization is ``strict``: metadata derived from
        territory's parents are not allowed to overwrite valid address fields
        entered by the user, and an ``InvalidAddress`` exception is raised on
        such a conflict. If set to ``False``, territory-derived values
        takes precedence over user's.

        You need to call back the ``validate()`` method afterwards to properly
        check that the fully-qualified address is ready for consumption.
        """
        # Strip postal codes of any characters but alphanumerics, spaces and
        # hyphens.
        if self.postal_code:
            postal_code = self.postal_code.upper()
            # Remove unrecognized characters.
            postal_code = re.sub(r'[^A-Z0-9 -]', '', postal_code)
            # Reduce sequences of mixed hyphens and spaces to single hyphen.
            postal_code = re.sub(
                r'[^A-Z0-9]*-+[^A-Z0-9]*', '-', postal_code)
            # Edge case: remove leading and trailing hyphens. Stray spaces
            # are dealt with by the whitespace normalization below.
            self.postal_code = postal_code.strip('-')

        # Normalize spaces. Values are written straight to the internal dict:
        # metadata derived by a previous normalization are not base fields
        # and would be rejected with a ``KeyError`` by ``__setitem__()``.
        for field_id, field_value in list(self.items()):
            if isinstance(field_value, basestring):
                self._fields[field_id] = ' '.join(field_value.split())

        # Reset empty and blank strings.
        empty_fields = [f_id for f_id, f_value in self.items() if not f_value]
        for field_id in empty_fields:
            del self[field_id]

        # Swap lines if the first is empty.
        if self.line2 and not self.line1:
            self.line1, self.line2 = self.line2, self.line1

        # Normalize territory codes. Unrecognized territory codes are reset
        # to None.
        for territory_id in ['country_code', 'subdivision_code']:
            territory_code = getattr(self, territory_id)
            if territory_code:
                try:
                    code = normalize_territory_code(
                        territory_code, resolve_aliases=False)
                except ValueError:
                    code = None
                setattr(self, territory_id, code)

        # Try to set default subdivision from country if not set.
        if self.country_code and not self.subdivision_code:
            self.subdivision_code = default_subdivision_code(self.country_code)
            # If the country set its own subdivision, reset it. It will be
            # properly re-guessed below.
            if self.subdivision_code:
                self.country_code = None

        # Automatically populate address fields with metadata extracted from
        # all subdivision parents.
        if self.subdivision_code:
            parent_metadata = {
                # All subdivisions have a parent country.
                'country_code': country_from_subdivision(
                    self.subdivision_code)}

            # Add metadata of each subdivision parent.
            for parent_subdiv in territory_parents(
                    self.subdivision_code, include_country=False):
                parent_metadata.update(subdivision_metadata(parent_subdiv))

            # Parent metadata are not allowed to overwrite address fields
            # if not blank, unless strict mode is de-activated.
            if strict:
                for field_id, new_value in parent_metadata.items():
                    # New metadata are not allowed to be blank.
                    assert new_value
                    current_value = self._fields.get(field_id)
                    if current_value and field_id in self.BASE_FIELD_IDS:

                        # Build the list of substitute values that are
                        # equivalent to our new normalized target.
                        alias_values = set([new_value])
                        if field_id == 'country_code':
                            # Allow normalization if the current country code
                            # is the direct parent of a subdivision which also
                            # have its own country code.
                            alias_values.add(subdivisions.get(
                                code=self.subdivision_code).country_code)

                        # Change of current value is allowed if it is a direct
                        # substitute to our new normalized value.
                        if current_value not in alias_values:
                            raise InvalidAddress(
                                inconsistent_fields=set([
                                    tuple(sorted((
                                        field_id, 'subdivision_code')))]),
                                extra_msg="{} subdivision is trying to replace"
                                " {}={!r} field by {}={!r}".format(
                                    self.subdivision_code,
                                    field_id, current_value,
                                    field_id, new_value))

            self._fields.update(parent_metadata)

    def validate(self):
        """ Check fields consistency and requirements in one go.

        Properly check that fields are consistent between themselves, and only
        raise an exception at the end, for the whole address object. Our custom
        exception will provide a detailed status of bad fields.
        """
        # Keep a classification of bad fields along the validation process.
        required_fields = set()
        invalid_fields = set()
        inconsistent_fields = set()

        # Check that all required fields are set.
        for field_id in self.REQUIRED_FIELDS:
            if not getattr(self, field_id):
                required_fields.add(field_id)

        # Check all fields for invalidity, only if not previously flagged as
        # required.
        if 'country_code' not in required_fields:
            # Check that the country code exists.
            try:
                countries.get(alpha2=self.country_code)
            except KeyError:
                invalid_fields.add('country_code')
        if self.subdivision_code and 'subdivision_code' not in required_fields:
            # Check that the subdivision code exists.
            try:
                subdivisions.get(code=self.subdivision_code)
            except KeyError:
                invalid_fields.add('subdivision_code')

        # Check country consistency against subdivision, only if none of the
        # two fields were previously flagged as required or invalid.
        if self.subdivision_code and not set(
                ['country_code', 'subdivision_code']).intersection(
                    required_fields.union(invalid_fields)) and \
                country_from_subdivision(
                    self.subdivision_code) != self.country_code:
            inconsistent_fields.add(
                tuple(sorted(('country_code', 'subdivision_code'))))

        # Raise our custom exception at last.
        if required_fields or invalid_fields or inconsistent_fields:
            raise InvalidAddress(
                required_fields, invalid_fields, inconsistent_fields)

    @property
    def valid(self):
        """ Return a boolean indicating if the address is valid. """
        try:
            self.validate()
        except InvalidAddress:
            return False
        return True

    @property
    def empty(self):
        """ Return True only if all fields are empty. """
        return not any(self.values())

    def __bool__(self):
        """ Consider the instance to be True if not empty. """
        return not self.empty

    def __nonzero__(self):
        """ Python2 retro-compatibility of ``__bool__()``. """
        return self.__bool__()

    @property
    def country(self):
        """ Return country object, or None if no country code is set. """
        if self.country_code:
            return countries.get(alpha2=self.country_code)
        return None

    @property
    def country_name(self):
        """ Return country's name.

        Common name always takes precedence over the default name, as the
        latter is often pompous, and sometimes false (i.e. not in sync with
        current political situation).
        """
        if self.country:
            if hasattr(self.country, 'common_name'):
                return self.country.common_name
            return self.country.name
        return None

    @property
    def subdivision(self):
        """ Return subdivision object, or None if no subdivision is set. """
        if self.subdivision_code:
            return subdivisions.get(code=self.subdivision_code)
        return None

    @property
    def subdivision_name(self):
        """ Return subdivision's name. """
        if self.subdivision:
            return self.subdivision.name
        return None

    @property
    def subdivision_type_name(self):
        """ Return subdivision's type human-readable name. """
        if self.subdivision:
            return self.subdivision.type
        return None

    @property
    def subdivision_type_id(self):
        """ Return subdivision's type as a Python-friendly ID string. """
        if self.subdivision:
            return subdivision_type_id(self.subdivision)
        return None
511
512
513
# Address utils.
514
515
def random_word(word_lenght=8):
    """ Return a readable random string.

    The word is ``word_lenght`` characters long and alternates consonants (at
    even positions) and vowels (at odd positions).

    Source:
    http://code.activestate.com/recipes/526619-friendly-readable-id-strings/#c3

    .. deprecated:: 0.10.0

       Use faker package instead.
    """
    # ``stacklevel=2`` attributes the warning to the caller's line instead of
    # this one, so that users can locate the deprecated call.
    warnings.warn(
        'Use faker package instead.', DeprecationWarning, stacklevel=2)
    return ''.join([choice(
        'aeiou' if i % 2 else 'bcdfghklmnprstvw') for i in range(word_lenght)])
528
529
530
def random_phrase(word_count=4, min_word_lenght=2, max_word_lenght=10):
    """ Return a readable random phrase.

    The phrase is made of ``word_count`` random words separated by single
    spaces. The length of each word is picked at random between
    ``min_word_lenght`` and ``max_word_lenght``, both included.

    Source:
    http://code.activestate.com/recipes/526619-friendly-readable-id-strings/#c3

    .. deprecated:: 0.10.0

       Use faker package instead.
    """
    # ``stacklevel=2`` attributes the warning to the caller's line instead of
    # this one, so that users can locate the deprecated call.
    warnings.warn(
        'Use faker package instead.', DeprecationWarning, stacklevel=2)
    return ' '.join([random_word(randint(
        min_word_lenght, max_word_lenght)) for _ in range(word_count)])
543
544
545
def random_postal_code():
    """ Return a parsable random postal code.

    The code is 4 to 10 characters long, each of them being picked at random
    among ASCII uppercase letters, digits, hyphen and space.

    .. deprecated:: 0.10.0

       Use faker package instead.
    """
    # ``stacklevel=2`` attributes the warning to the caller's line instead of
    # this one, so that users can locate the deprecated call.
    warnings.warn(
        'Use faker package instead.', DeprecationWarning, stacklevel=2)
    return ''.join([
        choice(string.ascii_uppercase + string.digits + '- ')
        for _ in range(randint(4, 10))])
556
557
558
def random_address(locale=None):
    """ Return a random, valid address.

    A ``locale`` parameter tries to produce a localized-consistent address.
    Else a random locale is picked-up.

    The address is built in non-strict mode, so values derived from the
    randomly-picked subdivision take precedence over the generated ones.
    """
    # Temporarily exclude the chinese locale, while we are waiting for a new
    # faker release. See: https://github.com/joke2k/faker/pull/329
    # An explicit ``'cn'`` locale is therefore replaced by a random one too.
    while locale in [None, 'cn']:
        locale = faker.providers.misc.Provider.language_code()
    fake = faker.Faker(locale=locale)

    components = {
        'line1': fake.street_address(),
        'line2': fake.sentence(),
        'postal_code': fake.postcode(),
        'city_name': fake.city(),
        'country_code': fake.country_code()}
    # Only set a subdivision if the random country has some.
    subdiv_codes = list(territory_children_codes(components['country_code']))
    if subdiv_codes:
        components['subdivision_code'] = choice(subdiv_codes)

    return Address(strict=False, **components)
581
582
583
# Subdivisions utils.
584
585
def subdivision_type_id(subdivision):
    """ Normalize subdivision type name into a Python-friendly ID.

    Here is the list of all subdivision types defined by ``pycountry`` v1.8::

        >>> print('\\n'.join(sorted(set([x.type for x in subdivisions]))))
        Administration
        Administrative Region
        Administrative Territory
        Administrative atoll
        Administrative region
        Arctic Region
        Area
        Autonomous City
        Autonomous District
        Autonomous Province
        Autonomous Region
        Autonomous city
        Autonomous community
        Autonomous municipality
        Autonomous province
        Autonomous region
        Autonomous republic
        Autonomous sector
        Autonomous territorial unit
        Borough
        Canton
        Capital District
        Capital Metropolitan City
        Capital Territory
        Capital city
        Capital district
        Capital territory
        Chains (of islands)
        City
        City corporation
        City with county rights
        Commune
        Constitutional province
        Council area
        Country
        County
        Department
        Dependency
        Development region
        District
        District council area
        Division
        Economic Prefecture
        Economic region
        Emirate
        Entity
        Federal Dependency
        Federal District
        Federal Territories
        Federal district
        Geographical Entity
        Geographical region
        Geographical unit
        Governorate
        Included for completeness
        Indigenous region
        Island
        Island council
        Island group
        Local council
        London borough
        Metropolitan cities
        Metropolitan department
        Metropolitan district
        Metropolitan region
        Municipalities
        Municipality
        Oblast
        Outlying area
        Overseas region/department
        Overseas territorial collectivity
        Parish
        Popularates
        Prefecture
        Province
        Quarter
        Rayon
        Region
        Regional council
        Republic
        Republican City
        Self-governed part
        Special District
        Special Municipality
        Special Region
        Special administrative region
        Special city
        Special island authority
        Special municipality
        Special zone
        State
        Territorial unit
        Territory
        Town council
        Two-tier county
        Union territory
        Unitary authority
        Unitary authority (England)
        Unitary authority (Wales)
        district
        state
        zone

    This method transforms and normalizes any of these into Python-friendly
    IDs: the type name is slugified in lower-case, with underscores as
    separators.
    """
    type_id = slugify(subdivision.type, to_lower=True).replace('-', '_')

    # Any occurrence of the 'city' or 'municipality' word in the type
    # overrides its classification to a city.
    # NOTE(review): only whole words are matched, so plural forms like
    # 'cities' or 'municipalities' are left untouched -- confirm this is
    # intended.
    if {'city', 'municipality'}.intersection(type_id.split('_')):
        type_id = 'city'

    return type_id
704
705
706
def subdivision_metadata(subdivision):
    """ Return a serialize dict of subdivision metadata.

    Metadata IDs are derived from subdivision type. For a subdivision whose
    type ID is ``<type>``, the returned dict features:

    * ``<type>``: the subdivision object itself.
    * ``<type>_code``: the subdivision's code.
    * ``<type>_name``: the subdivision's name.
    * ``<type>_type_name``: the subdivision's human-readable type.
    """
    subdiv_type_id = subdivision_type_id(subdivision)
    metadata = {
        '{}'.format(subdiv_type_id): subdivision,
        '{}_code'.format(subdiv_type_id): subdivision.code,
        '{}_name'.format(subdiv_type_id): subdivision.name,
        '{}_type_name'.format(subdiv_type_id): subdivision.type}

    # Check that we are not producing metadata IDs colliding with address
    # fields.
    # NOTE(review): as written, the assertion only fails when *all*
    # non-whitelisted IDs are base fields (``issubset``); a partial collision
    # goes undetected. Confirm whether an intersection test was intended.
    assert not set(metadata).difference(
        Address.SUBDIVISION_METADATA_WHITELIST).issubset(
            Address.BASE_FIELD_IDS)

    return metadata
725