Completed
Push — develop ( 599911...5d373a )
by A
01:35
created

postal_address.Address.__str__()   A

Complexity

Conditions 1

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 1
dl 0
loc 3
rs 10
1
# -*- coding: utf-8 -*-
2
#
3
# Copyright (c) 2013-2016 Scaleway and Contributors. All Rights Reserved.
4
#                         Kevin Deldycke <[email protected]>
5
#                         Gilles Dartiguelongue <[email protected]>
6
#
7
# Licensed under the BSD 2-Clause License (the "License"); you may not use this
8
# file except in compliance with the License. You may obtain a copy of the
9
# License at http://opensource.org/licenses/BSD-2-Clause
10
11
u""" Utilities for address parsing and rendering.
12
13
Only provides address validation for the moment, but may be used in the future
14
for localized rendering (see issue #4).
15
"""
16
17
from __future__ import (
18
    absolute_import,
19
    division,
20
    print_function,
21
    unicode_literals
22
)
23
24
import re
25
import string
26
import warnings
27
from random import choice, randint
28
29
import faker
30
from boltons.strutils import slugify
31
32
from pycountry import countries, subdivisions
33
34
from . import PY2, PY3
35
from .territory import (
36
    country_from_subdivision,
37
    default_subdivision_code,
38
    normalize_territory_code,
39
    territory_children_codes,
40
    territory_parents
41
)
42
43
44
if PY3:
    # Python 3 has no ``basestring`` builtin. Alias the name to the text and
    # binary string types so the ``isinstance(x, basestring)`` checks used
    # throughout this module work on both major versions.
    basestring = str, bytes
46
47
48
class InvalidAddress(ValueError):
    """ Exception raised when an address fails validation.

    The offending fields are kept on the instance, sorted by kind of failure:

    * ``required_fields``: set of field IDs that are missing.
    * ``invalid_fields``: set of field IDs holding an invalid value.
    * ``inconsistent_fields``: set of ``(field_id, field_id)`` pairs whose
      values contradict each other.
    * ``extra_msg``: optional free-form text appended to the message.
    """

    def __init__(self, required_fields=None, invalid_fields=None,
                 inconsistent_fields=None, extra_msg=None):
        """ Store the classification of bad fields.

        Any classification left unset (or empty) defaults to an empty set.
        """
        super(InvalidAddress, self).__init__()
        self.required_fields = required_fields or set()
        self.invalid_fields = invalid_fields or set()
        self.inconsistent_fields = inconsistent_fields or set()
        self.extra_msg = extra_msg

    def __str__(self):
        """ Build a one-line, human-readable summary of all failures.

        Reasons are joined by ``'; '`` and terminated by a dot.
        """
        reasons = []

        # Required and invalid fields share the same sentence structure.
        simple_failures = (
            (self.required_fields, '{} {} required'),
            (self.invalid_fields, '{} {} invalid'))
        for field_ids, template in simple_failures:
            if not field_ids:
                continue
            verb = 'is' if len(field_ids) == 1 else 'are'
            reasons.append(template.format(
                ', '.join(sorted(field_ids)), verb))

        # One sentence per pair of contradicting fields.
        reasons.extend(
            '{} is inconsistent with {}'.format(first_id, second_id)
            for first_id, second_id in sorted(self.inconsistent_fields))

        if self.extra_msg:
            reasons.append(self.extra_msg)

        return '{}.'.format('; '.join(reasons))
80
81
82
class Address(object):

    """ Define a postal address.

    All addresses share the following fields:

    * ``line1`` (required): a non-constrained string.
    * ``line2``: a non-constrained string.
    * ``postal_code`` (required): a non-constrained string (see issue #2).
    * ``city_name`` (required): a non-constrained string.
    * ``country_code`` (required): an ISO 3166-1 alpha-2 code.
    * ``subdivision_code``: an ISO 3166-2 code.

    At instantiation, the ``normalize()`` method is called. The latter tries
    to clean-up the data and populate empty fields that can be derived from
    others. As such, ``city_name`` can be overridden by ``subdivision_code``.
    See the internal ``SUBDIVISION_METADATA_WHITELIST`` constant.

    If inconsistencies are found at the normalization step, they are left as-is
    to give a chance to the ``validate()`` method to catch them. Which means
    that, after each normalization (including the one at initialization), it is
    your job to call the ``validate()`` method manually to check that the
    address is good.
    """

    # All normalized field IDs and values of the address are stored in the
    # ``_fields`` instance dict, which is created by ``__init__()``.

    # Fields common to any postal address. Those are free-form fields, allowed
    # to be set directly by the user, although their values might be normalized
    # and cleaned-up automatically by the normalization method.
    BASE_FIELD_IDS = frozenset([
        'line1', 'line2', 'postal_code', 'city_name', 'country_code',
        'subdivision_code'])

    # List of subdivision-derived metadata IDs which are allowed to collide
    # with base field IDs.
    SUBDIVISION_METADATA_WHITELIST = frozenset(['city_name'])
    assert SUBDIVISION_METADATA_WHITELIST.issubset(BASE_FIELD_IDS)

    # Fields tested on validate().
    REQUIRED_FIELDS = frozenset([
        'line1', 'postal_code', 'city_name', 'country_code'])
    assert REQUIRED_FIELDS.issubset(BASE_FIELD_IDS)

    def __init__(self, strict=True, **kwargs):
        """ Set address' individual fields and normalize them.

        Only IDs from ``BASE_FIELD_IDS`` are accepted as keyword arguments;
        anything else raises ``KeyError``. By default, normalization is
        ``strict`` (see ``normalize()``).
        """
        # Only common fields are allowed to be set directly.
        unknown_fields = set(kwargs).difference(self.BASE_FIELD_IDS)
        if unknown_fields:
            raise KeyError(
                "{!r} fields are not allowed to be set freely.".format(
                    unknown_fields))
        # Initialize base fields values.
        self._fields = dict.fromkeys(self.BASE_FIELD_IDS)
        # Load provided fields.
        for field_id, field_value in kwargs.items():
            self[field_id] = field_value
        # Normalize addresses fields.
        self.normalize(strict=strict)

    def __repr__(self):
        """ Print all fields available from the address.

        Also include internal fields disguised as properties.
        """
        # Repr all plain fields.
        fields_repr = ['{}={!r}'.format(k, v) for k, v in self.items()]
        # Repr all internal properties.
        for internal_id in [
                'valid', 'empty', 'country_name', 'subdivision_name',
                'subdivision_type_name', 'subdivision_type_id']:
            fields_repr.append(
                '{}={!r}'.format(internal_id, getattr(self, internal_id)))
        return '{}({})'.format(
            self.__class__.__name__, ', '.join(sorted(fields_repr)))

    def __unicode__(self):
        """ Return a simple unicode string representation of the address block.
        """
        return self.render()

    def __str__(self):
        """ Return the rendered address block as a native string.

        On Python 2 the unicode rendering is encoded to UTF-8 bytes; on
        Python 3 it is returned as-is.
        """
        rendered = self.__unicode__()
        if PY2:
            return rendered.encode('utf-8')
        return rendered

    def __getattr__(self, name):
        """ Expose fields as attributes.

        Only called when regular attribute lookup fails. Raises
        ``AttributeError`` for names that are not a stored field.
        """
        # Fetch ``_fields`` straight from the instance dict: going through
        # ``self._fields`` would re-enter ``__getattr__`` and recurse forever
        # whenever the dict does not exist yet (bare ``__new__``, ``copy``,
        # unpickling).
        fields = self.__dict__.get('_fields')
        if fields is not None and name in fields:
            return fields[name]
        raise AttributeError('{!r} object has no attribute {!r}'.format(
            self.__class__.__name__, name))

    def __setattr__(self, name, value):
        """ Allow update of address fields as attributes. """
        if name in self.BASE_FIELD_IDS:
            self[name] = value
            return
        super(Address, self).__setattr__(name, value)

    # Let an address be accessed like a dict of its fields IDs & values.
    # This is a proxy to the internal _fields dict.

    def __len__(self):
        """ Return the number of fields. """
        return len(self._fields)

    def __getitem__(self, key):
        """ Return the value of a field.

        Raises ``TypeError`` if the key is not a string, and ``KeyError`` if
        no such field is stored.
        """
        if not isinstance(key, basestring):
            raise TypeError(
                'Field ID must be a string, not {!r}.'.format(key))
        return self._fields[key]

    def __setitem__(self, key, value):
        """ Set a field's value.

        Only base fields are allowed to be set explicitly: any other key
        raises ``KeyError``. Keys must be strings, and values must be strings
        or ``None``, else ``TypeError`` is raised.
        """
        if not isinstance(key, basestring):
            raise TypeError(
                'Field ID must be a string, not {!r}.'.format(key))
        if not (isinstance(value, basestring) or value is None):
            raise TypeError(
                'Field value must be a string or None, not {!r}.'.format(
                    value))
        if key not in self.BASE_FIELD_IDS:
            raise KeyError(key)
        self._fields[key] = value

    def __delitem__(self, key):
        """ Remove a field.

        Base fields are reset to ``None`` instead of being removed; any other
        key is deleted from the internal dict.
        """
        if key in self.BASE_FIELD_IDS:
            self._fields[key] = None
        else:
            del self._fields[key]

    def __iter__(self):
        """ Iterate over field IDs. """
        return iter(self._fields)

    def keys(self):
        """ Return field IDs (a list on Python 2, a view on Python 3). """
        return self._fields.keys()

    def values(self):
        """ Return field values (a list on Python 2, a view on Python 3). """
        return self._fields.values()

    def items(self):
        """ Return field IDs & values pairs (a list on Python 2, a view on
        Python 3).
        """
        return self._fields.items()

    def render(self, separator='\n'):
        """ Render a human-friendly address block.

        The block is composed of:

        * The ``line1`` field rendered as-is if not empty.
        * The ``line2`` field rendered as-is if not empty.
        * A third line made of the postal code, the city name and state name if
          any is set.
        * A fourth optional line with the subdivision name if its value does
          not overlap with the city, state or country name.
        * The last line feature country's common name.

        Lines are joined with ``separator``.
        """
        lines = []

        if self.line1:
            lines.append(self.line1)

        if self.line2:
            lines.append(self.line2)

        # Build the third line. City and state are separated by a comma.
        locality_elements = []
        if self.city_name:
            locality_elements.append(self.city_name)
        # ``state_name`` only exists once derived from a subdivision.
        state_name = getattr(self, 'state_name', None)
        if state_name:
            locality_elements.append(state_name)

        # Separate the leading zip code and the rest by a dash. The locality
        # part is only added when not empty, so that a lone postal code is
        # not followed by a dangling ' - '.
        line3_elements = []
        if self.postal_code:
            line3_elements.append(self.postal_code)
        if locality_elements:
            line3_elements.append(', '.join(locality_elements))
        line3 = ' - '.join(line3_elements)
        if line3:
            lines.append(line3)

        # Compare the vanilla subdivision name to properties that are based on
        # it and used in the current ``render()`` method to produce a printable
        # address. If none overlap, then print an additional line with the
        # subdivision name as-is to provide extra, non-redundant, territory
        # precision.
        country_name = self.country_name
        subdivision_name = self.subdivision_name
        subdiv_based_values = (self.city_name, state_name, country_name)
        if subdivision_name and subdivision_name not in subdiv_based_values:
            lines.append(subdivision_name)

        # Place the country line at the end.
        if country_name:
            lines.append(country_name)

        # Render the address block with the provided separator.
        return separator.join(lines)

    def normalize(self, strict=True):
        """ Normalize address fields.

        If values are unrecognized or invalid, they will be set to None.

        By default, the normalization is ``strict``: metadata derived from
        territory's parents are not allowed to overwrite valid address fields
        entered by the user, and ``InvalidAddress`` is raised on conflict. If
        set to ``False``, territory-derived values take precedence over
        user's.

        You need to call back the ``validate()`` method afterwards to properly
        check that the fully-qualified address is ready for consumption.
        """
        # Discard metadata derived by a previous normalization. They are not
        # base fields, so pushing them through ``self[...]`` below would raise
        # ``KeyError``; they could also be stale if the subdivision changed.
        # They are derived again from the subdivision at the end.
        for derived_id in set(self._fields).difference(self.BASE_FIELD_IDS):
            del self._fields[derived_id]

        # Strip postal codes of any characters but alphanumerics, spaces and
        # hyphens.
        if self.postal_code:
            postal_code = self.postal_code.upper()
            # Remove unrecognized characters.
            postal_code = re.sub(r'[^A-Z0-9 -]', '', postal_code)
            # Reduce sequences of mixed hyphens and spaces to single hyphen.
            postal_code = re.sub(
                r'[^A-Z0-9]*-+[^A-Z0-9]*', '-', postal_code)
            # Edge case: remove leading and trailing hyphens. Surrounding
            # spaces are taken care of by the whitespace normalization below.
            self.postal_code = postal_code.strip('-')

        # Normalize spaces. Iterate over a snapshot as fields are rewritten.
        for field_id, field_value in list(self.items()):
            if isinstance(field_value, basestring):
                self[field_id] = ' '.join(field_value.split())

        # Reset empty and blank strings.
        empty_fields = [f_id for f_id, f_value in self.items() if not f_value]
        for field_id in empty_fields:
            del self[field_id]

        # Swap lines if the first is empty.
        if self.line2 and not self.line1:
            self.line1, self.line2 = self.line2, self.line1

        # Normalize territory codes. Unrecognized territory codes are reset
        # to None.
        for territory_id in ['country_code', 'subdivision_code']:
            territory_code = getattr(self, territory_id)
            if territory_code:
                try:
                    code = normalize_territory_code(
                        territory_code, resolve_aliases=False)
                except ValueError:
                    code = None
                setattr(self, territory_id, code)

        # Try to set default subdivision from country if not set.
        if self.country_code and not self.subdivision_code:
            self.subdivision_code = default_subdivision_code(self.country_code)
            # If the country set its own subdivision, reset it. It will be
            # properly re-guessed below.
            if self.subdivision_code:
                self.country_code = None

        # Automatically populate address fields with metadata extracted from
        # all subdivision parents.
        if self.subdivision_code:
            parent_metadata = {
                # All subdivisions have a parent country.
                'country_code': country_from_subdivision(
                    self.subdivision_code)}

            # Add metadata of each subdivision parent.
            for parent_subdiv in territory_parents(
                    self.subdivision_code, include_country=False):
                parent_metadata.update(subdivision_metadata(parent_subdiv))

            # Parent metadata are not allowed to overwrite address fields
            # if not blank, unless strict mode is de-activated.
            if strict:
                for field_id, new_value in parent_metadata.items():
                    # New metadata are not allowed to be blank.
                    assert new_value
                    current_value = self._fields.get(field_id)
                    if current_value and field_id in self.BASE_FIELD_IDS:

                        # Build the list of substitute values that are
                        # equivalent to our new normalized target.
                        alias_values = set([new_value])
                        if field_id == 'country_code':
                            # Allow normalization if the current country code
                            # is the direct parent of a subdivision which also
                            # have its own country code.
                            alias_values.add(subdivisions.get(
                                code=self.subdivision_code).country_code)

                        # Change of current value is allowed if it is a direct
                        # substitute to our new normalized value.
                        if current_value not in alias_values:
                            raise InvalidAddress(
                                inconsistent_fields=set([
                                    tuple(sorted((
                                        field_id, 'subdivision_code')))]),
                                extra_msg="{} subdivision is trying to replace"
                                " {}={!r} field by {}={!r}".format(
                                    self.subdivision_code,
                                    field_id, current_value,
                                    field_id, new_value))

            self._fields.update(parent_metadata)

    def validate(self):
        """ Check fields consistency and requirements in one go.

        Properly check that fields are consistent between themselves, and only
        raise an exception at the end, for the whole address object. Our custom
        ``InvalidAddress`` exception will provide a detailed status of bad
        fields.
        """
        # Keep a classification of bad fields along the validation process.
        required_fields = set()
        invalid_fields = set()
        inconsistent_fields = set()

        # Check that all required fields are set.
        for field_id in self.REQUIRED_FIELDS:
            if not getattr(self, field_id):
                required_fields.add(field_id)

        # Check all fields for invalidity, only if not previously flagged as
        # required.
        if 'country_code' not in required_fields:
            # Check that the country code exists.
            try:
                countries.get(alpha2=self.country_code)
            except KeyError:
                invalid_fields.add('country_code')
        if self.subdivision_code and 'subdivision_code' not in required_fields:
            # Check that the subdivision code exists.
            try:
                subdivisions.get(code=self.subdivision_code)
            except KeyError:
                invalid_fields.add('subdivision_code')

        # Check country consistency against subdivision, only if none of the
        # two fields were previously flagged as required or invalid.
        if self.subdivision_code and not set(
                ['country_code', 'subdivision_code']).intersection(
                    required_fields.union(invalid_fields)) and \
                country_from_subdivision(
                    self.subdivision_code) != self.country_code:
            inconsistent_fields.add(
                tuple(sorted(('country_code', 'subdivision_code'))))

        # Raise our custom exception at last.
        if required_fields or invalid_fields or inconsistent_fields:
            raise InvalidAddress(
                required_fields, invalid_fields, inconsistent_fields)

    @property
    def valid(self):
        """ Return a boolean indicating if the address is valid. """
        try:
            self.validate()
        except InvalidAddress:
            return False
        return True

    @property
    def empty(self):
        """ Return True only if all fields are empty. """
        return not any(self.values())

    def __bool__(self):
        """ Consider the instance to be True if not empty. """
        return not self.empty

    def __nonzero__(self):
        """ Python2 retro-compatibility of ``__bool__()``. """
        return self.__bool__()

    @property
    def country(self):
        """ Return country object, or None if no country code is set. """
        if self.country_code:
            return countries.get(alpha2=self.country_code)
        return None

    @property
    def country_name(self):
        """ Return country's name, or None if no country is set.

        Common name always takes precedence over the default name, as the
        latter is often pompous, and sometimes false (i.e. not in sync with
        current political situation).
        """
        if self.country:
            if hasattr(self.country, 'common_name'):
                return self.country.common_name
            return self.country.name
        return None

    @property
    def subdivision(self):
        """ Return subdivision object, or None if no subdivision code is set.
        """
        if self.subdivision_code:
            return subdivisions.get(code=self.subdivision_code)
        return None

    @property
    def subdivision_name(self):
        """ Return subdivision's name, or None if no subdivision is set. """
        if self.subdivision:
            return self.subdivision.name
        return None

    @property
    def subdivision_type_name(self):
        """ Return subdivision's type human-readable name, or None if no
        subdivision is set.
        """
        if self.subdivision:
            return self.subdivision.type
        return None

    @property
    def subdivision_type_id(self):
        """ Return subdivision's type as a Python-friendly ID string, or None
        if no subdivision is set.
        """
        if self.subdivision:
            return subdivision_type_id(self.subdivision)
        return None
517
518
519
# Address utils.
520
521
def random_word(word_lenght=8):
    """ Return a readable random string.

    The word is ``word_lenght`` characters long and alternates consonants and
    vowels, starting with a consonant.

    Source:
    http://code.activestate.com/recipes/526619-friendly-readable-id-strings/#c3

    .. deprecated:: 0.10.0

       Use faker package instead.
    """
    # ``stacklevel=2`` attributes the warning to the caller's line instead of
    # this module, so that it is reported where the deprecated call is made.
    warnings.warn(
        'Use faker package instead.', DeprecationWarning, stacklevel=2)
    return ''.join(
        choice('aeiou' if index % 2 else 'bcdfghklmnprstvw')
        for index in range(word_lenght))
534
535
536
def random_phrase(word_count=4, min_word_lenght=2, max_word_lenght=10):
    """ Return a readable random phrase.

    The phrase is made of ``word_count`` space-separated random words, each
    having a random length between ``min_word_lenght`` and
    ``max_word_lenght`` (both included).

    Source:
    http://code.activestate.com/recipes/526619-friendly-readable-id-strings/#c3

    .. deprecated:: 0.10.0

       Use faker package instead.
    """
    # ``stacklevel=2`` attributes the warning to the caller's line instead of
    # this module, so that it is reported where the deprecated call is made.
    warnings.warn(
        'Use faker package instead.', DeprecationWarning, stacklevel=2)
    return ' '.join(
        random_word(randint(min_word_lenght, max_word_lenght))
        for _ in range(word_count))
549
550
551
def random_postal_code():
    """ Return a parsable random postal code.

    The code is 4 to 10 characters long and made of uppercase ASCII letters,
    digits, hyphens and spaces.

    .. deprecated:: 0.10.0

       Use faker package instead.
    """
    # ``stacklevel=2`` attributes the warning to the caller's line instead of
    # this module, so that it is reported where the deprecated call is made.
    warnings.warn(
        'Use faker package instead.', DeprecationWarning, stacklevel=2)
    # Build the alphabet once instead of on every character draw.
    alphabet = string.ascii_uppercase + string.digits + '- '
    return ''.join(choice(alphabet) for _ in range(randint(4, 10)))
562
563
564
def random_address(locale=None):
    """ Return a random address built from faker-generated components.

    When a ``locale`` is given, faker is asked to produce data for that
    locale. Otherwise (or if the locale is excluded, see below) a random
    locale is picked.

    The address is created with non-strict normalization; when the drawn
    country has known subdivisions, one of them is picked at random.
    """
    # Temporarily exclude the chinese locale, while waiting for a new faker
    # release. See: https://github.com/joke2k/faker/pull/329
    excluded_locales = [None, 'cn']
    while locale in excluded_locales:
        locale = faker.providers.misc.Provider.language_code()
    fake = faker.Faker(locale=locale)

    # Components are drawn one after the other, in a fixed order.
    components = {}
    components['line1'] = fake.street_address()
    components['line2'] = fake.sentence()
    components['postal_code'] = fake.postcode()
    components['city_name'] = fake.city()
    country_code = fake.country_code()
    components['country_code'] = country_code

    # Attach a random subdivision of the country, if it has any.
    subdiv_codes = list(territory_children_codes(country_code))
    if subdiv_codes:
        components['subdivision_code'] = choice(subdiv_codes)

    return Address(strict=False, **components)
587
588
589
# Subdivisions utils.
590
591
def subdivision_type_id(subdivision):
    """ Normalize subdivision type name into a Python-friendly ID.

    The ID is the slugified ``subdivision.type``. Any type whose slug
    contains the ``city`` or ``municipality`` word is classified as ``city``.

    Here is the list of all subdivision types defined by ``pycountry`` v1.8::

        >>> print('\\n'.join(sorted(set([x.type for x in subdivisions]))))
        Administration
        Administrative Region
        Administrative Territory
        Administrative atoll
        Administrative region
        Arctic Region
        Area
        Autonomous City
        Autonomous District
        Autonomous Province
        Autonomous Region
        Autonomous city
        Autonomous community
        Autonomous municipality
        Autonomous province
        Autonomous region
        Autonomous republic
        Autonomous sector
        Autonomous territorial unit
        Borough
        Canton
        Capital District
        Capital Metropolitan City
        Capital Territory
        Capital city
        Capital district
        Capital territory
        Chains (of islands)
        City
        City corporation
        City with county rights
        Commune
        Constitutional province
        Council area
        Country
        County
        Department
        Dependency
        Development region
        District
        District council area
        Division
        Economic Prefecture
        Economic region
        Emirate
        Entity
        Federal Dependency
        Federal District
        Federal Territories
        Federal district
        Geographical Entity
        Geographical region
        Geographical unit
        Governorate
        Included for completeness
        Indigenous region
        Island
        Island council
        Island group
        Local council
        London borough
        Metropolitan cities
        Metropolitan department
        Metropolitan district
        Metropolitan region
        Municipalities
        Municipality
        Oblast
        Outlying area
        Overseas region/department
        Overseas territorial collectivity
        Parish
        Popularates
        Prefecture
        Province
        Quarter
        Rayon
        Region
        Regional council
        Republic
        Republican City
        Self-governed part
        Special District
        Special Municipality
        Special Region
        Special administrative region
        Special city
        Special island authority
        Special municipality
        Special zone
        State
        Territorial unit
        Territory
        Town council
        Two-tier county
        Union territory
        Unitary authority
        Unitary authority (England)
        Unitary authority (Wales)
        district
        state
        zone

    This method transforms and normalizes any of these into Python-friendly
    IDs.
    """
    slug = slugify(subdivision.type)

    # Any occurrence of the 'city' or 'municipality' word in the type
    # overrides its classification to a city.
    words = slug.split('_')
    if 'city' in words or 'municipality' in words:
        return 'city'

    return slug
710
711
712
def subdivision_metadata(subdivision):
    """ Return a dict of metadata describing a subdivision.

    Metadata IDs are all prefixed by the subdivision type ID, as returned by
    ``subdivision_type_id()``:

    * ``<type_id>``: the subdivision object itself.
    * ``<type_id>_code``: the subdivision's code.
    * ``<type_id>_name``: the subdivision's name.
    * ``<type_id>_type_name``: the subdivision's type.
    """
    type_id = subdivision_type_id(subdivision)

    id_templates = (
        ('{}', subdivision),
        ('{}_code', subdivision.code),
        ('{}_name', subdivision.name),
        ('{}_type_name', subdivision.type))
    metadata = {}
    for template, value in id_templates:
        metadata[template.format(type_id)] = value

    # Check that we are not producing metadata IDs colliding with address
    # fields.
    # NOTE(review): as written, this only fails when *every* non-whitelisted
    # ID is a base field; a partial collision goes through. Confirm whether a
    # stricter disjointness check is intended before tightening it.
    non_whitelisted_ids = set(metadata) - Address.SUBDIVISION_METADATA_WHITELIST
    assert not non_whitelisted_ids.issubset(Address.BASE_FIELD_IDS)

    return metadata
731