Completed
Push — develop ( b22b83...ef57bc )
by A
03:03
created

postal_address.Address.__unicode__()   A

Complexity

Conditions 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 1
dl 0
loc 4
rs 10
1
# -*- coding: utf-8 -*-
2
#
3
# Copyright (c) 2013-2016 Scaleway and Contributors. All Rights Reserved.
4
#                         Kevin Deldycke <[email protected]>
5
#                         Gilles Dartiguelongue <[email protected]>
6
#
7
# Licensed under the BSD 2-Clause License (the "License"); you may not use this
8
# file except in compliance with the License. You may obtain a copy of the
9
# License at http://opensource.org/licenses/BSD-2-Clause
10
11
u""" Utilities for address parsing and rendering.

Only provides address validation for the moment, but may be used in the future
for localized rendering (see issue #4).
"""
16
17
from __future__ import (
18
    absolute_import,
19
    division,
20
    print_function,
21
    unicode_literals
22
)
23
24
import re
25
import string
26
import warnings
27
from random import choice, randint
28
29
import faker
30
from boltons.strutils import slugify
31
32
from pycountry import countries, subdivisions
33
34
from .territory import (
35
    country_from_subdivision,
36
    default_subdivision_code,
37
    normalize_territory_code,
38
    territory_children_codes,
39
    territory_parents
40
)
41
42
# Python 2/3 compatibility shim: ``basestring`` only exists on Python 2. On
# Python 3 the name lookup raises ``NameError`` and we fall back to a tuple of
# types usable as the second argument of ``isinstance()``.
try:
    basestring
except NameError:  # pragma: no cover
    basestring = (str, bytes)
46
47
48
class InvalidAddress(ValueError):
    """ Custom exception providing details about address failing validation.

    Bad fields are kept in three separate classifications:

    * ``required_fields``: IDs of fields that are missing.
    * ``invalid_fields``: IDs of fields holding an invalid value.
    * ``inconsistent_fields``: pairs ``(field_id_1, field_id_2)`` of fields
      contradicting each other.

    An optional free-form ``extra_msg`` is appended to the rendered error.
    """

    def __init__(self, required_fields=None, invalid_fields=None,
                 inconsistent_fields=None, extra_msg=None):
        """ Exception keep internally a classification of bad fields.

        Any classification left unset (or passed empty) is stored as an empty
        ``set``.
        """
        super(InvalidAddress, self).__init__()
        self.required_fields = required_fields if required_fields else set()
        self.invalid_fields = invalid_fields if invalid_fields else set()
        self.inconsistent_fields = (
            inconsistent_fields if inconsistent_fields else set())
        self.extra_msg = extra_msg

    def __str__(self):
        """ Human-readable error.

        Reasons are joined by ``'; '`` and terminated by a dot. Field IDs are
        sorted so that the message is deterministic.
        """
        reasons = []
        if self.required_fields:
            reasons.append('{} {} required'.format(
                ', '.join(sorted(self.required_fields)),
                'is' if len(self.required_fields) == 1 else 'are'))
        if self.invalid_fields:
            reasons.append('{} {} invalid'.format(
                ', '.join(sorted(self.invalid_fields)),
                'is' if len(self.invalid_fields) == 1 else 'are'))
        if self.inconsistent_fields:
            for field_id_1, field_id_2 in sorted(self.inconsistent_fields):
                reasons.append('{} is inconsistent with {}'.format(
                    field_id_1, field_id_2))
        if self.extra_msg:
            reasons.append(self.extra_msg)
        return '{}.'.format('; '.join(reasons))
80
81
82
class Address(object):

    """ Define a postal address.

    All addresses share the following fields:
    * ``line1`` (required): a non-constrained string.
    * ``line2``: a non-constrained string.
    * ``postal_code`` (required): a non-constrained string (see issue #2).
    * ``city_name`` (required): a non-constrained string.
    * ``country_code`` (required): an ISO 3166-1 alpha-2 code.
    * ``subdivision_code``: an ISO 3166-2 code.

    At instantiation, the ``normalize()`` method is called. The latter tries
    to clean-up the data and populate empty fields that can be derived from
    others. As such, ``city_name`` can be overridden by ``subdivision_code``.
    See the internal ``SUBDIVISION_METADATA_WHITELIST`` constant.

    If inconsistencies are found at the normalization step, they are left as-is
    to give a chance to the ``validate()`` method to catch them. Which means
    that, after each normalization (including the one at initialization), it is
    your job to call the ``validate()`` method manually to check that the
    address is good.
    """

    # All normalized field's IDs and values of the address are stored in the
    # per-instance ``_fields`` dict, which is created by ``__init__()``.

    # Fields common to any postal address. Those are free-form fields, allowed
    # to be set directly by the user, although their values might be normalized
    # and cleaned-up automatically by the normalization method.
    BASE_FIELD_IDS = frozenset([
        'line1', 'line2', 'postal_code', 'city_name', 'country_code',
        'subdivision_code'])

    # List of subdivision-derived metadata IDs which are allowed to collide
    # with base field IDs.
    SUBDIVISION_METADATA_WHITELIST = frozenset(['city_name'])
    assert SUBDIVISION_METADATA_WHITELIST.issubset(BASE_FIELD_IDS)

    # Fields tested on validate().
    REQUIRED_FIELDS = frozenset([
        'line1', 'postal_code', 'city_name', 'country_code'])
    assert REQUIRED_FIELDS.issubset(BASE_FIELD_IDS)

    def __init__(self, strict=True, **kwargs):
        """ Set address' individual fields and normalize them.

        Only IDs listed in ``BASE_FIELD_IDS`` are accepted as keyword
        arguments; any other raises a ``KeyError``.

        By default, normalization is ``strict``.
        """
        # Only common fields are allowed to be set directly.
        unknown_fields = set(kwargs).difference(self.BASE_FIELD_IDS)
        if unknown_fields:
            raise KeyError(
                "{!r} fields are not allowed to be set freely.".format(
                    unknown_fields))
        # Initialize base fields values.
        self._fields = dict.fromkeys(self.BASE_FIELD_IDS)
        # Load provided fields.
        for field_id, field_value in kwargs.items():
            self[field_id] = field_value
        # Normalize addresses fields.
        self.normalize(strict=strict)

    def __repr__(self):
        """ Print all fields available from the address.

        Also include internal fields disguised as properties.
        """
        # Repr all plain fields.
        fields_repr = ['{}={!r}'.format(k, v) for k, v in self.items()]
        # Repr all internal properties.
        for internal_id in [
                'valid', 'empty', 'country_name', 'subdivision_name',
                'subdivision_type_name', 'subdivision_type_id']:
            fields_repr.append(
                '{}={!r}'.format(internal_id, getattr(self, internal_id)))
        return '{}({})'.format(
            self.__class__.__name__, ', '.join(sorted(fields_repr)))

    def __unicode__(self):
        """ Return a simple unicode string representation of the address block.
        """
        return self.render()

    def __str__(self):
        """ Return the rendered address block as a native string.

        On Python 2 the native ``str`` type is a byte string, so the rendered
        block is encoded to UTF-8. On Python 3 the rendered text is returned
        as-is (the ``unicode`` builtin does not exist there).
        """
        rendered = self.render()
        if str is bytes:  # Python 2 only.
            return rendered.encode('utf-8')
        return rendered

    def __getattr__(self, name):
        """ Expose fields as attributes.

        Only called when regular attribute lookup fails. The fields dict is
        fetched from the instance ``__dict__`` directly: going through
        ``self._fields`` would re-enter this method and recurse forever when
        the dict has not been created yet.
        """
        fields = self.__dict__.get('_fields', {})
        if name in fields:
            return fields[name]
        raise AttributeError(
            "{!r} object has no attribute {!r}".format(
                self.__class__.__name__, name))

    def __setattr__(self, name, value):
        """ Allow update of address fields as attributes. """
        if name in self.BASE_FIELD_IDS:
            self[name] = value
            return
        super(Address, self).__setattr__(name, value)

    # Let an address be accessed like a dict of its fields IDs & values.
    # This is a proxy to the internal _fields dict.

    def __len__(self):
        """ Return the number of fields. """
        return len(self._fields)

    def __getitem__(self, key):
        """ Return the value of a field.

        Raises ``TypeError`` if the key is not a string, and ``KeyError`` if
        the field is unknown.
        """
        if not isinstance(key, basestring):
            raise TypeError(
                "Field ID must be a string, not {!r}.".format(key))
        return self._fields[key]

    def __setitem__(self, key, value):
        """ Set a field's value.

        Only base fields are allowed to be set explicitly, and only to a
        string or ``None``.
        """
        if not isinstance(key, basestring):
            raise TypeError(
                "Field ID must be a string, not {!r}.".format(key))
        if not (isinstance(value, basestring) or value is None):
            raise TypeError(
                "Field value must be a string or None, not {!r}.".format(
                    value))
        if key not in self.BASE_FIELD_IDS:
            raise KeyError(key)
        self._fields[key] = value

    def __delitem__(self, key):
        """ Remove a field.

        Base fields are never removed from the internal dict: they are reset
        to ``None`` instead. Any other field is deleted.
        """
        if key in self.BASE_FIELD_IDS:
            self._fields[key] = None
        else:
            del self._fields[key]

    def __iter__(self):
        """ Iterate over field IDs. """
        for field_id in self._fields:
            yield field_id

    def keys(self):
        """ Return the field IDs, as given by the internal dict. """
        return self._fields.keys()

    def values(self):
        """ Return the field values, as given by the internal dict. """
        return self._fields.values()

    def items(self):
        """ Return the field IDs & values, as given by the internal dict. """
        return self._fields.items()

    def render(self, separator='\n'):
        """ Render a human-friendly address block.

        The block is composed of:
        * The ``line1`` field rendered as-is if not empty.
        * The ``line2`` field rendered as-is if not empty.
        * A third line made of the postal code, the city name and state name if
          any is set.
        * A fourth optional line with the subdivision name if its value does
          not overlap with the city, state or country name.
        * The last line feature country's common name.

        Lines are joined with ``separator``.
        """
        lines = []

        if self.line1:
            lines.append(self.line1)

        if self.line2:
            lines.append(self.line2)

        # Build the third line. City and state are separated by a comma.
        locality_elements = []
        if self.city_name:
            locality_elements.append(self.city_name)
        # ``state_name`` is a subdivision-derived field which only exists
        # once normalization produced it.
        state_name = getattr(self, 'state_name', None)
        if state_name:
            locality_elements.append(state_name)
        locality = ', '.join(locality_elements)
        # Separate the leading zip code and the rest by a dash. Blank parts
        # are skipped so that no dangling dash is produced.
        line3 = ' - '.join(
            [part for part in (self.postal_code, locality) if part])
        if line3:
            lines.append(line3)

        # Compare the vanilla subdivision name to properties that are based on
        # it and used in the current ``render()`` method to produce a printable
        # address. If none overlap, then print an additional line with the
        # subdivision name as-is to provide extra, non-redundant, territory
        # precision.
        subdiv_based_properties = [
            'city_name', 'state_name', 'country_name']
        subdiv_based_values = [
            getattr(self, prop_id) for prop_id in subdiv_based_properties
            if hasattr(self, prop_id)]
        subdivision_name = self.subdivision_name
        if subdivision_name and subdivision_name not in subdiv_based_values:
            lines.append(subdivision_name)

        # Place the country line at the end.
        country_name = self.country_name
        if country_name:
            lines.append(country_name)

        # Render the address block with the provided separator.
        return separator.join(lines)

    def normalize(self, strict=True):
        """ Normalize address fields.

        If values are unrecognized or invalid, they will be set to None.

        By default, the normalization is ``strict``: metadata derived from
        territory's parents are not allowed to overwrite valid address fields
        entered by the user, and an ``InvalidAddress`` exception is raised on
        such a conflict. If set to ``False``, territory-derived values takes
        precedence over user's.

        You need to call back the ``validate()`` method afterwards to properly
        check that the fully-qualified address is ready for consumption.
        """
        # Strip postal codes of any characters but alphanumerics, spaces and
        # hyphens.
        if self.postal_code:
            postal_code = self.postal_code.upper()
            # Remove unrecognized characters.
            postal_code = re.sub(r'[^A-Z0-9 -]', '', postal_code)
            # Reduce sequences of mixed hyphens and spaces to single hyphen.
            postal_code = re.sub(
                r'[^A-Z0-9]*-+[^A-Z0-9]*', '-', postal_code)
            # Edge case: remove leading and trailing hyphens. Surrounding
            # spaces are taken care of by the space normalization below.
            self.postal_code = postal_code.strip('-')

        # Normalize spaces. Only base fields are processed: derived metadata
        # left by a previous normalization are refused by ``__setitem__()``.
        for field_id in self.BASE_FIELD_IDS:
            field_value = self._fields.get(field_id)
            if isinstance(field_value, basestring):
                self[field_id] = ' '.join(field_value.split())

        # Reset empty and blank strings.
        empty_fields = [f_id for f_id, f_value in self.items() if not f_value]
        for field_id in empty_fields:
            del self[field_id]

        # Swap lines if the first is empty.
        if self.line2 and not self.line1:
            self.line1, self.line2 = self.line2, self.line1

        # Normalize territory codes. Unrecognized territory codes are reset
        # to None.
        for territory_id in ['country_code', 'subdivision_code']:
            territory_code = getattr(self, territory_id)
            if territory_code:
                try:
                    code = normalize_territory_code(
                        territory_code, resolve_aliases=False)
                except ValueError:
                    code = None
                setattr(self, territory_id, code)

        # Try to set default subdivision from country if not set.
        if self.country_code and not self.subdivision_code:
            self.subdivision_code = default_subdivision_code(self.country_code)
            # If the country set its own subdivision, reset it. It will be
            # properly re-guessed below.
            if self.subdivision_code:
                self.country_code = None

        # Automatically populate address fields with metadata extracted from
        # all subdivision parents.
        if self.subdivision_code:
            parent_metadata = {
                # All subdivisions have a parent country.
                'country_code': country_from_subdivision(
                    self.subdivision_code)}

            # Add metadata of each subdivision parent.
            for parent_subdiv in territory_parents(
                    self.subdivision_code, include_country=False):
                parent_metadata.update(subdivision_metadata(parent_subdiv))

            # Parent metadata are not allowed to overwrite address fields
            # if not blank, unless strict mode is de-activated.
            if strict:
                for field_id, new_value in parent_metadata.items():
                    # New metadata are not allowed to be blank.
                    assert new_value
                    current_value = self._fields.get(field_id)
                    if current_value and field_id in self.BASE_FIELD_IDS:

                        # Build the list of substitute values that are
                        # equivalent to our new normalized target.
                        alias_values = set([new_value])
                        if field_id == 'country_code':
                            # Allow normalization if the current country code
                            # is the direct parent of a subdivision which also
                            # have its own country code.
                            alias_values.add(subdivisions.get(
                                code=self.subdivision_code).country_code)

                        # Change of current value is allowed if it is a direct
                        # substitute to our new normalized value.
                        if current_value not in alias_values:
                            raise InvalidAddress(
                                inconsistent_fields=set([
                                    tuple(sorted((
                                        field_id, 'subdivision_code')))]),
                                extra_msg="{} subdivision is trying to replace"
                                " {}={!r} field by {}={!r}".format(
                                    self.subdivision_code,
                                    field_id, current_value,
                                    field_id, new_value))

            self._fields.update(parent_metadata)

    def validate(self):
        """ Check fields consistency and requirements in one go.

        Properly check that fields are consistent between themselves, and only
        raise an exception at the end, for the whole address object. Our custom
        ``InvalidAddress`` exception will provide a detailed status of bad
        fields.
        """
        # Keep a classification of bad fields along the validation process.
        required_fields = set()
        invalid_fields = set()
        inconsistent_fields = set()

        # Check that all required fields are set.
        for field_id in self.REQUIRED_FIELDS:
            if not getattr(self, field_id):
                required_fields.add(field_id)

        # Check all fields for invalidity, only if not previously flagged as
        # required.
        if 'country_code' not in required_fields:
            # Check that the country code exists.
            try:
                countries.get(alpha2=self.country_code)
            except KeyError:
                invalid_fields.add('country_code')
        if self.subdivision_code and 'subdivision_code' not in required_fields:
            # Check that the subdivision code exists.
            try:
                subdivisions.get(code=self.subdivision_code)
            except KeyError:
                invalid_fields.add('subdivision_code')

        # Check country consistency against subdivision, only if none of the
        # two fields were previously flagged as required or invalid.
        if self.subdivision_code and not set(
                ['country_code', 'subdivision_code']).intersection(
                    required_fields.union(invalid_fields)) and \
                country_from_subdivision(
                    self.subdivision_code) != self.country_code:
            inconsistent_fields.add(
                tuple(sorted(('country_code', 'subdivision_code'))))

        # Raise our custom exception at last.
        if required_fields or invalid_fields or inconsistent_fields:
            raise InvalidAddress(
                required_fields, invalid_fields, inconsistent_fields)

    @property
    def valid(self):
        """ Return a boolean indicating if the address is valid. """
        try:
            self.validate()
        except InvalidAddress:
            return False
        return True

    @property
    def empty(self):
        """ Return True only if all fields are empty. """
        return not any(self.values())

    def __bool__(self):
        """ Consider the instance to be True if not empty. """
        return not self.empty

    def __nonzero__(self):
        """ Python2 retro-compatibility of ``__bool__()``. """
        return self.__bool__()

    @property
    def country(self):
        """ Return country object, or ``None`` if no country code is set. """
        if self.country_code:
            return countries.get(alpha2=self.country_code)
        return None

    @property
    def country_name(self):
        """ Return country's name.

        Common name always takes precedence over the default name, as the
        latter is often pompous, and sometimes false (i.e. not in sync with
        current political situation).
        """
        country = self.country
        if country:
            if hasattr(country, 'common_name'):
                return country.common_name
            return country.name
        return None

    @property
    def subdivision(self):
        """ Return subdivision object, or ``None`` if no code is set. """
        if self.subdivision_code:
            return subdivisions.get(code=self.subdivision_code)
        return None

    @property
    def subdivision_name(self):
        """ Return subdivision's name. """
        subdivision = self.subdivision
        if subdivision:
            return subdivision.name
        return None

    @property
    def subdivision_type_name(self):
        """ Return subdivision's type human-readable name. """
        subdivision = self.subdivision
        if subdivision:
            return subdivision.type
        return None

    @property
    def subdivision_type_id(self):
        """ Return subdivision's type as a Python-friendly ID string. """
        subdivision = self.subdivision
        if subdivision:
            # Resolves to the module-level ``subdivision_type_id()`` function,
            # not to this property.
            return subdivision_type_id(subdivision)
        return None
516
517
518
# Address utils.
519
520
def random_word(word_lenght=8):
    """ Return a readable random string.

    The word is ``word_lenght`` characters long and alternates consonants
    (even positions) and vowels (odd positions).

    Source:
    http://code.activestate.com/recipes/526619-friendly-readable-id-strings/#c3

    .. deprecated:: 0.10.0

       Use faker package instead.
    """
    # ``stacklevel=2`` attributes the warning to the caller's line.
    warnings.warn(
        'Use faker package instead.', DeprecationWarning, stacklevel=2)
    return ''.join([
        choice('aeiou' if position % 2 else 'bcdfghklmnprstvw')
        for position in range(word_lenght)])
533
534
535
def random_phrase(word_count=4, min_word_lenght=2, max_word_lenght=10):
    """ Return a readable random phrase.

    The phrase is made of ``word_count`` space-separated random words, each
    with a length randomly picked between ``min_word_lenght`` and
    ``max_word_lenght`` (both included).

    Source:
    http://code.activestate.com/recipes/526619-friendly-readable-id-strings/#c3

    .. deprecated:: 0.10.0

       Use faker package instead.
    """
    # ``stacklevel=2`` attributes the warning to the caller's line.
    warnings.warn(
        'Use faker package instead.', DeprecationWarning, stacklevel=2)
    return ' '.join([
        random_word(randint(min_word_lenght, max_word_lenght))
        for _ in range(word_count)])
548
549
550
def random_postal_code():
    """ Return a parsable random postal code.

    The code is 4 to 10 characters long and made of uppercase ASCII letters,
    digits, hyphens and spaces.

    .. deprecated:: 0.10.0

       Use faker package instead.
    """
    # ``stacklevel=2`` attributes the warning to the caller's line.
    warnings.warn(
        'Use faker package instead.', DeprecationWarning, stacklevel=2)
    alphabet = string.ascii_uppercase + string.digits + '- '
    return ''.join([choice(alphabet) for _ in range(randint(4, 10))])
561
562
563
def random_address(locale=None):
    """ Return a random address built from ``faker`` data.

    A ``locale`` parameter tries to produce a localized-consistent address.
    Else a random locale is picked-up. The ``'cn'`` locale is currently
    replaced by a random one too, even when explicitly requested.

    The address is normalized in non-strict mode. ``validate()`` is not called
    here.
    """
    # Exclude temporarily the chinese locale, while we are waiting for a new
    # faker release. See: https://github.com/joke2k/faker/pull/329
    while locale in [None, 'cn']:
        locale = faker.providers.misc.Provider.language_code()
    fake = faker.Faker(locale=locale)

    components = {
        'line1': fake.street_address(),
        'line2': fake.sentence(),
        'postal_code': fake.postcode(),
        'city_name': fake.city(),
        'country_code': fake.country_code()}

    # Pick a random subdivision of the country, if it has any.
    subdiv_codes = list(territory_children_codes(components['country_code']))
    if subdiv_codes:
        components['subdivision_code'] = choice(subdiv_codes)

    return Address(strict=False, **components)
586
587
588
# Subdivisions utils.
589
590
def subdivision_type_id(subdivision):
    """ Normalize subdivision type name into a Python-friendly ID.

    Here is the list of all subdivision types defined by ``pycountry`` v1.8::

        >>> print('\\n'.join(sorted(set([x.type for x in subdivisions]))))
        Administration
        Administrative Region
        Administrative Territory
        Administrative atoll
        Administrative region
        Arctic Region
        Area
        Autonomous City
        Autonomous District
        Autonomous Province
        Autonomous Region
        Autonomous city
        Autonomous community
        Autonomous municipality
        Autonomous province
        Autonomous region
        Autonomous republic
        Autonomous sector
        Autonomous territorial unit
        Borough
        Canton
        Capital District
        Capital Metropolitan City
        Capital Territory
        Capital city
        Capital district
        Capital territory
        Chains (of islands)
        City
        City corporation
        City with county rights
        Commune
        Constitutional province
        Council area
        Country
        County
        Department
        Dependency
        Development region
        District
        District council area
        Division
        Economic Prefecture
        Economic region
        Emirate
        Entity
        Federal Dependency
        Federal District
        Federal Territories
        Federal district
        Geographical Entity
        Geographical region
        Geographical unit
        Governorate
        Included for completeness
        Indigenous region
        Island
        Island council
        Island group
        Local council
        London borough
        Metropolitan cities
        Metropolitan department
        Metropolitan district
        Metropolitan region
        Municipalities
        Municipality
        Oblast
        Outlying area
        Overseas region/department
        Overseas territorial collectivity
        Parish
        Popularates
        Prefecture
        Province
        Quarter
        Rayon
        Region
        Regional council
        Republic
        Republican City
        Self-governed part
        Special District
        Special Municipality
        Special Region
        Special administrative region
        Special city
        Special island authority
        Special municipality
        Special zone
        State
        Territorial unit
        Territory
        Town council
        Two-tier county
        Union territory
        Unitary authority
        Unitary authority (England)
        Unitary authority (Wales)
        district
        state
        zone

    This method transforms and normalizes any of these into Python-friendly
    IDs.
    """
    type_id = slugify(subdivision.type)

    # Any occurrence of the 'city' or 'municipality' string in the type
    # overrides its classification to a city. Only whole underscore-separated
    # words of the slug are compared.
    if set(['city', 'municipality']).intersection(type_id.split('_')):
        type_id = 'city'

    return type_id
709
710
711
def subdivision_metadata(subdivision):
    """ Return a serialized dict of subdivision metadata.

    Metadata IDs are derived from subdivision type: for a type ID ``<type>``,
    the returned dict holds the ``<type>`` (the subdivision object itself),
    ``<type>_code``, ``<type>_name`` and ``<type>_type_name`` keys.
    """
    subdiv_type_id = subdivision_type_id(subdivision)
    metadata = {
        '{}'.format(subdiv_type_id): subdivision,
        '{}_code'.format(subdiv_type_id): subdivision.code,
        '{}_name'.format(subdiv_type_id): subdivision.name,
        '{}_type_name'.format(subdiv_type_id): subdivision.type}

    # Check that we are not producing metadata IDs colliding with address
    # fields.
    # NOTE(review): as written, this only fails when *all* non-whitelisted IDs
    # are base field IDs; a single colliding ID goes undetected. Confirm
    # whether a disjointness check was intended before changing it.
    assert not set(metadata).difference(
        Address.SUBDIVISION_METADATA_WHITELIST).issubset(
            Address.BASE_FIELD_IDS)

    return metadata
730