Completed
Pull Request — develop (#22)
by
unknown
01:58
created

Address.valid_subdivision_country()   A

Complexity

Conditions 2

Size

Total Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
c 0
b 0
f 0
dl 0
loc 11
rs 9.85
1
# -*- coding: utf-8 -*-
2
#
3
# Copyright (c) 2013-2017 Scaleway and Contributors. All Rights Reserved.
4
#                         Kevin Deldycke <[email protected]>
5
#                         Gilles Dartiguelongue <[email protected]>
6
#
7
# Licensed under the BSD 2-Clause License (the "License"); you may not use this
8
# file except in compliance with the License. You may obtain a copy of the
9
# License at http://opensource.org/licenses/BSD-2-Clause
10
11
u""" Utilities for address parsing and rendering.
12
13
Only provides address validation for the moment, but may be used in the future
14
for localized rendering (see issue #4).
15
"""
16
17
from __future__ import (
18
    absolute_import,
19
    division,
20
    print_function,
21
    unicode_literals
22
)
23
24
import random
25
import re
26
27
import faker
28
from boltons.strutils import slugify
29
from pycountry import countries, subdivisions
30
31
from . import PY2, PY3
32
from .territory import (
33
    country_from_subdivision,
34
    default_subdivision_code,
35
    normalize_territory_code,
36
    territory_children_codes,
37
    territory_parents
38
)
39
40
if PY3:
41
    basestring = (str, bytes)
42
43
44
class InvalidAddress(ValueError):
    """ Validation failure carrying a per-category breakdown of bad fields.

    Three categories are tracked: fields that are required but missing,
    fields whose value is invalid, and pairs of fields that contradict each
    other. A free-form extra message can be attached too.
    """

    def __init__(self, required_fields=None, invalid_fields=None,
                 inconsistent_fields=None, extra_msg=None):
        """ Store the classification of bad fields on the exception.

        Any falsy argument is replaced by an empty container of the expected
        type (``set`` for required and inconsistent fields, ``dict`` for
        invalid fields).
        """
        super(InvalidAddress, self).__init__()
        self.required_fields = required_fields or set()
        self.invalid_fields = invalid_fields or dict()
        self.inconsistent_fields = inconsistent_fields or set()
        self.extra_msg = extra_msg

    def __str__(self):
        """ Build a one-line, human-readable summary of all the issues. """

        def verb(collection):
            # Grammatical agreement with the number of reported fields.
            return 'is' if len(collection) == 1 else 'are'

        parts = []

        if self.required_fields:
            names = ', '.join(sorted(self.required_fields))
            parts.append('{} {} required'.format(
                names, verb(self.required_fields)))

        if self.invalid_fields:
            pairs = sorted(
                '{}={!r}'.format(field_id, value)
                for field_id, value in self.invalid_fields.items())
            parts.append('{} {} invalid'.format(
                ', '.join(pairs), verb(self.invalid_fields)))

        if self.inconsistent_fields:
            parts.extend(
                '{} is inconsistent with {}'.format(first_id, second_id)
                for first_id, second_id in sorted(self.inconsistent_fields))

        if self.extra_msg:
            parts.append(self.extra_msg)

        return '{}.'.format('; '.join(parts))
78
79
80
class Address(object):

    """ Define a postal address.

    All addresses share the following fields:
    * ``line1`` (required): a non-constrained string.
    * ``line2``: a non-constrained string.
    * ``postal_code`` (required): a non-constrained string (see issue #2).
    * ``city_name`` (required): a non-constrained string.
    * ``country_code`` (required): an ISO 3166-1 alpha-2 code.
    * ``subdivision_code``: an ISO 3166-2 code.

    At instantiation, the ``normalize()`` method is called. The latter tries
    to clean-up the data and populate empty fields that can be derived from
    others. As such, ``city_name`` can be overridden by ``subdivision_code``.
    See the internal ``SUBDIVISION_METADATA_WHITELIST`` constant.

    If inconsistencies are found at the normalization step, they are left as-is
    to give a chance to the ``validate()`` method to catch them. Which means
    that, after each normalization (including the one at initialization), it is
    your job to call the ``validate()`` method manually to check that the
    address is good.
    """

    # Fields common to any postal address. Those are free-form fields, allowed
    # to be set directly by the user, although their values might be normalized
    # and cleaned-up automatically by the normalization method.
    BASE_FIELD_IDS = frozenset([
        'line1', 'line2', 'postal_code', 'city_name', 'country_code',
        'subdivision_code'])

    # List of subdivision-derived metadata IDs which are allowed to collide
    # with base field IDs.
    SUBDIVISION_METADATA_WHITELIST = frozenset(['city_name'])
    assert SUBDIVISION_METADATA_WHITELIST.issubset(BASE_FIELD_IDS)

    # Fields tested on validate().
    REQUIRED_FIELDS = frozenset([
        'line1', 'postal_code', 'city_name', 'country_code'])
    assert REQUIRED_FIELDS.issubset(BASE_FIELD_IDS)

    def __init__(self, strict=True, **kwargs):
        """ Set address' individual fields and normalize them.

        By default, normalization is ``strict``.

        :param strict: Forwarded to ``normalize()``.
        :param kwargs: Initial values, keyed by base field ID.
        :raises KeyError: If a keyword is not one of ``BASE_FIELD_IDS``.
        """
        # Only common fields are allowed to be set directly.
        unknown_fields = set(kwargs).difference(self.BASE_FIELD_IDS)
        if unknown_fields:
            raise KeyError(
                "{!r} fields are not allowed to be set freely.".format(
                    unknown_fields))

        # Normalized field's IDs and values of the address are stored here.
        self._fields = dict.fromkeys(self.BASE_FIELD_IDS)

        # Load provided fields.
        for field_id, field_value in kwargs.items():
            self[field_id] = field_value

        # Normalize addresses fields.
        self.normalize(strict=strict)

    def __repr__(self):
        """ Print all fields available from the address.

        Also include internal fields disguised as properties.
        """
        # Repr all plain fields.
        fields_repr = ['{}={!r}'.format(k, v) for k, v in self.items()]
        # Repr all internal properties.
        for internal_id in [
                'valid', 'empty', 'country_name', 'subdivision_name',
                'subdivision_type_name', 'subdivision_type_id']:
            fields_repr.append(
                '{}={!r}'.format(internal_id, getattr(self, internal_id)))
        return '{}({})'.format(
            self.__class__.__name__, ', '.join(sorted(fields_repr)))

    def __unicode__(self):
        """ Return a simple unicode string representation of the address block.
        """
        return self.render()

    def __str__(self):
        """ Same as __unicode__ but with Python 2 compatibility. """
        string = self.__unicode__()
        if PY2:
            string = string.encode('utf-8')
        return string

    def __getattr__(self, name):
        """ Expose fields as attributes.

        Only invoked when the regular attribute lookup fails.

        :raises AttributeError: If ``name`` is not a stored field.
        """
        # Fetch the storage from the instance dict directly: going through
        # ``self._fields`` would re-enter this method and recurse forever
        # whenever ``_fields`` is not set yet (instance created with
        # ``__new__`` only, as ``copy`` and ``pickle`` do).
        fields = self.__dict__.get('_fields')
        if fields is not None and name in fields:
            return fields[name]
        raise AttributeError(
            "{!r} object has no attribute {!r}".format(
                self.__class__.__name__, name))

    def __setattr__(self, name, value):
        """ Allow update of address fields as attributes. """
        if name in self.BASE_FIELD_IDS:
            self[name] = value
            return
        super(Address, self).__setattr__(name, value)

    # Let an address be accessed like a dict of its fields IDs & values.
    # This is a proxy to the internal _fields dict.

    def __len__(self):
        """ Return the number of fields. """
        return len(self._fields)

    def __getitem__(self, key):
        """ Return the value of a field.

        :raises TypeError: If ``key`` is not a string.
        :raises KeyError: If ``key`` is not a stored field.
        """
        if not isinstance(key, basestring):
            raise TypeError(
                "Field ID must be a string, not {!r}.".format(key))
        return self._fields[key]

    def __setitem__(self, key, value):
        """ Set a field's value.

        Only base fields are allowed to be set explicitly.

        :raises TypeError: If ``key`` is not a string, or if ``value`` is
            neither a string nor ``None``.
        :raises KeyError: If ``key`` is not one of ``BASE_FIELD_IDS``.
        """
        if not isinstance(key, basestring):
            raise TypeError(
                "Field ID must be a string, not {!r}.".format(key))
        if not (isinstance(value, basestring) or value is None):
            raise TypeError(
                "Value of field {!r} must be a string or None, "
                "not {!r}.".format(key, value))
        if key not in self.BASE_FIELD_IDS:
            raise KeyError(key)
        self._fields[key] = value

    def __delitem__(self, key):
        """ Remove a field.

        Base fields are never removed from the storage: they are reset to
        ``None`` instead. Any other field is deleted for good.
        """
        if key in self.BASE_FIELD_IDS:
            self._fields[key] = None
        else:
            del self._fields[key]

    def __iter__(self):
        """ Iterate over field IDs. """
        for field_id in self._fields:
            yield field_id

    def keys(self):
        """ Return field IDs, as provided by the internal dict. """
        return self._fields.keys()

    def values(self):
        """ Return field values, as provided by the internal dict. """
        return self._fields.values()

    def items(self):
        """ Return field IDs & values, as provided by the internal dict. """
        return self._fields.items()

    def render(self, separator='\n'):
        """ Render a human-friendly address block.

        The block is composed of:
        * The ``line1`` field rendered as-is if not empty.
        * The ``line2`` field rendered as-is if not empty.
        * A third line made of the postal code, the city name and state name if
          any is set.
        * A fourth optional line with the subdivision name if its value does
          not overlap with the city, state or country name.
        * The last line feature country's common name.

        :param separator: String used to join the lines of the block.
        :return: The rendered address block.
        """
        lines = []

        if self.line1:
            lines.append(self.line1)

        if self.line2:
            lines.append(self.line2)

        # Build the third line.
        locality_elements = []
        if self.city_name:
            locality_elements.append(self.city_name)
        if hasattr(self, 'state_name'):
            # XXX It might not be a good idea to deduplicate state and city.
            # See: https://en.wikipedia.org/wiki
            # /List_of_U.S._cities_named_after_their_state
            locality_elements.append(self.state_name)
        # Separate city and state by a comma.
        locality = ', '.join(locality_elements)
        # Separate the leading zip code and the rest by a dash. Empty parts
        # are skipped so that a lone postal code is not followed by a
        # dangling separator.
        line3 = ' - '.join(
            element for element in (self.postal_code, locality) if element)
        if line3:
            lines.append(line3)

        # Compare the vanilla subdivision name to properties that are based on
        # it and used in the current ``render()`` method to produce a printable
        # address. If none overlap, then print an additional line with the
        # subdivision name as-is to provide extra, non-redundant, territory
        # precision.
        subdiv_based_properties = [
            'city_name', 'state_name', 'country_name']
        subdiv_based_values = [
            getattr(self, prop_id) for prop_id in subdiv_based_properties
            if hasattr(self, prop_id)]
        if self.subdivision_name and \
                self.subdivision_name not in subdiv_based_values:
            lines.append(self.subdivision_name)

        # Place the country line at the end.
        if self.country_name:
            lines.append(self.country_name)

        # Render the address block with the provided separator.
        return separator.join(lines)

    def normalize(self, strict=True):
        """ Normalize address fields.

        If values are unrecognized or invalid, they will be set to None.

        By default, the normalization is ``strict``: metadata derived from
        territory's parents are not allowed to overwrite valid address fields
        entered by the user. If set to ``False``, territory-derived values
        takes precedence over user's.

        You need to call back the ``validate()`` method afterwards to properly
        check that the fully-qualified address is ready for consumption.

        :raises InvalidAddress: In strict mode, if a subdivision-derived value
            contradicts a non-blank base field.
        """
        # Strip postal codes of any characters but alphanumerics, spaces and
        # hyphens.
        if self.postal_code:
            postal_code = self.postal_code.upper()
            # Remove unrecognized characters.
            postal_code = re.sub(r'[^A-Z0-9 -]', '', postal_code)
            # Reduce sequences of mixed hyphens and spaces to single hyphen.
            postal_code = re.sub(
                r'[^A-Z0-9]*-+[^A-Z0-9]*', '-', postal_code)
            # Edge case: remove leading and trailing hyphens. Surrounding
            # spaces are taken care of by the space normalization below.
            self.postal_code = postal_code.strip('-')

        # Normalize spaces. Iterate over a snapshot as values are updated
        # along the way.
        for field_id, field_value in list(self.items()):
            if isinstance(field_value, basestring):
                try:
                    self[field_id] = ' '.join(field_value.split())
                except KeyError:
                    # Invalid field_id, usually all the 'subdivision_metadata'
                    pass

        # Reset empty and blank strings.
        empty_fields = [f_id for f_id, f_value in self.items() if not f_value]
        for field_id in empty_fields:
            del self[field_id]

        # Swap lines if the first is empty.
        if self.line2 and not self.line1:
            self.line1, self.line2 = self.line2, self.line1

        # Normalize territory codes. Unrecognized territory codes are reset
        # to None.
        for territory_id in ['country_code', 'subdivision_code']:
            territory_code = getattr(self, territory_id)
            if territory_code:
                try:
                    code = normalize_territory_code(
                        territory_code, resolve_aliases=False)
                except ValueError:
                    code = None
                setattr(self, territory_id, code)

        # Try to set default subdivision from country if not set.
        if self.country_code and not self.subdivision_code:
            self.subdivision_code = default_subdivision_code(self.country_code)
            # If the country set its own subdivision, reset it. It will be
            # properly re-guessed below.
            if self.subdivision_code:
                self.country_code = None

        # Automatically populate address fields with metadata extracted from
        # all subdivision parents.
        if self.subdivision_code:
            parent_metadata = {
                # All subdivisions have a parent country.
                'country_code': country_from_subdivision(
                    self.subdivision_code)}

            # Add metadata of each subdivision parent.
            for parent_subdiv in territory_parents(
                    self.subdivision_code, include_country=False):
                parent_metadata.update(subdivision_metadata(parent_subdiv))

            # Parent metadata are not allowed to overwrite address fields
            # if not blank, unless strict mode is de-activated.
            if strict:
                for field_id, new_value in parent_metadata.items():
                    # New metadata are not allowed to be blank.
                    assert new_value
                    current_value = self._fields.get(field_id)
                    if current_value and field_id in self.BASE_FIELD_IDS:

                        # Build the list of substitute values that are
                        # equivalent to our new normalized target.
                        alias_values = {new_value}
                        if field_id == 'country_code':
                            # Allow normalization if the current country code
                            # is the direct parent of a subdivision which also
                            # have its own country code.
                            alias_values.add(subdivisions.get(
                                code=self.subdivision_code).country_code)

                        # Change of current value is allowed if it is a direct
                        # substitute to our new normalized value.
                        if current_value not in alias_values:
                            raise InvalidAddress(
                                inconsistent_fields={tuple(sorted((
                                    field_id, 'subdivision_code')))},
                                extra_msg="{} subdivision is trying to replace"
                                " {}={!r} field by {}={!r}".format(
                                    self.subdivision_code,
                                    field_id, current_value,
                                    field_id, new_value))

            self._fields.update(parent_metadata)

    def validate(self):
        """ Check fields consistency and requirements in one go.

        Properly check that fields are consistent between themselves, and only
        raise an exception at the end, for the whole address object. Our custom
        exception will provide a detailed status of bad fields.

        :raises InvalidAddress: If any field is missing, invalid or
            inconsistent with another.
        """

        required_fields = self.check_required_fields()
        invalid_fields = self.check_invalid_fields(required_fields)
        inconsistent_fields = self.check_inconsistent_fields(required_fields,
                                                             invalid_fields)

        # Raise our custom exception if any value is wrong.
        if required_fields or invalid_fields or inconsistent_fields:
            raise InvalidAddress(
                required_fields, invalid_fields, inconsistent_fields)

    def check_required_fields(self):
        """Check that all required fields are set.

        :return: The set of unset thus required fields.
        """
        required_fields = set()
        for field_id in self.REQUIRED_FIELDS:
            if not getattr(self, field_id):
                required_fields.add(field_id)
        return required_fields

    def check_invalid_fields(self, required_fields):
        """Check all fields for invalidity, only if not previously flagged as
        required.

        :param required_fields: The set of missing required fields.
        :return: A dict mapping each invalid field ID to its offending value.
        """
        invalid_fields = dict()
        if 'country_code' not in required_fields:
            # Check that the country code exists. Depending on its version,
            # pycountry either raises KeyError or returns None for an unknown
            # code: both outcomes flag the field as invalid.
            try:
                country = countries.get(alpha_2=self.country_code)
            except KeyError:
                country = None
            if country is None:
                invalid_fields['country_code'] = self.country_code

        if self.subdivision_code and 'subdivision_code' not in required_fields:
            # Check that the subdivision code exists. Same dual handling of
            # unknown codes as for the country above.
            try:
                subdivision = subdivisions.get(code=self.subdivision_code)
            except KeyError:
                subdivision = None
            if subdivision is None:
                invalid_fields['subdivision_code'] = self.subdivision_code
        return invalid_fields

    def check_inconsistent_fields(self, required_fields, invalid_fields):
        """Check country consistency against subdivision, only if none of the
         two fields were previously flagged as required or invalid.

        :param required_fields: The set of missing required fields.
        :param invalid_fields: The dict of invalid fields, keyed by field ID.
        :return: A set holding at most one sorted tuple of the two
            inconsistent field IDs.
        """
        inconsistent_fields = set()
        # Union with a dict only considers its keys, i.e. the field IDs.
        any_wrong_field = required_fields.union(invalid_fields)
        consistency_fields = {'country_code', 'subdivision_code'}
        inconsistency = consistency_fields.intersection(any_wrong_field)
        if not inconsistency and not self.valid_subdivision_country():
            inconsistent_fields.add(tuple(sorted(consistency_fields)))
        return inconsistent_fields

    def valid_subdivision_country(self):
        """Validates that the country attached to the subdivision is
        the same as the Address country_code.

        :return: True if the subdivision country is the same as the country
            or if there is no subdivision at all, False otherwise.
        """
        if not self.subdivision_code:
            return True
        inferred_country = country_from_subdivision(self.subdivision_code)
        return inferred_country == self.country_code

    @property
    def valid(self):
        """ Return a boolean indicating if the address is valid. """
        try:
            self.validate()
        except InvalidAddress:
            return False
        return True

    @property
    def empty(self):
        """ Return True only if all fields are empty. """
        return not any(self.values())

    def __bool__(self):
        """ Consider the instance to be True if not empty. """
        return not self.empty

    def __nonzero__(self):
        """ Python2 retro-compatibility of ``__bool__()``. """
        return self.__bool__()

    @property
    def country(self):
        """ Return country object. """
        if self.country_code:
            return countries.get(alpha_2=self.country_code)
        return None

    @property
    def country_name(self):
        """ Return country's name.

        Common name always takes precedence over the default name, as the
        latter is often pompous, and sometimes false (i.e. not in sync with
        current political situation).
        """
        if self.country:
            if hasattr(self.country, 'common_name'):
                return self.country.common_name
            return self.country.name
        return None

    @property
    def subdivision(self):
        """ Return subdivision object. """
        if self.subdivision_code:
            return subdivisions.get(code=self.subdivision_code)
        return None

    @property
    def subdivision_name(self):
        """ Return subdivision's name. """
        if self.subdivision:
            return self.subdivision.name
        return None

    @property
    def subdivision_type_name(self):
        """ Return subdivision's type human-readable name. """
        if self.subdivision:
            return self.subdivision.type
        return None

    @property
    def subdivision_type_id(self):
        """ Return subdivision's type as a Python-friendly ID string. """
        if self.subdivision:
            return subdivision_type_id(self.subdivision)
        return None
556
557
558
# Address utils.
559
560
def random_address(locale=None):
    """ Build an address out of randomly generated components.

    When a ``locale`` is given, it is handed to Faker so that the generated
    components are consistent with that locale. Otherwise a locale is drawn
    at random from those Faker advertises.

    The address is created with ``strict=False`` normalization.
    """
    # XXX Exclude 'ar_PS' that doesn't work currently (it's defined in Faker
    # but not in pycountry).
    # See: https://github.com/scaleway/postal-address/issues/20
    unusable_locales = [None, 'ar_PS']
    while locale in unusable_locales:
        locale = random.choice(list(faker.config.AVAILABLE_LOCALES))
    generator = faker.Faker(locale=locale)

    # Components are generated in a fixed order so that a seeded generator
    # keeps producing the same address.
    components = {}
    components['line1'] = generator.street_address()
    components['line2'] = generator.sentence()
    components['postal_code'] = generator.postcode()
    components['city_name'] = generator.city()
    components['country_code'] = generator.country_code()

    # Pick one of the country's subdivisions, if it has any.
    candidate_codes = list(
        territory_children_codes(components['country_code']))
    if candidate_codes:
        components['subdivision_code'] = random.choice(candidate_codes)

    return Address(strict=False, **components)
585
586
587
# Subdivisions utils.
588
589
def subdivision_type_id(subdivision):
    """ Turn a subdivision's type name into a Python-friendly ID.

    The type name is slugified; if one of the resulting underscore-separated
    words is exactly ``city`` or ``municipality``, the ID collapses to
    ``city``.

    For reference, here is the list of all subdivision types defined by
    ``pycountry`` v1.8::

        >>> print '\n'.join(sorted(set([x.type for x in subdivisions])))
        Administration
        Administrative Region
        Administrative Territory
        Administrative atoll
        Administrative region
        Arctic Region
        Area
        Autonomous City
        Autonomous District
        Autonomous Province
        Autonomous Region
        Autonomous city
        Autonomous community
        Autonomous municipality
        Autonomous province
        Autonomous region
        Autonomous republic
        Autonomous sector
        Autonomous territorial unit
        Borough
        Canton
        Capital District
        Capital Metropolitan City
        Capital Territory
        Capital city
        Capital district
        Capital territory
        Chains (of islands)
        City
        City corporation
        City with county rights
        Commune
        Constitutional province
        Council area
        Country
        County
        Department
        Dependency
        Development region
        District
        District council area
        Division
        Economic Prefecture
        Economic region
        Emirate
        Entity
        Federal Dependency
        Federal District
        Federal Territories
        Federal district
        Geographical Entity
        Geographical region
        Geographical unit
        Governorate
        Included for completeness
        Indigenous region
        Island
        Island council
        Island group
        Local council
        London borough
        Metropolitan cities
        Metropolitan department
        Metropolitan district
        Metropolitan region
        Municipalities
        Municipality
        Oblast
        Outlying area
        Overseas region/department
        Overseas territorial collectivity
        Parish
        Popularates
        Prefecture
        Province
        Quarter
        Rayon
        Region
        Regional council
        Republic
        Republican City
        Self-governed part
        Special District
        Special Municipality
        Special Region
        Special administrative region
        Special city
        Special island authority
        Special municipality
        Special zone
        State
        Territorial unit
        Territory
        Town council
        Two-tier county
        Union territory
        Unitary authority
        Unitary authority (England)
        Unitary authority (Wales)
        district
        state
        zone
    """
    slug = slugify(subdivision.type)

    # A type mentioning a city or a municipality as a standalone word is
    # classified as a city, whatever its other words are.
    city_markers = ('city', 'municipality')
    for word in slug.split('_'):
        if word in city_markers:
            return 'city'

    return slug
708
709
710
def subdivision_metadata(subdivision):
    """ Return a serialized dict of subdivision metadata.

    Metadata IDs are derived from the subdivision type: for a type ID ``X``
    the dict holds the subdivision object under ``X``, and its code, name and
    type name under ``X_slug``, ``X_name`` and ``X_type_name`` respectively.

    :param subdivision: An object exposing ``code``, ``name`` and ``type``
        attributes.
    :return: The metadata dict described above.
    """
    subdiv_type_id = subdivision_type_id(subdivision)
    metadata = {
        subdiv_type_id: subdivision,
        # Rename code to slug to avoid overriding 'country_code' in some cases
        # See https://github.com/scaleway/postal-address/issues/16
        '{}_slug'.format(subdiv_type_id): subdivision.code,
        '{}_name'.format(subdiv_type_id): subdivision.name,
        '{}_type_name'.format(subdiv_type_id): subdivision.type}

    # Check that we are not producing metadata IDs colliding with address
    # fields. Apart from the whitelisted ones, no metadata ID is allowed to
    # be a base field ID: the two sets must be disjoint. (Testing for "not a
    # subset" would let a single collision slip through as long as one other
    # ID is not a base field.)
    assert set(metadata).difference(
        Address.SUBDIVISION_METADATA_WHITELIST).isdisjoint(
            Address.BASE_FIELD_IDS)

    return metadata
731