Test Failed
Push — master ( 64abe2...a464fa )
by Chris
04:02 queued 11s
created

abydos.phonetic.bmpm._bm_redo_language()   A

Complexity

Conditions 1

Size

Total Lines 19
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 5
nop 6
dl 0
loc 19
rs 10
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# This file is based on Alexander Beider and Stephen P. Morse's implementation
7
# of the Beider-Morse Phonetic Matching (BMPM) System, available at
8
# http://stevemorse.org/phonetics/bmpm.htm.
9
#
10
# Abydos is free software: you can redistribute it and/or modify
11
# it under the terms of the GNU General Public License as published by
12
# the Free Software Foundation, either version 3 of the License, or
13
# (at your option) any later version.
14
#
15
# Abydos is distributed in the hope that it will be useful,
16
# but WITHOUT ANY WARRANTY; without even the implied warranty of
17
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
# GNU General Public License for more details.
19
#
20
# You should have received a copy of the GNU General Public License
21
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
22
23
"""abydos.phonetic.bmpm.
24
25
The phonetic.bmpm module implements the Beider-Morse Phonetic Matching (BMPM)
26
algorithm.
27
"""
28
29
from __future__ import unicode_literals
30
31
from re import search
32
from unicodedata import normalize
33
34
from six import PY3, text_type
35
from six.moves import range
36
37
from ._bmdata import BMDATA, L_ANY, L_ARABIC, L_CYRILLIC, L_CZECH, L_DUTCH, \
38
    L_ENGLISH, L_FRENCH, L_GERMAN, L_GREEK, L_GREEKLATIN, L_HEBREW, \
39
    L_HUNGARIAN, L_ITALIAN, L_LATVIAN, L_NONE, L_POLISH, L_PORTUGUESE, \
40
    L_ROMANIAN, L_RUSSIAN, L_SPANISH, L_TURKISH
41
42
if PY3:
    # Python 3 has no separate ``long`` type; alias it so the
    # isinstance() check against ``long`` in bmpm() works on both versions.
    long = int

# Maps the lowercase language names accepted by bmpm() to the language
# constants imported from ._bmdata.
_LANG_DICT = {
    'any': L_ANY,
    'arabic': L_ARABIC,
    'cyrillic': L_CYRILLIC,
    'czech': L_CZECH,
    'dutch': L_DUTCH,
    'english': L_ENGLISH,
    'french': L_FRENCH,
    'german': L_GERMAN,
    'greek': L_GREEK,
    'greeklatin': L_GREEKLATIN,
    'hebrew': L_HEBREW,
    'hungarian': L_HUNGARIAN,
    'italian': L_ITALIAN,
    'latvian': L_LATVIAN,
    'polish': L_POLISH,
    'portuguese': L_PORTUGUESE,
    'romanian': L_ROMANIAN,
    'russian': L_RUSSIAN,
    'spanish': L_SPANISH,
    'turkish': L_TURKISH,
}
53
54
# Name particles handled specially by _bm_phonetic(), per name mode.
# The 'gen' entries are matched as prefixes of the whole term (hence the
# trailing space / apostrophe); the 'sep' and 'ash' entries are compared
# against individual words.
BMDATA['gen']['discards'] = {'da ', 'dal ', 'de ', 'del ', 'dela ', 'de la ',
                             'della ', 'des ', 'di ', 'do ', 'dos ', 'du ',
                             'van ', 'von ', 'd\''}
BMDATA['sep']['discards'] = {'al', 'el', 'da', 'dal', 'de', 'del', 'dela',
                             'de la', 'della', 'des', 'di', 'do', 'dos', 'du',
                             'van', 'von'}
BMDATA['ash']['discards'] = {'bar', 'ben', 'da', 'de', 'van', 'von'}

# format of rules array: index of each field within a single rule
_PATTERN_POS = 0
_LCONTEXT_POS = 1
_RCONTEXT_POS = 2
_PHONETIC_POS = 3
67
68
69
def _bm_language(name, name_mode):
    """Return the best guess language ID for the word and language choices.

    Every language rule whose regex matches the name either restricts the
    remaining choices to the rule's languages (accepting rule) or removes
    the rule's languages from the remaining choices (rejecting rule).

    :param str name: the term to guess the language of
    :param str name_mode: the name mode of the algorithm: 'gen' (default),
                'ash' (Ashkenazi), or 'sep' (Sephardic)
    :returns: a bitmask of the languages still compatible with the name, or
        L_ANY if no language remains
    :rtype: int
    """
    name = name.strip().lower()
    rules = BMDATA[name_mode]['language_rules']
    # Sum of the flags of every language of this mode, minus one
    # (presumably this drops the L_ANY bit -- confirm against ._bmdata).
    all_langs = sum(_LANG_DICT[_] for _ in BMDATA[name_mode]['languages'])-1
    choices_remaining = all_langs
    for letters, languages, accept in rules:
        if search(letters, name) is None:
            continue
        if accept:
            choices_remaining &= languages
        else:
            # complement of ``languages`` reduced modulo all_langs+1
            choices_remaining &= (~languages) % (all_langs+1)
    if choices_remaining == L_NONE:
        choices_remaining = L_ANY
    return choices_remaining
90
91
92
def _bm_redo_language(term, name_mode, rules, final_rules1, final_rules2,
                      concat):
    """Reassess the language of the terms and call the phonetic encoder.

    Uses a split multi-word term.

    :param str term: the term to encode via Beider-Morse
    :param str name_mode: the name mode of the algorithm: 'gen' (default),
        'ash' (Ashkenazi), or 'sep' (Sephardic)
    :param tuple rules: the set of initial phonetic transform regexps
    :param tuple final_rules1: the common set of final phonetic transform
        regexps
    :param tuple final_rules2: the specific set of final phonetic transform
        regexps
    :param bool concat: a flag to indicate concatenation
    :returns: the value returned by _bm_phonetic for the term, using the
        language guessed for this term alone
    """
    # The language is guessed again because ``term`` is only a part of the
    # name the caller was originally given.
    language_arg = _bm_language(term, name_mode)
    return _bm_phonetic(term, name_mode, rules, final_rules1, final_rules2,
                        language_arg, concat)
111
112
113
def _bm_phonetic(term, name_mode, rules, final_rules1, final_rules2,
                 language_arg=0, concat=False):
    """Return the Beider-Morse encoding(s) of a term.

    :param str term: the term to encode via Beider-Morse
    :param str name_mode: the name mode of the algorithm: 'gen' (default),
        'ash' (Ashkenazi), or 'sep' (Sephardic)
    :param tuple rules: the set of initial phonetic transform regexps
    :param tuple final_rules1: the common set of final phonetic transform
        regexps
    :param tuple final_rules2: the specific set of final phonetic transform
        regexps
    :param int language_arg: an integer representing the target language of the
        phonetic encoding
    :param bool concat: a flag to indicate concatenation
    :returns: the phonetic encoding; when the term is split into several
        parts, the encodings of the parts joined by '-'
    :rtype: str
    """
    term = term.replace('-', ' ').strip()

    if name_mode == 'gen':  # generic case
        # discard and concatenate certain words if at the start of the name
        #
        # The discards are stored in a set, whose iteration order is
        # arbitrary, and 'de ' is itself a prefix of 'de la '.  Scanning in
        # sorted order makes the result reproducible: 'de ' is always tried
        # before 'de la ', which is also the order in which the two are
        # listed where the set is defined.
        for pfx in sorted(BMDATA['gen']['discards']):
            if term.startswith(pfx):
                remainder = term[len(pfx):]
                # prefix without its final character (space or apostrophe)
                # glued onto the rest of the name
                combined = pfx[:-1]+remainder
                return (_bm_redo_language(remainder, name_mode, rules,
                                          final_rules1, final_rules2,
                                          concat) +
                        '-' +
                        _bm_redo_language(combined, name_mode, rules,
                                          final_rules1, final_rules2,
                                          concat))

    words = term.split()  # create array of the individual words in the name

    if name_mode == 'sep':  # Sephardic case
        # for each word in the name, delete portions of word preceding
        # apostrophe
        # ex: d'avila d'aguilar --> avila aguilar
        # also discard certain words in the name

        # note that we can never get a match on "de la" because we are checking
        # single words below
        # this is a bug, but I won't try to fix it now
        words2 = []
        for word in words:
            word = word[word.rfind('\'')+1:]
            if word not in BMDATA['sep']['discards']:
                words2.append(word)
    elif (name_mode == 'ash' and len(words) > 1 and
          words[0] in BMDATA['ash']['discards']):  # Ashkenazic case
        # discard certain words if at the start of the name
        words2 = words[1:]
    else:
        words2 = list(words)

    if concat:
        # concatenate the separate words of a multi-word name
        # (normally used for exact matches)
        term = ' '.join(words2)
    elif len(words2) == 1:  # not a multi-word name
        term = words2[0]
    else:
        # encode each word in a multi-word name separately
        # (normally used for approx matches)
        return '-'.join([_bm_redo_language(w, name_mode, rules,
                                           final_rules1, final_rules2,
                                           concat)
                         for w in words2])

    term_length = len(term)

    # apply language rules to map to phonetic alphabet
    phonetic = ''
    skip = 0  # input characters already consumed by the previous match
    for i in range(term_length):
        if skip:
            skip -= 1
            continue

        # Default for a character in the name that is not in the table
        # (e.g., space): consume exactly one character.
        pattern_length = 1
        for rule in rules:
            pattern = rule[_PATTERN_POS]

            # check to see if next sequence in input matches the string in the
            # rule
            if not term.startswith(pattern, i):  # no match
                continue

            lcontext = rule[_LCONTEXT_POS]
            rcontext = rule[_RCONTEXT_POS]

            # check that right context is satisfied
            if rcontext != '':
                if not search('^'+rcontext, term[i+len(pattern):]):
                    continue

            # check that left context is satisfied
            if lcontext != '':
                if not search(lcontext+'$', term[:i]):
                    continue

            # check for incompatible attributes
            candidate = _bm_apply_rule_if_compat(phonetic, rule[_PHONETIC_POS],
                                                 language_arg)
            # The below condition shouldn't ever be false
            if candidate is not None:  # pragma: no branch
                phonetic = candidate
                pattern_length = len(pattern)
                break

        skip = pattern_length-1

    # apply final rules on phonetic-alphabet,
    # doing a substitution of certain characters
    phonetic = _bm_apply_final_rules(phonetic, final_rules1, language_arg,
                                     False)  # apply common rules
    # final_rules1 are the common approx rules,
    # final_rules2 are approx rules for specific language
    phonetic = _bm_apply_final_rules(phonetic, final_rules2, language_arg,
                                     True)  # apply lang specific rules

    return phonetic
246
247
248
def _bm_apply_final_rules(phonetic, final_rules, language_arg, strip):
    """Apply a set of final rules to the phonetic encoding.

    :param str phonetic: the term to which to apply the final rules
    :param tuple final_rules: the set of final phonetic transform regexps
    :param int language_arg: an integer representing the target language of the
        phonetic encoding
    :param bool strip: flag to indicate whether to normalize the language
        attributes
    :returns: the transformed encoding; wrapped in parentheses when it
        contains more than one '|'-separated alternative
    :rtype: str
    """
    # optimization to save time
    if not final_rules:
        return phonetic

    # expand the result
    phonetic = _bm_expand_alternates(phonetic)
    phonetic_array = phonetic.split('|')

    for k, phonetic in enumerate(phonetic_array):
        phonetic2 = ''
        # copy of this alternative without its bracketed attributes; rule
        # patterns and contexts are matched against this copy
        phoneticx = _bm_normalize_lang_attrs(phonetic, True)

        i = 0
        while i < len(phonetic):
            if phonetic[i] == '[':  # skip over language attribute
                attrib_end = i + 1
                while phonetic[attrib_end] != ']':
                    attrib_end += 1
                attrib_end += 1  # get past the ]
                phonetic2 += phonetic[i:attrib_end]
                i = attrib_end
                continue

            # Default when no rule applies: consume one character.
            pattern_length = 1
            found = False
            for rule in final_rules:
                pattern = rule[_PATTERN_POS]

                # check to see if next sequence in phonetic matches the string
                # in the rule
                if not phoneticx.startswith(pattern, i):
                    continue

                lcontext = rule[_LCONTEXT_POS]
                rcontext = rule[_RCONTEXT_POS]

                # check that right context is satisfied
                if rcontext != '':
                    if not search('^'+rcontext, phoneticx[i + len(pattern):]):
                        continue

                # check that left context is satisfied
                if lcontext != '':
                    if not search(lcontext+'$', phoneticx[:i]):
                        continue

                # check for incompatible attributes
                candidate = _bm_apply_rule_if_compat(phonetic2,
                                                     rule[_PHONETIC_POS],
                                                     language_arg)
                # The below condition shouldn't ever be false
                if candidate is not None:  # pragma: no branch
                    phonetic2 = candidate
                    pattern_length = len(pattern)
                    found = True
                    break

            if not found:
                # character in name for which there is no substitution in the
                # table
                phonetic2 += phonetic[i]

            i += pattern_length

        phonetic_array[k] = _bm_expand_alternates(phonetic2)

    phonetic = '|'.join(phonetic_array)
    if strip:
        phonetic = _bm_normalize_lang_attrs(phonetic, True)

    if '|' in phonetic:
        phonetic = '(' + _bm_remove_dupes(phonetic) + ')'

    return phonetic
339
340
341
def _bm_phonetic_number(phonetic):
342
    """Remove bracketed text from the end of a string.
343
344
    :param str phonetic: a Beider-Morse phonetic encoding
345
    """
346
    if '[' in phonetic:
347
        return phonetic[:phonetic.find('[')]
348
349
    return phonetic  # experimental !!!!
350
351
352
def _bm_expand_alternates(phonetic):
    """Expand phonetic alternates separated by |s.

    The first parenthesized group is replaced by each of its alternatives in
    turn and every resulting string is expanded recursively.

    :param str phonetic: a Beider-Morse phonetic encoding
    :returns: the fully expanded alternatives joined by '|'; empty
        alternatives and those that normalize to '[0]' are left out
    :rtype: str
    """
    alt_start = phonetic.find('(')
    if alt_start == -1:
        return _bm_normalize_lang_attrs(phonetic, False)

    prefix = phonetic[:alt_start]
    alt_start += 1  # get past the (
    alt_end = phonetic.find(')', alt_start)
    alt_string = phonetic[alt_start:alt_end]
    alt_end += 1  # get past the )
    suffix = phonetic[alt_end:]

    expanded = []
    for alt in alt_string.split('|'):
        alternate = _bm_expand_alternates(prefix+alt+suffix)
        if alternate != '' and alternate != '[0]':
            expanded.append(alternate)

    return '|'.join(expanded)
379
380
381
def _bm_pnums_with_leading_space(phonetic):
    """Join prefixes & suffixes in cases of alternate phonetic values.

    :param str phonetic: a Beider-Morse phonetic encoding
    :returns: every expanded alternative, each preceded by a single space
        and passed through _bm_phonetic_number
    :rtype: str
    """
    alt_start = phonetic.find('(')
    if alt_start == -1:
        return ' ' + _bm_phonetic_number(phonetic)

    prefix = phonetic[:alt_start]
    alt_start += 1  # get past the (
    alt_end = phonetic.find(')', alt_start)
    alt_string = phonetic[alt_start:alt_end]
    alt_end += 1  # get past the )
    suffix = phonetic[alt_end:]

    return ''.join(_bm_pnums_with_leading_space(prefix+alt+suffix)
                   for alt in alt_string.split('|'))
402
403
404
def _bm_phonetic_numbers(phonetic):
    """Prepare & join phonetic numbers.

    Split phonetic value on '-', run through _bm_pnums_with_leading_space,
    and join with ' '

    :param str phonetic: a Beider-Morse phonetic encoding
    :returns: the space-separated phonetic values
    :rtype: str
    """
    # '-' separates the parts of names with spaces in them; [1:] drops the
    # leading space that _bm_pnums_with_leading_space puts in front of its
    # result.
    return ' '.join([_bm_pnums_with_leading_space(part)[1:]
                     for part in phonetic.split('-')])
416
417
418
def _bm_remove_dupes(phonetic):
419
    """Remove duplicates from a phonetic encoding list.
420
421
    :param str phonetic: a Beider-Morse phonetic encoding
422
    """
423
    alt_string = phonetic
424
    alt_array = alt_string.split('|')
425
426
    result = '|'
427
    for i in range(len(alt_array)):
428
        alt = alt_array[i]
429
        if alt and '|'+alt+'|' not in result:
430
            result += alt+'|'
431
432
    return result[1:-1]  # remove leading and trailing |
433
434
435
def _bm_normalize_lang_attrs(text, strip):
436
    """Remove embedded bracketed attributes.
437
438
    This (potentially) bitwise-ands bracketed attributes together and adds to
439
    the end.
440
    This is applied to a single alternative at a time -- not to a
441
    parenthisized list.
442
    It removes all embedded bracketed attributes, logically-ands them together,
443
    and places them at the end.
444
    However if strip is true, this can indeed remove embedded bracketed
445
    attributes from a parenthesized list.
446
447
    :param str text: a Beider-Morse phonetic encoding (in progress)
448
    :param bool strip: remove the bracketed attributes (and throw away)
449
    """
450
    uninitialized = -1  # all 1's
451
    attrib = uninitialized
452
    while '[' in text:
453
        bracket_start = text.find('[')
454
        bracket_end = text.find(']', bracket_start)
455
        if bracket_end == -1:
456
            raise ValueError('No closing square bracket: text=(' +
457
                             text + ') strip=(' + text_type(strip) + ')')
458
        attrib &= int(text[bracket_start+1:bracket_end])
459
        text = text[:bracket_start] + text[bracket_end+1:]
460
461
    if attrib == uninitialized or strip:
462
        return text
463
    elif attrib == 0:
464
        # means that the attributes were incompatible and there is no
465
        # alternative here
466
        return '[0]'
467
    return text + '[' + str(attrib) + ']'
468
469
470
def _bm_apply_rule_if_compat(phonetic, target, language_arg):
471
    """Apply a phonetic regex if compatible.
472
473
    tests for compatible language rules
474
475
    to do so, apply the rule, expand the results, and detect alternatives with
476
        incompatible attributes
477
478
    then drop each alternative that has incompatible attributes and keep those
479
        that are compatible
480
481
    if there are no compatible alternatives left, return false
482
483
    otherwise return the compatible alternatives
484
485
    apply the rule
486
487
    :param str phonetic: the Beider-Morse phonetic encoding (so far)
488
    :param str target: a proposed addition to the phonetic encoding
489
    :param int language_arg: an integer representing the target language of
490
        the phonetic encoding
491
    """
492
    candidate = phonetic + target
493
    if '[' not in candidate:  # no attributes so we need test no further
494
        return candidate
495
496
    # expand the result, converting incompatible attributes to [0]
497
    candidate = _bm_expand_alternates(candidate)
498
    candidate_array = candidate.split('|')
499
500
    # drop each alternative that has incompatible attributes
501
    candidate = ''
502
    found = False
503
504
    for i in range(len(candidate_array)):
505
        this_candidate = candidate_array[i]
506
        if language_arg != 1:
507
            this_candidate = _bm_normalize_lang_attrs(this_candidate + '[' +
508
                                                      str(language_arg) + ']',
509
                                                      False)
510
        if this_candidate != '[0]':
511
            found = True
512
            if candidate:
513
                candidate += '|'
514
            candidate += this_candidate
515
516
    # return false if no compatible alternatives remain
517
    if not found:
518
        return None
519
520
    # return the result of applying the rule
521
    if '|' in candidate:
522
        candidate = '('+candidate+')'
523
    return candidate
524
525
526
def _bm_language_index_from_code(code, name_mode):
    """Return the index value for a language code.

    This returns l_any if more than one code is specified or the code is out
    of bounds.

    :param int code: the language code to interpret
    :param str name_mode: the name mode of the algorithm: 'gen' (default),
                'ash' (Ashkenazi), or 'sep' (Sephardic)
    :returns: ``code`` itself when it is in range and has exactly one bit
        set, otherwise L_ANY
    :rtype: int
    """
    max_code = sum(_LANG_DICT[_] for _ in BMDATA[name_mode]['languages'])
    if code < 1 or code > max_code:  # code out of range
        return L_ANY
    # code & (code - 1) clears the lowest set bit, so a non-zero result
    # means at least two bits were set
    if (code & (code - 1)) != 0:  # choice was more than one language; use any
        return L_ANY
    return code
543
544
545
def bmpm(word, language_arg=0, name_mode='gen', match_mode='approx',
         concat=False, filter_langs=False):
    """Return the Beider-Morse Phonetic Matching encoding(s) of a term.

    The Beider-Morse Phonetic Matching algorithm is described in
    :cite:`Beider:2008`.
    The reference implementation is licensed under GPLv3.

    :param str word: the word to transform
    :param language_arg: the language of the term, either as a number (a
        bitmask of language flags) or as a comma-separated string of
        language names (case-insensitive); supported names include:

            - 'any'
            - 'arabic'
            - 'cyrillic'
            - 'czech'
            - 'dutch'
            - 'english'
            - 'french'
            - 'german'
            - 'greek'
            - 'greeklatin'
            - 'hebrew'
            - 'hungarian'
            - 'italian'
            - 'latvian'
            - 'polish'
            - 'portuguese'
            - 'romanian'
            - 'russian'
            - 'spanish'
            - 'turkish'

        If no usable language is given, the language is guessed from the
        word.
    :param str name_mode: the name mode of the algorithm:

            - 'gen' -- general (default)
            - 'ash' -- Ashkenazi
            - 'sep' -- Sephardic

    :param str match_mode: matching mode: 'approx' or 'exact'
    :param bool concat: concatenation mode
    :param bool filter_langs: filter out incompatible languages
    :returns: the BMPM value(s), separated by spaces
    :rtype: str
    :raises ValueError: if a language name is unknown or not available in
        the chosen name mode, unless filter_langs is set

    >>> bmpm('Christopher')
    'xrQstopir xrQstYpir xristopir xristYpir xrQstofir xrQstYfir xristofir
    xristYfir xristopi xritopir xritopi xristofi xritofir xritofi tzristopir
    tzristofir zristopir zristopi zritopir zritopi zristofir zristofi zritofir
    zritofi'
    >>> bmpm('Niall')
    'nial niol'
    >>> bmpm('Smith')
    'zmit'
    >>> bmpm('Schmidt')
    'zmit stzmit'

    >>> bmpm('Christopher', language_arg='German')
    'xrQstopir xrQstYpir xristopir xristYpir xrQstofir xrQstYfir xristofir
    xristYfir'
    >>> bmpm('Christopher', language_arg='English')
    'tzristofir tzrQstofir tzristafir tzrQstafir xristofir xrQstofir xristafir
    xrQstafir'
    >>> bmpm('Christopher', language_arg='German', name_mode='ash')
    'xrQstopir xrQstYpir xristopir xristYpir xrQstofir xrQstYfir xristofir
    xristYfir'

    >>> bmpm('Christopher', language_arg='German', match_mode='exact')
    'xriStopher xriStofer xristopher xristofer'
    """
    word = normalize('NFC', text_type(word.strip().lower()))

    # only the first three letters of the mode are significant; anything
    # unrecognized falls back to the generic mode
    name_mode = name_mode.strip().lower()[:3]
    if name_mode not in {'ash', 'sep', 'gen'}:
        name_mode = 'gen'

    if match_mode != 'exact':
        match_mode = 'approx'

    # Translate the supplied language_arg value into an integer representing
    # a set of languages
    all_langs = sum(_LANG_DICT[_] for _ in BMDATA[name_mode]['languages'])-1
    lang_choices = 0
    if isinstance(language_arg, (int, float, long)):
        lang_choices = int(language_arg)
    elif language_arg != '' and isinstance(language_arg, (text_type, str)):
        for lang in text_type(language_arg).lower().split(','):
            # tolerate blanks around the commas, e.g. 'german, english'
            lang = lang.strip()
            if lang in _LANG_DICT and (_LANG_DICT[lang] & all_langs):
                # |= rather than += so that naming the same language twice
                # cannot carry into a different language's bit
                lang_choices |= _LANG_DICT[lang]
            elif not filter_langs:
                raise ValueError('Unknown \'' + name_mode + '\' language: \'' +
                                 lang + '\'')

    # Language choices are either all incompatible with the name mode or
    # no choices were given, so try to autodetect
    if lang_choices == 0:
        language_arg = _bm_language(word, name_mode)
    else:
        language_arg = lang_choices
    # single language used to select the rule tables (L_ANY when the choice
    # is ambiguous or out of range)
    language_arg2 = _bm_language_index_from_code(language_arg, name_mode)

    rules = BMDATA[name_mode]['rules'][language_arg2]
    final_rules1 = BMDATA[name_mode][match_mode]['common']
    final_rules2 = BMDATA[name_mode][match_mode][language_arg2]

    result = _bm_phonetic(word, name_mode, rules, final_rules1,
                          final_rules2, language_arg, concat)
    result = _bm_phonetic_numbers(result)

    return result
655
656
657
if __name__ == '__main__':
    # Run the examples embedded in this module's docstrings as tests.
    import doctest
    doctest.testmod()
660