Completed
Push — master ( 643512...2b6b3e )
by Chris
20:40 queued 10:36
created

abydos.phones._phones.cmp_features()   F

Complexity

Conditions 15

Size

Total Lines 86
Code Lines 27

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 24
CRAP Score 15

Importance

Changes 0
Metric Value
eloc 27
dl 0
loc 86
ccs 24
cts 24
cp 1
rs 2.9998
c 0
b 0
f 0
cc 15
nop 3
crap 15

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

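Commonly applied refactorings include Extract Method; when a method has many parameters or temporary variables, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object are also worth considering.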

Complexity

Complex units like abydos.phones._phones.cmp_features() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields, methods, or local variables that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2019 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
"""abydos.phones._phones.
20
21
The phones module implements phonetic feature coding, decoding, and comparison
22
functions.
23
"""
24
25 1
from __future__ import (
26
    absolute_import,
27
    division,
28
    print_function,
29
    unicode_literals,
30
)
31
32 1
from unicodedata import normalize
33
34 1
from six import text_type
35 1
from six.moves import range
36
37 1
# Public API of this module. ``ipa_to_feature_dicts`` is a documented public
# function and is therefore exported alongside the others.
__all__ = [
    'cmp_features',
    'get_feature',
    'ipa_to_feature_dicts',
    'ipa_to_features',
]
38
39
40 1
_PHONETIC_FEATURES = {
41
    't': 2783230754502126250,
42
    't͇': 2783230754502126250,
43
    't̪': 2783230479624219306,
44
    't̠': 2783231579135847082,
45
    'd': 2783230754501864106,
46
    'd͇': 2783230754501864106,
47
    'd̪': 2783230479623957162,
48
    'd̠': 2783231579135584938,
49
    's': 2783230754502125978,
50
    's͇': 2783230754502125978,
51
    's̪': 2783230479624219034,
52
    's̟': 2783230479624219034,
53
    'z': 2783230754501863834,
54
    'z͇': 2783230754501863834,
55
    'z̪': 2783230479623956890,
56
    'z̟': 2783230479623956890,
57
    'θ̱': 2783230754502125994,
58
    'θ͇': 2783230754502125994,
59
    'ɹ̝̊': 2783230754502125994,
60
    'ð̠': 2783230754501863850,
61
    'ð͇': 2783230754501863850,
62
    'ɹ̝': 2783230754501863850,
63
    'ɬ': 2783230754502060454,
64
    'ɮ': 2783230754501863846,
65
    'θ': 2783230479624219050,
66
    's̄': 2783230479624219034,
67
    'ð': 2783230479623956906,
68
    'ð̞': 2693158487076546986,
69
    'ʃ': 2783231556184615322,
70
    'ʃʲ': 2783231556184615322,
71
    'š': 2783231556184615322,
72
    'ɹ̠̊˔': 2783231579135846826,
73
    'ʒ': 2783231556184353178,
74
    'ʒʲ': 2783231556184353178,
75
    'ž': 2783231556184353178,
76
    'ɹ̠˔': 2783231579135584682,
77
    'c': 2783233463150095018,
78
    'ɟ': 2783233463149832874,
79
    'ç': 2783233463150094762,
80
    'ʝ': 2783233463149832618,
81
    'p': 2781720575281375914,
82
    'p̪': 2781702983095331498,
83
    'p͆': 2781702983095331498,
84
    'b': 2781720575281113770,
85
    'b̪': 2781702983095069354,
86
    'b͆': 2781702983095069354,
87
    'f': 2781702983095331242,
88
    'v': 2781702983095069098,
89
    'ɸ': 2781720575281375658,
90
    'ɸ̞': 2691648582733965738,
91
    'β': 2781720575281113514,
92
    'β̞': 2691648582733703594,
93
    'k': 2783233462881659562,
94
    'ɡ': 2783233462881397418,
95
    'g': 2783233462881397418,
96
    'x': 2783233462881659306,
97
    'x̞': 2981391846485961130,
98
    'ɣ': 2783233462881397162,
99
    'ɰ': 2981391846485698986,
100
    'ɣ̞': 2981391846485698986,
101
    'ɣ˕': 2981391846485698986,
102
    'q': 2783233480061528746,
103
    'ɢ': 2783233480061266602,
104
    'χ': 2783233480061528490,
105
    'x̣': 2783233480061528490,
106
    'χ̞': 2693161487514118570,
107
    'ʁ': 2783233480061266346,
108
    'ʁ̝': 2783233480061266346,
109
    'ʁ̞': 2693161487513856426,
110
    'ħ': 2783233503273855402,
111
    'ħ̞': 2693161510726445482,
112
    'ʕ': 2783233503273593258,
113
    'ʕ̝': 2783233503273593258,
114
    'ʕ̞': 2693161510726183338,
115
    'ɑ̯': 2693161510726183338,
116
    'h': 2783233503281129898,
117
    'h̃': 2693161510733719914,
118
    'ɦ': 2783233503280867754,
119
    'ʔ': 2783233503281179306,
120
    'm': 2709662981243185770,
121
    'm̥': 2709662981243447914,
122
    'ɱ': 2709645389057141354,
123
    'm̪': 2709645389057141354,
124
    'n': 2711173160463936106,
125
    'n̪': 2711172885586029162,
126
    'n̥': 2711173160464198250,
127
    'n̊': 2711173160464198250,
128
    'n̪̊': 2711172885586291306,
129
    'ɳ': 2711174259975563882,
130
    'ɳ̊': 2711174259975826026,
131
    'ɳ̥': 2711174259975826026,
132
    'ɲ': 2711175869111904874,
133
    'ñ': 2711175869111904874,
134
    'n̠ʲ': 2711173944966556266,
135
    'ɲ̟': 2711173944966556266,
136
    'ɲ̊': 2711175869112167018,
137
    'ɲ̥': 2711175869112167018,
138
    'ŋ': 2711175868843469418,
139
    'ŋ̊': 2711175868843731562,
140
    'ɴ': 2711175886023338602,
141
    'l': 2693158761954453926,
142
    'l̠': 2693159586588174758,
143
    'l̪': 2693158487076546982,
144
    'ɫ': 2693158721554917798,
145
    'lˠ': 2693158721554917798,
146
    'lˤ': 2693158721554917798,
147
    'l̴': 2693158721554917798,
148
    'ɫ̪': 2693158446677010854,
149
    'l̪ˠ': 2693158446677010854,
150
    'l̪ˤ': 2693158446677010854,
151
    'l̴̪': 2693158446677010854,
152
    'l̥': 2693158761954716070,
153
    'ʎ': 2693161470602422694,
154
    'l̠ʲ': 2693159546457074086,
155
    'ʎ̟': 2693159546457074086,
156
    'ʟ': 2693161470333987238,
157
    'r': 2711173160463936170,
158
    'r̥': 2711173160464198314,
159
    'ɹ': 2693158761954453930,
160
    'ð̠˕': 2693158761954453930,
161
    'ʀ': 2711175886023338666,
162
    'ʀ̥': 2711175886023600810,
163
    'j': 2981391846754134442,
164
    'i̯': 2981391846754134442,
165
    'j̊': 2981391846754396586,
166
    'w': 2978753018579036586,
167
    'u̯': 2978753018579036586,
168
    'ɰʷ': 2978753018579036586,
169
    'ʍ': 2978753018579298730,
170
    'w̥': 2978753018579298730,
171
    'ɰᵝ': 2978682649834858922,
172
    'wᵝ': 2978682649834858922,
173
    'ɥ': 2978753018847472042,
174
    'jʷ': 2978753018847472042,
175
    'ʡ': 2783233503274904234,
176
    'ʜ': 2783233503274903978,
177
    'ʢ': 2783233503274641834,
178
    'ʢ̝': 2783233503274641834,
179
    'ʢ̞': 2693161510727231914,
180
    'ᴙ': 2711175909236714154,
181
    'ʙ': 2709662981243185834,
182
    'ʋ': 2691630990547659178,
183
    'ɕ': 2783231539004746138,
184
    'ʑ': 2783231539004483994,
185
    'ɾ': 2711173160463935914,
186
    'ᴅ': 2711173160463935914,
187
    'ɾ̥': 2711173160464198058,
188
    'ɾ̊': 2711173160464198058,
189
    'ᴅ̥': 2711173160464198058,
190
    'ᴅ̊': 2711173160464198058,
191
    'ɾ̃': 2711173160463935850,
192
    'n̆': 2711173160463935850,
193
    'ⱱ': 2709645389057141162,
194
    'ⱱ̟': 2709662981243185578,
195
    'w̆': 2709662981243185578,
196
    'b̆': 2709662981243185578,
197
    'ɢ̆': 2711175886023338410,
198
    'ʀ̆': 2711175886023338410,
199
    'ʡ̯': 2711175909236713898,
200
    'ɺ': 2711173160463935910,
201
    'ʎ̯': 2711175869111904678,
202
    'ʟ̆': 2711175868843469222,
203
    'ʈ': 2783231854013754026,
204
    'ɖ': 2783231854013491882,
205
    'ʂ': 2783231854013753754,
206
    'ʐ': 2783231854013491610,
207
    'ɻ': 2693159861466081706,
208
    'ɽ': 2711174259975563690,
209
    'ɽ͡r': 2711174259975563946,
210
    'ɭ': 2693159861466081702,
211
    'ɭ̆': 2711174259975563686,
212
    'ɺ˞': 2711174259975563686,
213
    'ɺ̢': 2711174259975563686,
214
    'ꞎ': 2783231854013688230,
215
    'ʎ̝̊': 2783233463150094758,
216
    'ʟ̝̊': 2783233462881659302,
217
    'ʟ̝': 2783233462881397158,
218
    # affricates & co-articulated
219
    't͡ʃ': 2783231556184615833,
220
    'ʧ': 2783231556184615833,
221
    't͜ʃ': 2783231556184615833,
222
    't̠ʲʃ': 2783231556184615833,
223
    'č': 2783231556184615833,
224
    'd͡ʒ': 2783231556184353689,
225
    'ʤ': 2783231556184353689,
226
    'd͜ʒ': 2783231556184353689,
227
    'd̠ʲʒ': 2783231556184353689,
228
    'ǯ': 2783231556184353689,
229
    't͡s': 2783230754502126489,
230
    'ʦ': 2783230754502126489,
231
    't͜s': 2783230754502126489,
232
    't̪͡s̪': 2783230479624219545,
233
    't͡s̪': 2783230479624219545,
234
    't̟͡s̟': 2783230479624219545,
235
    't͡s̟': 2783230479624219545,
236
    'ʦ̪': 2783230479624219545,
237
    'ʦ̟': 2783230479624219545,
238
    't͡θ̠': 2783230754502126505,
239
    't͡θ͇': 2783230754502126505,
240
    't͡θ': 2783230479624219561,
241
    't͜θ': 2783230479624219561,
242
    't̪͡θ': 2783230479624219561,
243
    't̟͡θ': 2783230479624219561,
244
    'd͡z': 2783230754501864345,
245
    'ʣ': 2783230754501864345,
246
    'd͜z': 2783230754501864345,
247
    'd̪͡z̪': 2783230479623957401,
248
    'd͡z̪': 2783230479623957401,
249
    'd̟͡z̟': 2783230479623957401,
250
    'd͡z̟': 2783230479623957401,
251
    'ʣ̪': 2783230479623957401,
252
    'ʣ̟': 2783230479623957401,
253
    'd͡ð̠': 2783230754501864361,
254
    'd͡ð̳': 2783230754501864361,
255
    'd͡ð': 2783230479623957417,
256
    'd͜ð': 2783230479623957417,
257
    'd̪͡ð': 2783230479623957417,
258
    'd̟͡ð': 2783230479623957417,
259
    'k͡x': 2783233462881659817,
260
    'ɡ͡ɣ': 2783233462881397673,
261
    'g͡ɣ': 2783233462881397673,
262
    'p͡f': 2781738167467420585,
263
    'p̪͡f': 2781702983095331753,
264
    'b͡v': 2781738167467158441,
265
    'b̪͡v': 2781702983095069609,
266
    'b̪͜v': 2781702983095069609,
267
    't͡ɕ': 2783231539004746649,
268
    't͜ɕ': 2783231539004746649,
269
    'ʨ': 2783231539004746649,
270
    'd͡ʑ': 2783231539004484505,
271
    'd͜ʑ': 2783231539004484505,
272
    'ʥ': 2783231539004484505,
273
    'ʈ͡ʂ': 2783231854013754265,
274
    't͡ʂ': 2783231854013754265,
275
    'ɖ͡ʐ': 2783231854013492121,
276
    'd͡ʐ': 2783231854013492121,
277
    't͡ɬ': 2783230754502060965,
278
    't͜ɬ': 2783230754502060965,
279
    'ƛ': 2783230754502060965,
280
    'd͡ɮ': 2783230754501864357,
281
    'c͡ç': 2783233463150095273,
282
    'c͜ç': 2783233463150095273,
283
    'ɟ͡ʝ': 2783233463149833129,
284
    'c͡ʎ̝̥': 2783233463150095269,
285
    'k͡ʟ̝̊': 2783233462881659813,
286
    'ɡ͡ʟ̝': 2783233462881397669,
287
    'q͡χ': 2783233480061529001,
288
    'ɢ͡ʁ': 2783233480061266857,
289
    'ɢ͜ʁ': 2783233480061266857,
290
    'ɧ': 2783231813614217626,
291
    'k͡p': 2781720534881839786,
292
    'k͜p': 2781720534881839786,
293
    'ɡ͡b': 2781720534881577642,
294
    'kʷ': 2780594634974997162,
295
    'k͡w': 2780594634974997162,
296
    'k͜w': 2780594634974997162,
297
    'kʷh': 2780594634974931626,
298
    'gʷ': 2780594634974735018,
299
    'g͡w': 2780594634974735018,
300
    'gʷh': 2780594634974669482,
301
    # implosives
302
    'ɓ̥': 2781720575281355434,
303
    'ƥ': 2781720575281355434,
304
    'pʼ↓': 2781720575281355434,
305
    'ɓ': 2781720575281093290,
306
    'ɗ̥': 2783230754502105770,
307
    'ƭ': 2783230754502105770,
308
    'tʼ↓': 2783230754502105770,
309
    'ɗ': 2783230754501843626,
310
    'ᶑ': 2783231854013471402,
311
    'ʄ̊': 2783233463150074538,
312
    'ƈ': 2783233463150074538,
313
    'cʼ↓': 2783233463150074538,
314
    'ʄ': 2783233463149812394,
315
    'ɠ': 2783233462881376938,
316
    'ʛ': 2783233480061246122,
317
    # clicks
318
    'ʘ': 2781720575281374890,
319
    'ʘʰ': 2781720575281309354,
320
    'ʘ̬': 2781720575281112746,
321
    'ᶢʘ': 2781720575281112746,
322
    'ʘ̃': 2781720575281112682,
323
    'ᵑʘ': 2781720575281112682,
324
    'ᵐʘ': 2781720575281112682,
325
    'ʘ̥̃ʰ': 2781720575281309290,
326
    'ᵑ̊ʘʰ': 2781720575281309290,
327
    'ʘ̃ˀ': 2781720575281358442,
328
    'ᵑʘˀ': 2781720575281358442,
329
    'ʘ̃͜ʔ': 2781720575281358442,
330
    'ᵑ̊ʘˀ': 2781720575281358442,
331
    'ǀ': 2783230479624218282,
332
    'ʇ': 2783230479624218282,
333
    'ǀ̬': 2783230479623956138,
334
    'ʇ̬': 2783230479623956138,
335
    'ᶢǀ': 2783230479623956138,
336
    'ᶢʇ': 2783230479623956138,
337
    'ǀ̃': 2783230479623956074,
338
    'ʇ̃': 2783230479623956074,
339
    'ᵑǀ': 2783230479623956074,
340
    'ⁿǀ': 2783230479623956074,
341
    'ᵑʇ': 2783230479623956074,
342
    'ǀ̥̃ʰ': 2783230479624152682,
343
    'ʇ̥̃ʰ': 2783230479624152682,
344
    'ᵑ̊ǀʰ': 2783230479624152682,
345
    'ᵑ̊ʇʰ': 2783230479624152682,
346
    'ǀ̃ˀ': 2783230479624201834,
347
    'ʇ̃ˀ': 2783230479624201834,
348
    'ᵑǀˀ': 2783230479624201834,
349
    'ᵑʇˀ': 2783230479624201834,
350
    'ᵑ̊ǀˀ': 2783230479624201834,
351
    'ᵑ̊ʇˀ': 2783230479624201834,
352
    'ǃ': 2783230754502125226,
353
    'ʗ': 2783230754502125226,
354
    'ǃʰ': 2783230754502059690,
355
    'ʗʰ': 2783230754502059690,
356
    'ǃ̬': 2783230754501863082,
357
    'ʗ̬': 2783230754501863082,
358
    'ᶢǃ': 2783230754501863082,
359
    'ᶢʗ': 2783230754501863082,
360
    'ǃ̃': 2783230754501863018,
361
    'ᵑǃ': 2783230754501863018,
362
    'ʗ̃': 2783230754501863018,
363
    'ᵑʗ': 2783230754501863018,
364
    'ǃ̥̃ʰ': 2783230754502059626,
365
    'ʗ̃̊ʰ': 2783230754502059626,
366
    'ᵑ̊ǃʰ': 2783230754502059626,
367
    'ᵑ̊ʗʰ': 2783230754502059626,
368
    'ǃ̃ˀ': 2783230754502108778,
369
    'ʗ̃ˀ': 2783230754502108778,
370
    'ᵑǃˀ': 2783230754502108778,
371
    'ᵑʗˀ': 2783230754502108778,
372
    'ǂ': 2783233463150093994,
373
    'ʄ̵': 2783233463150093994,
374
    '⨎': 2783233463150093994,
375
    'ǂʰ': 2783233463150028458,
376
    'ǂ̬': 2783233463149831850,
377
    'ᶢǂ': 2783233463149831850,
378
    'ǂ̃': 2783233463149831786,
379
    'ᵑǂ': 2783233463149831786,
380
    'ǂ̥̃ʰ': 2783233463150028394,
381
    'ᵑ̊ǂʰ': 2783233463150028394,
382
    'ǂ̃ˀ': 2783233463150077546,
383
    'ᵑǂˀ': 2783233463150077546,
384
    'ᵑǂ͡ʔ': 2783233463150077546,
385
    'ᵑ̊ǂˀ': 2783233463150077546,
386
    'ǃ͡s': 2783233463150093978,
387
    'ǂᶴ': 2783233463150093978,
388
    'ǁ': 2783230754502125222,
389
    'ʖ': 2783230754502125222,
390
    'ǁʰ': 2783230754502059686,
391
    'ʖʰ': 2783230754502059686,
392
    'ǁ̬': 2783230754501863078,
393
    'ʖ̬': 2783230754501863078,
394
    'ᶢǁ': 2783230754501863078,
395
    'ᶢʖ': 2783230754501863078,
396
    'ǁ̃': 2783230754501863014,
397
    'ʖ̃': 2783230754501863014,
398
    'ᵑǁ': 2783230754501863014,
399
    'ᵑʖ': 2783230754501863014,
400
    'ǁ̥̃ʰ': 2783230754502059622,
401
    'ʖ̥̃ʰ': 2783230754502059622,
402
    'ᵑ̊ǁʰ': 2783230754502059622,
403
    'ᵑ̊ʖʰ': 2783230754502059622,
404
    'ǁ̃ˀ': 2783230754502108774,
405
    'ʖ̃ˀ': 2783230754502108774,
406
    'ᵑǁˀ': 2783230754502108774,
407
    'ᵑʖˀ': 2783230754502108774,
408
    'ᵑǁ͡ʔ': 2783230754502108774,
409
    'ʖ̃͜ʔ': 2783230754502108774,
410
    'ǃ˞': 2783231854013753002,
411
    'ǃǁ': 2783231854013753002,
412
    '‼': 2783231854013753002,
413
    '!!': 2783231854013753002,
414
    'ǃ˞ʰ': 2783231854013687466,
415
    '‼ʰ': 2783231854013687466,
416
    '!!ʰ': 2783231854013687466,
417
    'ǃ̬˞': 2783231854013490858,
418
    'ᶢǃ˞': 2783231854013490858,
419
    '‼̬': 2783231854013490858,
420
    '!!̬': 2783231854013490858,
421
    'ᶢ‼': 2783231854013490858,
422
    'ᶢ!!': 2783231854013490858,
423
    'ǃ̃˞': 2783231854013490794,
424
    'ᵑǃ˞': 2783231854013490794,
425
    '‼̃': 2783231854013490794,
426
    '!!̃': 2783231854013490794,
427
    'ᵑ‼': 2783231854013490794,
428
    'ᵑ!!': 2783231854013490794,
429
    'ǃ̥̃˞ʰ': 2783231854013687402,
430
    'ᵑ̊ǃ˞ʰ': 2783231854013687402,
431
    '‼̥̃ʰ': 2783231854013687402,
432
    '!!̥̃ʰ': 2783231854013687402,
433
    'ᵑ̊‼ʰ': 2783231854013687402,
434
    'ᵑ̊!!ʰ': 2783231854013687402,
435
    'ǃ̃˞ˀ': 2783231854013736554,
436
    'ᵑǃ˞͜ʔ': 2783231854013736554,
437
    'ᵑ̊‼ˀ': 2783231854013736554,
438
    'ᵑ‼ˀ': 2783231854013736554,
439
    '‼̃ˀ': 2783231854013736554,
440
    'ᵑ̊!!ˀ': 2783231854013736554,
441
    'ᵑ!!ˀ': 2783231854013736554,
442
    '!!̃ˀ': 2783231854013736554,
443
    # vowels
444
    'i': 1826957412996131242,
445
    'ɪ': 1826957413067434410,
446
    'ɩ': 1826957413067434410,
447
    'u': 1825831513894594986,
448
    'ʊ': 1825831513965898154,
449
    'ɷ': 1825831513965898154,
450
    'ᴜ': 1825831513965898154,
451
    'ɯ̽': 1826957413872740778,
452
    'ʊ̜': 2115187790024452522,
453
    'e': 1826957430176000426,
454
    'ɛ': 1826957430247303594,
455
    'o': 1825831531074464170,
456
    'ɤ': 1826957430981306794,
457
    'ɔ': 1825831531145767338,
458
    'ʌ': 1826957431052609962,
459
    'a': 1826957425952336298,
460
    'a̟': 1826957425952336298,
461
    'æ̞': 1826957425952336298,
462
    'ɶ': 1825831526045493674,
463
    'æ': 1826957425885227434,
464
    'y': 1825831513089288618,
465
    'ʏ': 1825831513160591786,
466
    'ø': 1825831530269157802,
467
    'œ': 1825831530340460970,
468
    'ə': 1828083331160779178,
469
    'ɵ̞': 1825831531347093930,
470
    'ə̹': 1825831531347093930,
471
    'ɞ̝': 1825831531347093930,
472
    'ɘ̞': 1826957431253936554,
473
    'ɯ': 1826957413805631914,
474
    'ɒ': 1825831526850800042,
475
    'ɑ': 1826957426757642666,
476
    'ɨ': 1826957414069873066,
477
    'ï': 1826957414069873066,
478
    'ʉ': 1825831514163030442,
479
    'ü': 1825831514163030442,
480
    'ɘ': 1826957431249742250,
481
    'ë': 1826957431249742250,
482
    'ɵ': 1825831531342899626,
483
    'ö': 1825831531342899626,
484
    'ɜ': 1826957431316851114,
485
    'ɛ̈': 1826957431316851114,
486
    'ɞ': 1825831531410008490,
487
    'ɔ̈': 1825831531410008490,
488
    'ä': 1826957427026078122,
489
    'a̠': 1826957427026078122,
490
    'ɑ̈': 1826957427026078122,
491
    'ɐ̞': 1826957427026078122,
492
    'ɐ': 1828083326865811882,
493
    'ɜ̞': 1826957426958969258,
494
    'ɞ̞': 1825831527052126634,
495
    'ɪ̟': 1826957413063240106,
496
    'ʏ̟': 1825831513156397482,
497
    'ʏ̫': 1825550038183881130,
498
    'ʏʷ': 1825550038183881130,
499
    'ɪʷ': 1825550038183881130,
500
    'y̫': 1825550038112577962,
501
    'yʷ': 1825550038112577962,
502
    'iʷ': 1825550038112577962,
503
    'u͍': 1825761145150417322,
504
    'ɯᵝ': 1825761145150417322,
505
    'ɯ͡β̞': 1825761145150417322,
506
    'ʉ͍': 1825761145418852778,
507
    'ɨᵝ': 1825761145418852778,
508
    'ɨ͡β̞': 1825761145418852778,
509
    'ɪ̈': 1826957414136981930,
510
    'ɨ̞': 1826957414136981930,
511
    'ɘ̝': 1826957414136981930,
512
    'ʊ̈': 1825831514230139306,
513
    'ʉ̞': 1825831514230139306,
514
    'ø̫': 1825550055292447146,
515
    'øʷ': 1825550055292447146,
516
    'eʷ': 1825550055292447146,
517
    'e̞': 1826957430180194730,
518
    'ɛ̝': 1826957430180194730,
519
    'ø̞': 1825831530273352106,
520
    'œ̝': 1825831530273352106,
521
    'o̞': 1825831531078658474,
522
    'ɔ̝': 1825831531078658474,
523
    'ɤ̞': 1826957430985501098,
524
    'ʌ̝': 1826957430985501098,
525
    # feature diacritics
526
    'ʰ': 65536,
527
    'ʱ': 327680,
528
    '̤': 327680,
529
    'ʼ': 16384,
530
    '̚': 131072,
531
    '̬': 262144,
532
    '̌': 262144,
533
    '̥': 524288,
534
    '̊': 524288,
535
    'ʷ': 6737807255011328,
536
    'ʲ': 103012106240,
537
    '̰': 278528,
538
    '̩': 1152921504606846976,
539
    '̯': 2305843009213693952,
540
    '̃': 64,
541
    '̨': 64,
542
    '͊': 128,
543
    'ˠ': 98180268032,
544
    '̴': 98180268032,
545
    'ˤ': 32505856,
546
    '̘': 23068672,
547
    '̙': 26214400,
548
    '˭': 688256,
549
}
550
551 1
# Each feature owns two adjacent bits of a feature bundle. As decoded by the
# functions in this module: neither bit set means '0', only the lower bit set
# means '+', only the upper bit set means '-', and both bits set means '+/-'.
# The masks are listed from the most significant pair to the least
# significant pair.
_FEATURE_MASK = {
    'syllabic': 0b11 << 60,
    'consonantal': 0b11 << 58,
    'sonorant': 0b11 << 56,
    'approximant': 0b11 << 54,
    'labial': 0b11 << 52,
    'round': 0b11 << 50,
    'protruded': 0b11 << 48,
    'compressed': 0b11 << 46,
    'labiodental': 0b11 << 44,
    'coronal': 0b11 << 42,
    'anterior': 0b11 << 40,
    'distributed': 0b11 << 38,
    'dorsal': 0b11 << 36,
    'high': 0b11 << 34,
    'low': 0b11 << 32,
    'front': 0b11 << 30,
    'back': 0b11 << 28,
    'tense': 0b11 << 26,
    'pharyngeal': 0b11 << 24,
    'atr': 0b11 << 22,
    'rtr': 0b11 << 20,
    'voice': 0b11 << 18,
    'spread_glottis': 0b11 << 16,
    'constricted_glottis': 0b11 << 14,
    'glottalic_suction': 0b11 << 12,
    'velaric_suction': 0b11 << 10,
    'continuant': 0b11 << 8,
    'nasal': 0b11 << 6,
    'strident': 0b11 << 4,
    'lateral': 0b11 << 2,
    'delayed_release': 0b11 << 0,
}
584
585
586 1
def ipa_to_features(ipa):
    """Convert IPA to features.

    This translates an IPA string of one or more phones to a list of ints
    representing the features of the string.

    The input is lower-cased and NFD-normalized, then scanned left to right.
    At each position the longest symbol found in _PHONETIC_FEATURES is
    consumed; a character that starts no known symbol yields -1 and is
    skipped.

    Parameters
    ----------
    ipa : str
        The IPA representation of a phone or series of phones

    Returns
    -------
    list of ints
        A representation of the features of the input string

    Examples
    --------
    >>> ipa_to_features('mut')
    [2709662981243185770, 1825831513894594986, 2783230754502126250]
    >>> ipa_to_features('fon')
    [2781702983095331242, 1825831531074464170, 2711173160463936106]
    >>> ipa_to_features('telz')
    [2783230754502126250, 1826957430176000426, 2693158761954453926,
    2783230754501863834]

    .. versionadded:: 0.1.0

    """
    features = []
    pos = 0
    ipa = normalize('NFD', text_type(ipa.lower()))
    length = len(ipa)

    maxsymlen = max(len(_) for _ in _PHONETIC_FEATURES)

    while pos < length:
        # Try the longest candidate first and never look past the end of the
        # string, so that a multi-character symbol always beats its prefix.
        for i in range(min(maxsymlen, length - pos), 0, -1):
            symbol = ipa[pos : pos + i]
            if symbol in _PHONETIC_FEATURES:
                features.append(_PHONETIC_FEATURES[symbol])
                pos += i
                # Stop here: the next symbol must get a fresh longest-match
                # search instead of being limited to lengths shorter than i.
                break
        else:
            # No symbol starts at this position: mark it as unknown.
            features.append(-1)
            pos += 1

    return features
637
638
639 1
def ipa_to_feature_dicts(ipa):
    """Convert IPA to a feature dict list.

    This translates an IPA string of one or more phones to a list of dicts
    representing the features of the string.

    The input is lower-cased and NFD-normalized, then scanned left to right.
    At each position the longest symbol found in _PHONETIC_FEATURES is
    consumed and decoded into a dict mapping every feature name in
    _FEATURE_MASK to one of '0', '+', '-', or '+/-'. A character that starts
    no known symbol yields an empty dict and is skipped.

    Parameters
    ----------
    ipa : str
        The IPA representation of a phone or series of phones

    Returns
    -------
    list of dicts
        A representation of the features of the input string

    Examples
    --------
    >>> ipa_to_feature_dicts('mut')
    [{'syllabic': '-',
      'consonantal': '+',
      'sonorant': '+',
      'approximant': '-',
      'labial': '+',
      'round': '-',
      'protruded': '-',
      'compressed': '-',
      'labiodental': '-',
      'coronal': '-',
      'anterior': '0',
      'distributed': '0',
      'dorsal': '-',
      'high': '0',
      'low': '0',
      'front': '0',
      'back': '0',
      'tense': '0',
      'pharyngeal': '-',
      'atr': '0',
      'rtr': '0',
      'voice': '+',
      'spread_glottis': '-',
      'constricted_glottis': '-',
      'glottalic_suction': '-',
      'velaric_suction': '-',
      'continuant': '-',
      'nasal': '+',
      'strident': '-',
      'lateral': '-',
      'delayed_release': '-'},
     {'syllabic': '+',
      'consonantal': '-',
      'sonorant': '+',
      'approximant': '+',
      'labial': '+',
      'round': '+',
      'protruded': '-',
      'compressed': '-',
      'labiodental': '-',
      'coronal': '-',
      'anterior': '0',
      'distributed': '0',
      'dorsal': '+',
      'high': '+',
      'low': '-',
      'front': '-',
      'back': '+',
      'tense': '+',
      'pharyngeal': '+',
      'atr': '+',
      'rtr': '-',
      'voice': '+',
      'spread_glottis': '-',
      'constricted_glottis': '-',
      'glottalic_suction': '-',
      'velaric_suction': '-',
      'continuant': '+',
      'nasal': '-',
      'strident': '-',
      'lateral': '-',
      'delayed_release': '-'},
     {'syllabic': '-',
      'consonantal': '+',
      'sonorant': '-',
      'approximant': '-',
      'labial': '-',
      'round': '0',
      'protruded': '0',
      'compressed': '0',
      'labiodental': '0',
      'coronal': '+',
      'anterior': '+',
      'distributed': '-',
      'dorsal': '-',
      'high': '0',
      'low': '0',
      'front': '0',
      'back': '0',
      'tense': '0',
      'pharyngeal': '-',
      'atr': '0',
      'rtr': '0',
      'voice': '-',
      'spread_glottis': '-',
      'constricted_glottis': '-',
      'glottalic_suction': '-',
      'velaric_suction': '-',
      'continuant': '-',
      'nasal': '-',
      'strident': '-',
      'lateral': '-',
      'delayed_release': '-'}]

    .. versionadded:: 0.4.1

    """
    features = []
    pos = 0
    ipa = normalize('NFD', text_type(ipa.lower()))
    length = len(ipa)

    maxsymlen = max(len(_) for _ in _PHONETIC_FEATURES)

    while pos < length:
        # Try the longest candidate first and never look past the end of the
        # string, so that a multi-character symbol always beats its prefix.
        for i in range(min(maxsymlen, length - pos), 0, -1):
            symbol = ipa[pos : pos + i]
            if symbol not in _PHONETIC_FEATURES:
                continue

            feature_int = _PHONETIC_FEATURES[symbol]
            feature_dict = {}
            for feature, mask in _FEATURE_MASK.items():
                # each feature mask contains two bits, one each for - and +;
                # the lower bit represents +
                pos_mask = mask >> 1

                masked = feature_int & mask
                if masked == 0:
                    feature_dict[feature] = '0'
                elif masked == mask:
                    feature_dict[feature] = '+/-'
                elif masked & pos_mask:
                    feature_dict[feature] = '+'
                else:
                    feature_dict[feature] = '-'
            features.append(feature_dict)
            pos += i
            # Stop here: the next symbol must get a fresh longest-match
            # search instead of being limited to lengths shorter than i.
            break
        else:
            # No symbol starts at this position: mark it as unknown.
            features.append({})
            pos += 1

    return features
794
795
796 1
def get_feature(vector, feature):
    """Extract one phonetic feature from a vector of feature bundles.

    For every bundle in the input, this reports the state of the requested
    feature, so the result has the same length as the input.

    Parameters
    ----------
    vector : list
        A tuple or list of ints representing the phonetic features of a phone
        or series of phones (such as is returned by the ipa_to_features
        function)
    feature : str
        A feature name from the set:

            - ``syllabic``
            - ``consonantal``
            - ``sonorant``
            - ``approximant``
            - ``labial``
            - ``round``
            - ``protruded``
            - ``compressed``
            - ``labiodental``
            - ``coronal``
            - ``anterior``
            - ``distributed``
            - ``dorsal``
            - ``high``
            - ``low``
            - ``front``
            - ``back``
            - ``tense``
            - ``pharyngeal``
            - ``atr``
            - ``rtr``
            - ``voice``
            - ``spread_glottis``
            - ``constricted_glottis``
            - ``glottalic_suction``
            - ``velaric_suction``
            - ``continuant``
            - ``nasal``
            - ``strident``
            - ``lateral``
            - ``delayed_release``

    Returns
    -------
    list
        One entry per input bundle: 1 for +, -1 for -, 0 for neutral, 2 for
        +/-, and NaN (a float) for a negative (unknown) bundle

    Raises
    ------
    AttributeError
        feature must be one of ...

    Examples
    --------
    >>> tails = ipa_to_features('telz')
    >>> get_feature(tails, 'consonantal')
    [1, -1, 1, 1]
    >>> get_feature(tails, 'sonorant')
    [-1, 1, 1, -1]
    >>> get_feature(tails, 'nasal')
    [-1, -1, -1, -1]
    >>> get_feature(tails, 'coronal')
    [1, -1, 1, 1]

    .. versionadded:: 0.1.0

    """
    if feature not in _FEATURE_MASK:
        valid_names = "', '".join(_FEATURE_MASK.keys())
        raise AttributeError(
            "feature must be one of: '{}'".format(valid_names)
        )

    # The mask covers the feature's two bits; shifting it right by one leaves
    # an overlap with only the lower bit of the pair, which encodes +.
    both_bits = _FEATURE_MASK[feature]
    plus_bit = both_bits >> 1

    def _state(bundle):
        """Return the state of the selected feature in a single bundle."""
        if bundle < 0:
            return float('NaN')
        bits = bundle & both_bits
        if bits == 0:
            return 0
        if bits == both_bits:
            return 2
        return 1 if bits & plus_bit else -1

    return [_state(bundle) for bundle in vector]
901
902
903 1
def cmp_features(feat1, feat2, weights=None):
    """Compare features.

    This returns a number in the range [0, 1] representing a comparison of two
    feature bundles.

    If one of the bundles is negative, 0.0 is returned (for unknown values)

    If the bundles are identical, 1.0 is returned.

    If they are inverses of one another, 0.0 is returned.

    Otherwise, a float representing their similarity is returned.

    Parameters
    ----------
    feat1 : int
        A feature bundle
    feat2 : int
        A feature bundle
    weights : None or list or tuple or dict
        If None, all features are of equal significance and a simple normalized
        hamming distance of the features is calculated. If a list or tuple
        of numeric values is supplied, the values are inferred as the weights
        for each feature, in order of the features listed in _FEATURE_MASK.
        If a dict is supplied, its key values should match keys in
        _FEATURE_MASK to which each weight (value) should be assigned. Missing
        values in all cases are assigned a weight of 0 and will be omitted from
        the comparison.

    Returns
    -------
    float
        A comparison of the feature bundles

    Raises
    ------
    TypeError
        weights must be a dict, list, or tuple.

    Examples
    --------
    >>> cmp_features(ipa_to_features('l')[0], ipa_to_features('l')[0])
    1.0
    >>> cmp_features(ipa_to_features('l')[0], ipa_to_features('n')[0])
    0.8709677419354839
    >>> cmp_features(ipa_to_features('l')[0], ipa_to_features('z')[0])
    0.8709677419354839
    >>> cmp_features(ipa_to_features('l')[0], ipa_to_features('i')[0])
    0.564516129032258

    .. versionadded:: 0.1.0
    .. versionchanged:: 0.4.1
        Added weights parameter for modifiable feature weighting

    """
    if feat1 < 0 or feat2 < 0:
        return 0.0
    if feat1 == feat2:
        return 1.0

    # This should be handled some other way since this will take a long time
    # when done repeatedly. Maybe convert to a class & save the weights list.
    pair_weights = None
    if weights is not None:
        if isinstance(weights, dict):
            # Order the weights from the most significant feature to the
            # least significant one, defaulting absent features to 0.
            weights = [
                weights.get(feature, 0)
                for feature in sorted(
                    _FEATURE_MASK, key=_FEATURE_MASK.get, reverse=True
                )
            ]
        elif isinstance(weights, (list, tuple)):
            weights = list(weights) + [0] * (len(_FEATURE_MASK) - len(weights))
        else:
            raise TypeError('weights must be a dict, list, or tuple.')

        # The weights are ordered most significant feature first, but the
        # loop below consumes bit pairs least significant first, so reverse
        # them to keep each weight aligned with its own feature.
        pair_weights = weights[: len(_FEATURE_MASK)][::-1]

    magnitude = sum(weights) if weights else len(_FEATURE_MASK)

    featxor = feat1 ^ feat2
    diffbits = 0
    i = 0
    while featxor:
        # Each feature occupies two bits; count how many of them differ.
        pair = featxor & 0b11
        if pair:
            differing = 2 if pair == 0b11 else 1
            weight = 1 if pair_weights is None else pair_weights[i]
            diffbits += differing * weight
        featxor >>= 2
        i += 1

    if not diffbits:
        return 1.0
    return 1.0 - diffbits / (2.0 * magnitude)
1004
1005
1006
if __name__ == '__main__':
    # When executed as a script, run the doctest examples in this module.
    from doctest import testmod

    testmod()
1010