abydos.phones._phones.ipa_to_features()   B
last analyzed

Complexity

Conditions 6

Size

Total Lines 51
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 6

Importance

Changes 0
Metric Value
eloc 18
dl 0
loc 51
ccs 9
cts 9
cp 1
rs 8.5666
c 0
b 0
f 0
cc 6
nop 1
crap 6

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

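Commonly applied refactorings include Extract Method: move a self-contained part of the long method into its own well-named method and call it from the original.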

1
# Copyright 2014-2020 by Christopher C. Little.
2
# This file is part of Abydos.
3
#
4
# Abydos is free software: you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation, either version 3 of the License, or
7
# (at your option) any later version.
8
#
9
# Abydos is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
16
17
"""abydos.phones._phones.
18
19 1
The phones module implements phonetic feature coding, decoding, and comparison
20
functions.
21
"""
22
23
from typing import Dict, List, Optional, Sequence, Union
24
from unicodedata import normalize
25 1
26
__all__ = ['cmp_features', 'get_feature', 'ipa_to_features']
27
28
29
_PHONETIC_FEATURES = {
30
    't': 2783230754502126250,
31
    't͇': 2783230754502126250,
32 1
    't̪': 2783230479624219306,
33
    't̠': 2783231579135847082,
34 1
    'd': 2783230754501864106,
35 1
    'd͇': 2783230754501864106,
36
    'd̪': 2783230479623957162,
37 1
    'd̠': 2783231579135584938,
38
    's': 2783230754502125978,
39
    's͇': 2783230754502125978,
40 1
    's̪': 2783230479624219034,
41
    's̟': 2783230479624219034,
42
    'z': 2783230754501863834,
43
    'z͇': 2783230754501863834,
44
    'z̪': 2783230479623956890,
45
    'z̟': 2783230479623956890,
46
    'θ̱': 2783230754502125994,
47
    'θ͇': 2783230754502125994,
48
    'ɹ̝̊': 2783230754502125994,
49
    'ð̠': 2783230754501863850,
50
    'ð͇': 2783230754501863850,
51
    'ɹ̝': 2783230754501863850,
52
    'ɬ': 2783230754502060454,
53
    'ɮ': 2783230754501863846,
54
    'θ': 2783230479624219050,
55
    's̄': 2783230479624219034,
56
    'ð': 2783230479623956906,
57
    'ð̞': 2693158487076546986,
58
    'ʃ': 2783231556184615322,
59
    'ʃʲ': 2783231556184615322,
60
    'š': 2783231556184615322,
61
    'ɹ̠̊˔': 2783231579135846826,
62
    'ʒ': 2783231556184353178,
63
    'ʒʲ': 2783231556184353178,
64
    'ž': 2783231556184353178,
65
    'ɹ̠˔': 2783231579135584682,
66
    'c': 2783233463150095018,
67
    'ɟ': 2783233463149832874,
68
    'ç': 2783233463150094762,
69
    'ʝ': 2783233463149832618,
70
    'p': 2781720575281375914,
71
    'p̪': 2781702983095331498,
72
    'p͆': 2781702983095331498,
73
    'b': 2781720575281113770,
74
    'b̪': 2781702983095069354,
75
    'b͆': 2781702983095069354,
76
    'f': 2781702983095331242,
77
    'v': 2781702983095069098,
78
    'ɸ': 2781720575281375658,
79
    'ɸ̞': 2691648582733965738,
80
    'β': 2781720575281113514,
81
    'β̞': 2691648582733703594,
82
    'k': 2783233462881659562,
83
    'ɡ': 2783233462881397418,
84
    'g': 2783233462881397418,
85
    'x': 2783233462881659306,
86
    'x̞': 2981391846485961130,
87
    'ɣ': 2783233462881397162,
88
    'ɰ': 2981391846485698986,
89
    'ɣ̞': 2981391846485698986,
90
    'ɣ˕': 2981391846485698986,
91
    'q': 2783233480061528746,
92
    'ɢ': 2783233480061266602,
93
    'χ': 2783233480061528490,
94
    'x̣': 2783233480061528490,
95
    'χ̞': 2693161487514118570,
96
    'ʁ': 2783233480061266346,
97
    'ʁ̝': 2783233480061266346,
98
    'ʁ̞': 2693161487513856426,
99
    'ħ': 2783233503273855402,
100
    'ħ̞': 2693161510726445482,
101
    'ʕ': 2783233503273593258,
102
    'ʕ̝': 2783233503273593258,
103
    'ʕ̞': 2693161510726183338,
104
    'ɑ̯': 2693161510726183338,
105
    'h': 2783233503281129898,
106
    'h̃': 2693161510733719914,
107
    'ɦ': 2783233503280867754,
108
    'ʔ': 2783233503281179306,
109
    'm': 2709662981243185770,
110
    'm̥': 2709662981243447914,
111
    'ɱ': 2709645389057141354,
112
    'm̪': 2709645389057141354,
113
    'n': 2711173160463936106,
114
    'n̪': 2711172885586029162,
115
    'n̥': 2711173160464198250,
116
    'n̊': 2711173160464198250,
117
    'n̪̊': 2711172885586291306,
118
    'ɳ': 2711174259975563882,
119
    'ɳ̊': 2711174259975826026,
120
    'ɳ̥': 2711174259975826026,
121
    'ɲ': 2711175869111904874,
122
    'ñ': 2711175869111904874,
123
    'n̠ʲ': 2711173944966556266,
124
    'ɲ̟': 2711173944966556266,
125
    'ɲ̊': 2711175869112167018,
126
    'ɲ̥': 2711175869112167018,
127
    'ŋ': 2711175868843469418,
128
    'ŋ̊': 2711175868843731562,
129
    'ɴ': 2711175886023338602,
130
    'l': 2693158761954453926,
131
    'l̠': 2693159586588174758,
132
    'l̪': 2693158487076546982,
133
    'ɫ': 2693158721554917798,
134
    'lˠ': 2693158721554917798,
135
    'lˤ': 2693158721554917798,
136
    'l̴': 2693158721554917798,
137
    'ɫ̪': 2693158446677010854,
138
    'l̪ˠ': 2693158446677010854,
139
    'l̪ˤ': 2693158446677010854,
140
    'l̴̪': 2693158446677010854,
141
    'l̥': 2693158761954716070,
142
    'ʎ': 2693161470602422694,
143
    'l̠ʲ': 2693159546457074086,
144
    'ʎ̟': 2693159546457074086,
145
    'ʟ': 2693161470333987238,
146
    'r': 2711173160463936170,
147
    'r̥': 2711173160464198314,
148
    'ɹ': 2693158761954453930,
149
    'ð̠˕': 2693158761954453930,
150
    'ʀ': 2711175886023338666,
151
    'ʀ̥': 2711175886023600810,
152
    'j': 2981391846754134442,
153
    'i̯': 2981391846754134442,
154
    'j̊': 2981391846754396586,
155
    'w': 2978753018579036586,
156
    'u̯': 2978753018579036586,
157
    'ɰʷ': 2978753018579036586,
158
    'ʍ': 2978753018579298730,
159
    'w̥': 2978753018579298730,
160
    'ɰᵝ': 2978682649834858922,
161
    'wᵝ': 2978682649834858922,
162
    'ɥ': 2978753018847472042,
163
    'jʷ': 2978753018847472042,
164
    'ʡ': 2783233503274904234,
165
    'ʜ': 2783233503274903978,
166
    'ʢ': 2783233503274641834,
167
    'ʢ̝': 2783233503274641834,
168
    'ʢ̞': 2693161510727231914,
169
    'ᴙ': 2711175909236714154,
170
    'ʙ': 2709662981243185834,
171
    'ʋ': 2691630990547659178,
172
    'ɕ': 2783231539004746138,
173
    'ʑ': 2783231539004483994,
174
    'ɾ': 2711173160463935914,
175
    'ᴅ': 2711173160463935914,
176
    'ɾ̥': 2711173160464198058,
177
    'ɾ̊': 2711173160464198058,
178
    'ᴅ̥': 2711173160464198058,
179
    'ᴅ̊': 2711173160464198058,
180
    'ɾ̃': 2711173160463935850,
181
    'n̆': 2711173160463935850,
182
    'ⱱ': 2709645389057141162,
183
    'ⱱ̟': 2709662981243185578,
184
    'w̆': 2709662981243185578,
185
    'b̆': 2709662981243185578,
186
    'ɢ̆': 2711175886023338410,
187
    'ʀ̆': 2711175886023338410,
188
    'ʡ̯': 2711175909236713898,
189
    'ɺ': 2711173160463935910,
190
    'ʎ̯': 2711175869111904678,
191
    'ʟ̆': 2711175868843469222,
192
    'ʈ': 2783231854013754026,
193
    'ɖ': 2783231854013491882,
194
    'ʂ': 2783231854013753754,
195
    'ʐ': 2783231854013491610,
196
    'ɻ': 2693159861466081706,
197
    'ɽ': 2711174259975563690,
198
    'ɽ͡r': 2711174259975563946,
199
    'ɭ': 2693159861466081702,
200
    'ɭ̆': 2711174259975563686,
201
    'ɺ˞': 2711174259975563686,
202
    'ɺ̢': 2711174259975563686,
203
    'ꞎ': 2783231854013688230,
204
    'ʎ̝̊': 2783233463150094758,
205
    'ʟ̝̊': 2783233462881659302,
206
    'ʟ̝': 2783233462881397158,
207
    # affricates & co-articulated
208
    't͡ʃ': 2783231556184615833,
209
    'ʧ': 2783231556184615833,
210
    't͜ʃ': 2783231556184615833,
211
    't̠ʲʃ': 2783231556184615833,
212
    'č': 2783231556184615833,
213
    'd͡ʒ': 2783231556184353689,
214
    'ʤ': 2783231556184353689,
215
    'd͜ʒ': 2783231556184353689,
216
    'd̠ʲʒ': 2783231556184353689,
217
    'ǯ': 2783231556184353689,
218
    't͡s': 2783230754502126489,
219
    'ʦ': 2783230754502126489,
220
    't͜s': 2783230754502126489,
221
    't̪͡s̪': 2783230479624219545,
222
    't͡s̪': 2783230479624219545,
223
    't̟͡s̟': 2783230479624219545,
224
    't͡s̟': 2783230479624219545,
225
    'ʦ̪': 2783230479624219545,
226
    'ʦ̟': 2783230479624219545,
227
    't͡θ̠': 2783230754502126505,
228
    't͡θ͇': 2783230754502126505,
229
    't͡θ': 2783230479624219561,
230
    't͜θ': 2783230479624219561,
231
    't̪͡θ': 2783230479624219561,
232
    't̟͡θ': 2783230479624219561,
233
    'd͡z': 2783230754501864345,
234
    'ʣ': 2783230754501864345,
235
    'd͜z': 2783230754501864345,
236
    'd̪͡z̪': 2783230479623957401,
237
    'd͡z̪': 2783230479623957401,
238
    'd̟͡z̟': 2783230479623957401,
239
    'd͡z̟': 2783230479623957401,
240
    'ʣ̪': 2783230479623957401,
241
    'ʣ̟': 2783230479623957401,
242
    'd͡ð̠': 2783230754501864361,
243
    'd͡ð̳': 2783230754501864361,
244
    'd͡ð': 2783230479623957417,
245
    'd͜ð': 2783230479623957417,
246
    'd̪͡ð': 2783230479623957417,
247
    'd̟͡ð': 2783230479623957417,
248
    'k͡x': 2783233462881659817,
249
    'ɡ͡ɣ': 2783233462881397673,
250
    'g͡ɣ': 2783233462881397673,
251
    'p͡f': 2781738167467420585,
252
    'p̪͡f': 2781702983095331753,
253
    'b͡v': 2781738167467158441,
254
    'b̪͡v': 2781702983095069609,
255
    'b̪͜v': 2781702983095069609,
256
    't͡ɕ': 2783231539004746649,
257
    't͜ɕ': 2783231539004746649,
258
    'ʨ': 2783231539004746649,
259
    'd͡ʑ': 2783231539004484505,
260
    'd͜ʑ': 2783231539004484505,
261
    'ʥ': 2783231539004484505,
262
    'ʈ͡ʂ': 2783231854013754265,
263
    't͡ʂ': 2783231854013754265,
264
    'ɖ͡ʐ': 2783231854013492121,
265
    'd͡ʐ': 2783231854013492121,
266
    't͡ɬ': 2783230754502060965,
267
    't͜ɬ': 2783230754502060965,
268
    'ƛ': 2783230754502060965,
269
    'd͡ɮ': 2783230754501864357,
270
    'c͡ç': 2783233463150095273,
271
    'c͜ç': 2783233463150095273,
272
    'ɟ͡ʝ': 2783233463149833129,
273
    'c͡ʎ̝̥': 2783233463150095269,
274
    'k͡ʟ̝̊': 2783233462881659813,
275
    'ɡ͡ʟ̝': 2783233462881397669,
276
    'q͡χ': 2783233480061529001,
277
    'ɢ͡ʁ': 2783233480061266857,
278
    'ɢ͜ʁ': 2783233480061266857,
279
    'ɧ': 2783231813614217626,
280
    'k͡p': 2781720534881839786,
281
    'k͜p': 2781720534881839786,
282
    'ɡ͡b': 2781720534881577642,
283
    'kʷ': 2780594634974997162,
284
    'k͡w': 2780594634974997162,
285
    'k͜w': 2780594634974997162,
286
    'kʷh': 2780594634974931626,
287
    'gʷ': 2780594634974735018,
288
    'g͡w': 2780594634974735018,
289
    'gʷh': 2780594634974669482,
290
    # implosives
291
    'ɓ̥': 2781720575281355434,
292
    'ƥ': 2781720575281355434,
293
    'pʼ↓': 2781720575281355434,
294
    'ɓ': 2781720575281093290,
295
    'ɗ̥': 2783230754502105770,
296
    'ƭ': 2783230754502105770,
297
    'tʼ↓': 2783230754502105770,
298
    'ɗ': 2783230754501843626,
299
    'ᶑ': 2783231854013471402,
300
    'ʄ̊': 2783233463150074538,
301
    'ƈ': 2783233463150074538,
302
    'cʼ↓': 2783233463150074538,
303
    'ʄ': 2783233463149812394,
304
    'ɠ': 2783233462881376938,
305
    'ʛ': 2783233480061246122,
306
    # clicks
307
    'ʘ': 2781720575281374890,
308
    'ʘʰ': 2781720575281309354,
309
    'ʘ̬': 2781720575281112746,
310
    'ᶢʘ': 2781720575281112746,
311
    'ʘ̃': 2781720575281112682,
312
    'ᵑʘ': 2781720575281112682,
313
    'ᵐʘ': 2781720575281112682,
314
    'ʘ̥̃ʰ': 2781720575281309290,
315
    'ᵑ̊ʘʰ': 2781720575281309290,
316
    'ʘ̃ˀ': 2781720575281358442,
317
    'ᵑʘˀ': 2781720575281358442,
318
    'ʘ̃͜ʔ': 2781720575281358442,
319
    'ᵑ̊ʘˀ': 2781720575281358442,
320
    'ǀ': 2783230479624218282,
321
    'ʇ': 2783230479624218282,
322
    'ǀ̬': 2783230479623956138,
323
    'ʇ̬': 2783230479623956138,
324
    'ᶢǀ': 2783230479623956138,
325
    'ᶢʇ': 2783230479623956138,
326
    'ǀ̃': 2783230479623956074,
327
    'ʇ̃': 2783230479623956074,
328
    'ᵑǀ': 2783230479623956074,
329
    'ⁿǀ': 2783230479623956074,
330
    'ᵑʇ': 2783230479623956074,
331
    'ǀ̥̃ʰ': 2783230479624152682,
332
    'ʇ̥̃ʰ': 2783230479624152682,
333
    'ᵑ̊ǀʰ': 2783230479624152682,
334
    'ᵑ̊ʇʰ': 2783230479624152682,
335
    'ǀ̃ˀ': 2783230479624201834,
336
    'ʇ̃ˀ': 2783230479624201834,
337
    'ᵑǀˀ': 2783230479624201834,
338
    'ᵑʇˀ': 2783230479624201834,
339
    'ᵑ̊ǀˀ': 2783230479624201834,
340
    'ᵑ̊ʇˀ': 2783230479624201834,
341
    'ǃ': 2783230754502125226,
342
    'ʗ': 2783230754502125226,
343
    'ǃʰ': 2783230754502059690,
344
    'ʗʰ': 2783230754502059690,
345
    'ǃ̬': 2783230754501863082,
346
    'ʗ̬': 2783230754501863082,
347
    'ᶢǃ': 2783230754501863082,
348
    'ᶢʗ': 2783230754501863082,
349
    'ǃ̃': 2783230754501863018,
350
    'ᵑǃ': 2783230754501863018,
351
    'ʗ̃': 2783230754501863018,
352
    'ᵑʗ': 2783230754501863018,
353
    'ǃ̥̃ʰ': 2783230754502059626,
354
    'ʗ̃̊ʰ': 2783230754502059626,
355
    'ᵑ̊ǃʰ': 2783230754502059626,
356
    'ᵑ̊ʗʰ': 2783230754502059626,
357
    'ǃ̃ˀ': 2783230754502108778,
358
    'ʗ̃ˀ': 2783230754502108778,
359
    'ᵑǃˀ': 2783230754502108778,
360
    'ᵑʗˀ': 2783230754502108778,
361
    'ǂ': 2783233463150093994,
362
    'ʄ̵': 2783233463150093994,
363
    '⨎': 2783233463150093994,
364
    'ǂʰ': 2783233463150028458,
365
    'ǂ̬': 2783233463149831850,
366
    'ᶢǂ': 2783233463149831850,
367
    'ǂ̃': 2783233463149831786,
368
    'ᵑǂ': 2783233463149831786,
369
    'ǂ̥̃ʰ': 2783233463150028394,
370
    'ᵑ̊ǂʰ': 2783233463150028394,
371
    'ǂ̃ˀ': 2783233463150077546,
372
    'ᵑǂˀ': 2783233463150077546,
373
    'ᵑǂ͡ʔ': 2783233463150077546,
374
    'ᵑ̊ǂˀ': 2783233463150077546,
375
    'ǃ͡s': 2783233463150093978,
376
    'ǂᶴ': 2783233463150093978,
377
    'ǁ': 2783230754502125222,
378
    'ʖ': 2783230754502125222,
379
    'ǁʰ': 2783230754502059686,
380
    'ʖʰ': 2783230754502059686,
381
    'ǁ̬': 2783230754501863078,
382
    'ʖ̬': 2783230754501863078,
383
    'ᶢǁ': 2783230754501863078,
384
    'ᶢʖ': 2783230754501863078,
385
    'ǁ̃': 2783230754501863014,
386
    'ʖ̃': 2783230754501863014,
387
    'ᵑǁ': 2783230754501863014,
388
    'ᵑʖ': 2783230754501863014,
389
    'ǁ̥̃ʰ': 2783230754502059622,
390
    'ʖ̥̃ʰ': 2783230754502059622,
391
    'ᵑ̊ǁʰ': 2783230754502059622,
392
    'ᵑ̊ʖʰ': 2783230754502059622,
393
    'ǁ̃ˀ': 2783230754502108774,
394
    'ʖ̃ˀ': 2783230754502108774,
395
    'ᵑǁˀ': 2783230754502108774,
396
    'ᵑʖˀ': 2783230754502108774,
397
    'ᵑǁ͡ʔ': 2783230754502108774,
398
    'ʖ̃͜ʔ': 2783230754502108774,
399
    'ǃ˞': 2783231854013753002,
400
    'ǃǁ': 2783231854013753002,
401
    '‼': 2783231854013753002,
402
    '!!': 2783231854013753002,
403
    'ǃ˞ʰ': 2783231854013687466,
404
    '‼ʰ': 2783231854013687466,
405
    '!!ʰ': 2783231854013687466,
406
    'ǃ̬˞': 2783231854013490858,
407
    'ᶢǃ˞': 2783231854013490858,
408
    '‼̬': 2783231854013490858,
409
    '!!̬': 2783231854013490858,
410
    'ᶢ‼': 2783231854013490858,
411
    'ᶢ!!': 2783231854013490858,
412
    'ǃ̃˞': 2783231854013490794,
413
    'ᵑǃ˞': 2783231854013490794,
414
    '‼̃': 2783231854013490794,
415
    '!!̃': 2783231854013490794,
416
    'ᵑ‼': 2783231854013490794,
417
    'ᵑ!!': 2783231854013490794,
418
    'ǃ̥̃˞ʰ': 2783231854013687402,
419
    'ᵑ̊ǃ˞ʰ': 2783231854013687402,
420
    '‼̥̃ʰ': 2783231854013687402,
421
    '!!̥̃ʰ': 2783231854013687402,
422
    'ᵑ̊‼ʰ': 2783231854013687402,
423
    'ᵑ̊!!ʰ': 2783231854013687402,
424
    'ǃ̃˞ˀ': 2783231854013736554,
425
    'ᵑǃ˞͜ʔ': 2783231854013736554,
426
    'ᵑ̊‼ˀ': 2783231854013736554,
427
    'ᵑ‼ˀ': 2783231854013736554,
428
    '‼̃ˀ': 2783231854013736554,
429
    'ᵑ̊!!ˀ': 2783231854013736554,
430
    'ᵑ!!ˀ': 2783231854013736554,
431
    '!!̃ˀ': 2783231854013736554,
432
    # vowels
433
    'i': 1826957412996131242,
434
    'ɪ': 1826957413067434410,
435
    'ɩ': 1826957413067434410,
436
    'u': 1825831513894594986,
437
    'ʊ': 1825831513965898154,
438
    'ɷ': 1825831513965898154,
439
    'ᴜ': 1825831513965898154,
440
    'ɯ̽': 1826957413872740778,
441
    'ʊ̜': 2115187790024452522,
442
    'e': 1826957430176000426,
443
    'ɛ': 1826957430247303594,
444
    'o': 1825831531074464170,
445
    'ɤ': 1826957430981306794,
446
    'ɔ': 1825831531145767338,
447
    'ʌ': 1826957431052609962,
448
    'a': 1826957425952336298,
449
    'a̟': 1826957425952336298,
450
    'æ̞': 1826957425952336298,
451
    'ɶ': 1825831526045493674,
452
    'æ': 1826957425885227434,
453
    'y': 1825831513089288618,
454
    'ʏ': 1825831513160591786,
455
    'ø': 1825831530269157802,
456
    'œ': 1825831530340460970,
457
    'ə': 1828083331160779178,
458
    'ɵ̞': 1825831531347093930,
459
    'ə̹': 1825831531347093930,
460
    'ɞ̝': 1825831531347093930,
461
    'ɘ̞': 1826957431253936554,
462
    'ɯ': 1826957413805631914,
463
    'ɒ': 1825831526850800042,
464
    'ɑ': 1826957426757642666,
465
    'ɨ': 1826957414069873066,
466
    'ï': 1826957414069873066,
467
    'ʉ': 1825831514163030442,
468
    'ü': 1825831514163030442,
469
    'ɘ': 1826957431249742250,
470
    'ë': 1826957431249742250,
471
    'ɵ': 1825831531342899626,
472
    'ö': 1825831531342899626,
473
    'ɜ': 1826957431316851114,
474
    'ɛ̈': 1826957431316851114,
475
    'ɞ': 1825831531410008490,
476
    'ɔ̈': 1825831531410008490,
477
    'ä': 1826957427026078122,
478
    'a̠': 1826957427026078122,
479
    'ɑ̈': 1826957427026078122,
480
    'ɐ̞': 1826957427026078122,
481
    'ɐ': 1828083326865811882,
482
    'ɜ̞': 1826957426958969258,
483
    'ɞ̞': 1825831527052126634,
484
    'ɪ̟': 1826957413063240106,
485
    'ʏ̟': 1825831513156397482,
486
    'ʏ̫': 1825550038183881130,
487
    'ʏʷ': 1825550038183881130,
488
    'ɪʷ': 1825550038183881130,
489
    'y̫': 1825550038112577962,
490
    'yʷ': 1825550038112577962,
491
    'iʷ': 1825550038112577962,
492
    'u͍': 1825761145150417322,
493
    'ɯᵝ': 1825761145150417322,
494
    'ɯ͡β̞': 1825761145150417322,
495
    'ʉ͍': 1825761145418852778,
496
    'ɨᵝ': 1825761145418852778,
497
    'ɨ͡β̞': 1825761145418852778,
498
    'ɪ̈': 1826957414136981930,
499
    'ɨ̞': 1826957414136981930,
500
    'ɘ̝': 1826957414136981930,
501
    'ʊ̈': 1825831514230139306,
502
    'ʉ̞': 1825831514230139306,
503
    'ø̫': 1825550055292447146,
504
    'øʷ': 1825550055292447146,
505
    'eʷ': 1825550055292447146,
506
    'e̞': 1826957430180194730,
507
    'ɛ̝': 1826957430180194730,
508
    'ø̞': 1825831530273352106,
509
    'œ̝': 1825831530273352106,
510
    'o̞': 1825831531078658474,
511
    'ɔ̝': 1825831531078658474,
512
    'ɤ̞': 1826957430985501098,
513
    'ʌ̝': 1826957430985501098,
514
    # feature diacritics
515
    'ʰ': 65536,
516
    'ʱ': 327680,
517
    '̤': 327680,
518
    'ʼ': 16384,
519
    '̚': 131072,
520
    '̬': 262144,
521
    '̌': 262144,
522
    '̥': 524288,
523
    '̊': 524288,
524
    'ʷ': 6737807255011328,
525
    'ʲ': 103012106240,
526
    '̰': 278528,
527
    '̩': 1152921504606846976,
528
    '̯': 2305843009213693952,
529
    '̃': 64,
530
    '̨': 64,
531
    '͊': 128,
532
    'ˠ': 98180268032,
533
    '̴': 98180268032,
534
    'ˤ': 32505856,
535
    '̘': 23068672,
536
    '̙': 26214400,
537
    '˭': 688256,
538
}
539
540
# Bit masks selecting each phonetic feature inside a feature bundle (the ints
# stored in _PHONETIC_FEATURES). Every feature owns one 2-bit field: the upper
# bit of the field flags '-', the lower bit flags '+', neither bit set means
# the feature is unspecified ('0') and both bits set means '+/-'.
#
# The fields are laid out from the most significant end in the order listed
# below, so 'syllabic' occupies bits 61-60 (mask 0b11 << 60) and
# 'delayed_release' occupies bits 1-0 (mask 0b11). Insertion order of the
# dict therefore matches descending mask value.
_FEATURE_MASK = {
    feature: 0b11 << (60 - 2 * index)
    for index, feature in enumerate(
        (
            'syllabic',
            'consonantal',
            'sonorant',
            'approximant',
            'labial',
            'round',
            'protruded',
            'compressed',
            'labiodental',
            'coronal',
            'anterior',
            'distributed',
            'dorsal',
            'high',
            'low',
            'front',
            'back',
            'tense',
            'pharyngeal',
            'atr',
            'rtr',
            'voice',
            'spread_glottis',
            'constricted_glottis',
            'glottalic_suction',
            'velaric_suction',
            'continuant',
            'nasal',
            'strident',
            'lateral',
            'delayed_release',
        )
    )
}
573
574
575
def ipa_to_features(ipa: str) -> List[int]:
    """Convert IPA to features.

    This translates an IPA string of one or more phones to a list of ints
    representing the features of the string.

    The input is lower-cased and NFD-normalized, then scanned from left to
    right. At every position the longest symbol present in
    ``_PHONETIC_FEATURES`` is consumed and its feature bundle is emitted. A
    character that starts no known symbol is consumed on its own and
    reported as -1.

    Parameters
    ----------
    ipa : str
        The IPA representation of a phone or series of phones

    Returns
    -------
    list of ints
        A representation of the features of the input string, one entry per
        recognized symbol and -1 for each unrecognized character

    Examples
    --------
    >>> ipa_to_features('mut')
    [2709662981243185770, 1825831513894594986, 2783230754502126250]
    >>> ipa_to_features('fon')
    [2781702983095331242, 1825831531074464170, 2711173160463936106]
    >>> ipa_to_features('telz')
    [2783230754502126250, 1826957430176000426, 2693158761954453926,
    2783230754501863834]

    .. versionadded:: 0.1.0

    """
    ipa = normalize('NFD', ipa.lower())
    length = len(ipa)
    maxsymlen = max(len(symbol) for symbol in _PHONETIC_FEATURES)

    features = []
    pos = 0
    while pos < length:
        # Candidates run from the longest symbol that still fits in the
        # remaining text down to a single character. Stopping at the first
        # hit keeps the match greedy AND guarantees that the next symbol is
        # again searched with the full window (without the break, a long
        # match was followed by a search restricted to shorter lengths).
        for size in range(min(maxsymlen, length - pos), 0, -1):
            symbol = ipa[pos : pos + size]
            if symbol in _PHONETIC_FEATURES:
                features.append(_PHONETIC_FEATURES[symbol])
                pos += size
                break
        else:
            # nothing known starts here: mark the character as unknown
            features.append(-1)
            pos += 1

    return features
626
627
628 1
def ipa_to_feature_dicts(ipa: str) -> List[Dict[str, str]]:
    """Convert IPA to a feature dict list.

    This translates an IPA string of one or more phones to a list of dicts
    representing the features of the string.

    The input is lower-cased and NFD-normalized, then scanned from left to
    right. At every position the longest symbol present in
    ``_PHONETIC_FEATURES`` is consumed and decoded into a dict with one key
    per feature in ``_FEATURE_MASK``. Each value is ``'+'``, ``'-'``,
    ``'0'`` (unspecified) or ``'+/-'`` (both bits set). A character that
    starts no known symbol is consumed on its own and reported as an empty
    dict.

    Parameters
    ----------
    ipa : str
        The IPA representation of a phone or series of phones

    Returns
    -------
    list of dicts
        A representation of the features of the input string

    Examples
    --------
    >>> ipa_to_feature_dicts('mut')
    [{'syllabic': '-',
      'consonantal': '+',
      'sonorant': '+',
      'approximant': '-',
      'labial': '+',
      'round': '-',
      'protruded': '-',
      'compressed': '-',
      'labiodental': '-',
      'coronal': '-',
      'anterior': '0',
      'distributed': '0',
      'dorsal': '-',
      'high': '0',
      'low': '0',
      'front': '0',
      'back': '0',
      'tense': '0',
      'pharyngeal': '-',
      'atr': '0',
      'rtr': '0',
      'voice': '+',
      'spread_glottis': '-',
      'constricted_glottis': '-',
      'glottalic_suction': '-',
      'velaric_suction': '-',
      'continuant': '-',
      'nasal': '+',
      'strident': '-',
      'lateral': '-',
      'delayed_release': '-'},
     {'syllabic': '+',
      'consonantal': '-',
      'sonorant': '+',
      'approximant': '+',
      'labial': '+',
      'round': '+',
      'protruded': '-',
      'compressed': '-',
      'labiodental': '-',
      'coronal': '-',
      'anterior': '0',
      'distributed': '0',
      'dorsal': '+',
      'high': '+',
      'low': '-',
      'front': '-',
      'back': '+',
      'tense': '+',
      'pharyngeal': '+',
      'atr': '+',
      'rtr': '-',
      'voice': '+',
      'spread_glottis': '-',
      'constricted_glottis': '-',
      'glottalic_suction': '-',
      'velaric_suction': '-',
      'continuant': '+',
      'nasal': '-',
      'strident': '-',
      'lateral': '-',
      'delayed_release': '-'},
     {'syllabic': '-',
      'consonantal': '+',
      'sonorant': '-',
      'approximant': '-',
      'labial': '-',
      'round': '0',
      'protruded': '0',
      'compressed': '0',
      'labiodental': '0',
      'coronal': '+',
      'anterior': '+',
      'distributed': '-',
      'dorsal': '-',
      'high': '0',
      'low': '0',
      'front': '0',
      'back': '0',
      'tense': '0',
      'pharyngeal': '-',
      'atr': '0',
      'rtr': '0',
      'voice': '-',
      'spread_glottis': '-',
      'constricted_glottis': '-',
      'glottalic_suction': '-',
      'velaric_suction': '-',
      'continuant': '-',
      'nasal': '-',
      'strident': '-',
      'lateral': '-',
      'delayed_release': '-'}]

    .. versionadded:: 0.4.1

    """
    ipa = normalize('NFD', ipa.lower())
    length = len(ipa)
    maxsymlen = max(len(symbol) for symbol in _PHONETIC_FEATURES)

    features = []
    pos = 0
    while pos < length:
        # Longest candidate that still fits first; stop at the first hit so
        # that the following symbol is searched with the full window again
        # instead of only with the shorter lengths left over in this loop.
        for size in range(min(maxsymlen, length - pos), 0, -1):
            symbol = ipa[pos : pos + size]
            if symbol not in _PHONETIC_FEATURES:
                continue

            feature_int = _PHONETIC_FEATURES[symbol]
            feature_dict = {}
            for feature, mask in _FEATURE_MASK.items():
                # each feature mask contains two bits, one each for - and +;
                # shifting the mask right by one isolates the lower (+) bit
                # once it is and-ed with the already masked value
                masked = feature_int & mask
                if masked == 0:
                    value = '0'
                elif masked == mask:
                    value = '+/-'
                elif masked & (mask >> 1):
                    value = '+'
                else:
                    value = '-'
                feature_dict[feature] = value

            features.append(feature_dict)
            pos += size
            break
        else:
            # nothing known starts here: an empty dict marks the character
            features.append({})
            pos += 1

    return features
783
784 1
785 1
def get_feature(
    vector: Sequence[int], feature: str
) -> List[Union[int, float]]:
    """Get a feature vector.

    This returns a list, equal in length to the vector input, representing
    presence/absence/neutrality with respect to a particular phonetic
    feature.

    Parameters
    ----------
    vector : list
        A tuple or list of ints representing the phonetic features of a phone
        or series of phones (such as is returned by the ipa_to_features
        function)
    feature : str
        A feature name from the set:

            - ``syllabic``
            - ``consonantal``
            - ``sonorant``
            - ``approximant``
            - ``labial``
            - ``round``
            - ``protruded``
            - ``compressed``
            - ``labiodental``
            - ``coronal``
            - ``anterior``
            - ``distributed``
            - ``dorsal``
            - ``high``
            - ``low``
            - ``front``
            - ``back``
            - ``tense``
            - ``pharyngeal``
            - ``atr``
            - ``rtr``
            - ``voice``
            - ``spread_glottis``
            - ``constricted_glottis``
            - ``glottalic_suction``
            - ``velaric_suction``
            - ``continuant``
            - ``nasal``
            - ``strident``
            - ``lateral``
            - ``delayed_release``

    Returns
    -------
    list of ints
        A list indicating presence/absence/neutrality with respect to the
        feature, one value per entry of ``vector``: 1 for +, -1 for -,
        0 for unspecified and 2 when both the + and - bits are set (+/-).
        Negative entries of ``vector`` (such as the -1 that ipa_to_features
        emits for unrecognized characters) yield the float NaN.

    Raises
    ------
    AttributeError
        feature must be one of ...

    Examples
    --------
    >>> tails = ipa_to_features('telz')
    >>> get_feature(tails, 'consonantal')
    [1, -1, 1, 1]
    >>> get_feature(tails, 'sonorant')
    [-1, 1, 1, -1]
    >>> get_feature(tails, 'nasal')
    [-1, -1, -1, -1]
    >>> get_feature(tails, 'coronal')
    [1, -1, 1, 1]

    .. versionadded:: 0.1.0

    """
    if feature not in _FEATURE_MASK:
        raise AttributeError(
            "feature must be one of: '{}'".format(
                "', '".join(_FEATURE_MASK.keys())
            )
        )

    # each feature mask contains two bits, one each for - and +
    mask = _FEATURE_MASK[feature]
    # the lower bit represents +
    pos_mask = mask >> 1

    retvec = []  # type: List[Union[int, float]]
    for bundle in vector:
        if bundle < 0:
            # negative bundles carry no feature information
            retvec.append(float('NaN'))
            continue

        masked = bundle & mask
        if masked == 0:
            retvec.append(0)  # 0
        elif masked == mask:
            retvec.append(2)  # +/-
        elif masked & pos_mask:
            retvec.append(1)  # +
        else:
            retvec.append(-1)  # -

    return retvec
890 1
891 1
892 1
def cmp_features(
    feat1: int,
    feat2: int,
    weights: Optional[
        Union[Sequence[Union[int, float]], Dict[str, Union[int, float]]]
    ] = None,
) -> float:
    """Compare features.

    This returns a number in the range [0, 1] representing a comparison of two
    feature bundles.

    If one of the bundles is negative, -1 is returned (for unknown values)

    If the bundles are identical, 1 is returned.

    If they are inverses of one another, 0 is returned.

    Otherwise, a float representing their similarity is returned.

    Parameters
    ----------
    feat1 : int
        A feature bundle
    feat2 : int
        A feature bundle
    weights : None or list or tuple or dict
        If None, all features are of equal significance and a simple normalized
        hamming distance of the features is calculated. If a list or tuple
        of numeric values is supplied, the values are inferred as the weights
        for each feature, in order of the features listed in _FEATURE_MASK.
        If a dict is supplied, its key values should match keys in
        _FEATURE_MASK to which each weight (value) should be assigned. Missing
        values in all cases are assigned a weight of 0 and will be omitted from
        the comparison.

    Returns
    -------
    float
        A comparison of the feature bundles

    Raises
    ------
    TypeError
        weights must be a dict, list, or tuple.

    Examples
    --------
    >>> cmp_features(ipa_to_features('l')[0], ipa_to_features('l')[0])
    1.0
    >>> cmp_features(ipa_to_features('l')[0], ipa_to_features('n')[0])
    0.8709677419354839
    >>> cmp_features(ipa_to_features('l')[0], ipa_to_features('z')[0])
    0.8709677419354839
    >>> cmp_features(ipa_to_features('l')[0], ipa_to_features('i')[0])
    0.564516129032258

    .. versionadded:: 0.1.0
    .. versionchanged:: 0.4.1
        Added weights parameter for modifiable feature weighting

    """
    if feat1 < 0 or feat2 < 0:
        # negative bundles stand for unknown symbols and cannot be compared
        return -1.0
    if feat1 == feat2:
        return 1.0

    # This should be handled some other way since this will take a long time
    # when done repeatedly. Maybe convert to a class & save the weights list.
    pair_weights = None
    if weights is not None:
        if isinstance(weights, dict):
            # descending mask value == the order features are listed in
            # _FEATURE_MASK (first feature sits in the highest bits)
            ordered = [
                weights.get(feature, 0)
                for feature in sorted(
                    _FEATURE_MASK, key=_FEATURE_MASK.get, reverse=True
                )
            ]
        elif isinstance(weights, (list, tuple)):
            ordered = list(weights) + [0] * (len(_FEATURE_MASK) - len(weights))
        else:
            raise TypeError('weights must be a dict, list, or tuple.')

        magnitude = sum(ordered)
        # The loop below walks the bundles from the least significant bit
        # pair upwards, i.e. from the LAST feature of _FEATURE_MASK to the
        # first, so the per-feature weights are reversed to line up with it.
        pair_weights = ordered[len(_FEATURE_MASK) - 1 :: -1]
    else:
        magnitude = len(_FEATURE_MASK)

    featxor = feat1 ^ feat2
    diffbits = 0.0
    pair = 0
    while featxor:
        weight = 1 if pair_weights is None else pair_weights[pair]
        # every feature owns two bits; each differing bit counts once
        if featxor & 0b01:
            diffbits += weight
        if featxor & 0b10:
            diffbits += weight
        featxor >>= 2
        pair += 1

    # two bits per feature, hence the doubled magnitude; the guard avoids a
    # division by zero when every supplied weight is 0
    return 1 - (0 if not diffbits else (diffbits / (2 * magnitude)))
1001
1002
1003
if __name__ == '__main__':
    # Running the module directly executes the examples embedded in its
    # docstrings as tests.
    import doctest

    doctest.testmod()
1007