Conditions | 142 |
Total Lines | 633 |
Code Lines | 381 |
Lines | 0 |
Ratio | 0 % |
Tests | 269 |
CRAP Score | 142 |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, Preserve Whole Object.
Complex classes like abydos.phonetic._phonet.Phonet, whose encode() method is analyzed here, often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # -*- coding: utf-8 -*- |
||
def encode(self, word, mode=1, lang='de'):
    """Return the phonet code for a word.

    The rule table selected by ``lang`` is a flat sequence read in groups
    of three: the entry at index ``3*n`` is the search pattern and the
    entries at ``3*n + 1`` / ``3*n + 2`` are the replacements used for
    ``mode`` 1 and 2 respectively. Lookup tables over that rule table are
    rebuilt on every call.

    Args:
        word (str): The word to transform
        mode (int): The phonet variant to employ (1 or 2)
        lang (str): 'de' (default) for German, 'none' for no language

    Returns:
        str: The phonet value

    Examples:
        >>> pe = Phonet()
        >>> pe.encode('Christopher')
        'KRISTOFA'
        >>> pe.encode('Niall')
        'NIAL'
        >>> pe.encode('Smith')
        'SMIT'
        >>> pe.encode('Schmidt')
        'SHMIT'

        >>> pe.encode('Christopher', mode=2)
        'KRIZTUFA'
        >>> pe.encode('Niall', mode=2)
        'NIAL'
        >>> pe.encode('Smith', mode=2)
        'ZNIT'
        >>> pe.encode('Schmidt', mode=2)
        'ZNIT'

        >>> pe.encode('Christopher', lang='none')
        'CHRISTOPHER'
        >>> pe.encode('Niall', lang='none')
        'NIAL'
        >>> pe.encode('Smith', lang='none')
        'SMITH'
        >>> pe.encode('Schmidt', lang='none')
        'SCHMIDT'

    """
    # Counter is used as a mapping whose missing keys read as 0.
    # phonet_hash: first character -> index of its first usable rule.
    # alpha_pos: character -> 1 (umlaut), 2..27 ('A'..'Z'), 0 (anything else).
    phonet_hash = Counter()
    alpha_pos = Counter()

    # (letter index 0..25, alpha_pos of following char) -> first / last
    # rule index for that letter pair; column 0 means "any following char".
    phonet_hash_1 = Counter()
    phonet_hash_2 = Counter()

    def _initialize_phonet(lang):
        """Initialize phonet variables.

        Fills the enclosing ``phonet_hash``, ``alpha_pos``,
        ``phonet_hash_1`` and ``phonet_hash_2`` tables from the rule table
        chosen by ``lang``.

        Args:
            lang (str): Language to use for rules

        """
        if lang == 'none':
            _phonet_rules = self._rules_no_lang
        else:
            _phonet_rules = self._rules_german

        phonet_hash[''] = -1

        # German and international umlauts
        for j in 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßŒŠŸ':
            alpha_pos[j] = 1
            phonet_hash[j] = -1

        # "normal" letters ('A'-'Z')
        for i, j in enumerate('ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
            alpha_pos[j] = i + 2
            phonet_hash[j] = -1

        for i in range(26):
            for j in range(28):
                phonet_hash_1[i, j] = -1
                phonet_hash_2[i, j] = -1

        # for each phonetic rule (only every third entry is a pattern)
        for i, rule in enumerate(_phonet_rules):
            if rule and i % 3 == 0:
                # calculate first hash value
                k = rule[0]

                # only record a rule that has a replacement in some mode
                if phonet_hash[k] < 0 and (
                    _phonet_rules[i + 1] or _phonet_rules[i + 2]
                ):
                    phonet_hash[k] = i

                # calculate second hash values
                if k and alpha_pos[k] >= 2:
                    k = alpha_pos[k]

                    j = k - 2
                    rule = rule[1:]

                    # reduce the pattern to the set of possible 2nd chars
                    if not rule:
                        rule = ' '
                    elif rule[0] == '(':
                        rule = rule[1:]
                    else:
                        rule = rule[0]

                    while rule and (rule[0] != ')'):
                        k = alpha_pos[rule[0]]

                        if k > 0:
                            # add hash value for this letter
                            if phonet_hash_1[j, k] < 0:
                                phonet_hash_1[j, k] = i
                                phonet_hash_2[j, k] = i

                            if phonet_hash_2[j, k] >= (i - 30):
                                phonet_hash_2[j, k] = i
                            else:
                                k = -1

                        if k <= 0:
                            # add hash value for all letters
                            if phonet_hash_1[j, 0] < 0:
                                phonet_hash_1[j, 0] = i

                            phonet_hash_2[j, 0] = i

                        rule = rule[1:]

    def _phonet(term, mode, lang):
        """Return the phonet coded form of a term.

        Relies on the lookup tables having been filled by
        ``_initialize_phonet`` for the same ``lang``.

        Args:
            term (str): Term to transform
            mode (int): The phonet variant to employ (1 or 2)
            lang (str): 'de' (default) for German, 'none' for no language

        Returns:
            str: The phonet value

        """
        if lang == 'none':
            _phonet_rules = self._rules_no_lang
        else:
            _phonet_rules = self._rules_german

        char0 = ''
        dest = term

        if not term:
            return ''

        term_length = len(term)

        # convert input string to upper-case
        src = term.translate(self._upper_trans)

        # check "src"
        i = 0  # read position in src
        j = 0  # write position in dest
        zeta = 0  # non-zero while a '<' rule has just rewritten src

        while i < len(src):
            char = src[i]

            pos = alpha_pos[char]

            if pos >= 2:
                xpos = pos - 2

                if i + 1 == len(src):
                    pos = alpha_pos['']
                else:
                    pos = alpha_pos[src[i + 1]]

                start1 = phonet_hash_1[xpos, pos]
                start2 = phonet_hash_1[xpos, 0]
                end1 = phonet_hash_2[xpos, pos]
                end2 = phonet_hash_2[xpos, 0]

                # preserve rule priorities: scan the earlier range first
                if (start2 >= 0) and ((start1 < 0) or (start2 < start1)):
                    start1, start2 = start2, start1
                    end1, end2 = end2, end1

                # overlapping ranges are merged into a single scan
                if (end1 >= start2) and (start2 >= 0):
                    if end2 > end1:
                        end1 = end2

                    start2 = -1
                    end2 = -1
            else:
                pos = phonet_hash[char]
                start1 = pos
                end1 = 10000
                start2 = -1
                end2 = -1

            pos = start1
            zeta0 = 0  # set to 1 when this iteration already handled dest/src

            if pos >= 0:
                # check rules for this char
                while (_phonet_rules[pos] is None) or (
                    _phonet_rules[pos][0] == char
                ):
                    if pos > end1:
                        if start2 > 0:
                            pos = start2
                            start1 = start2
                            start2 = -1
                            end1 = end2
                            end2 = -1
                            continue

                        break

                    if (_phonet_rules[pos] is None) or (
                        _phonet_rules[pos + mode] is None
                    ):
                        # no conversion rule available
                        pos += 3
                        continue

                    # check whole string
                    matches = 1  # number of matching letters
                    priority = 5  # default priority
                    rule = _phonet_rules[pos]
                    rule = rule[1:]

                    # NOTE(review): `rule not in '(-<^$'` tests the whole
                    # remaining pattern as a substring, not its first
                    # character; kept as-is to preserve existing output.
                    while (
                        rule
                        and (len(src) > (i + matches))
                        and (src[i + matches] == rule[0])
                        and not rule[0].isdigit()
                        and (rule not in '(-<^$')
                    ):
                        matches += 1
                        rule = rule[1:]

                    if rule and (rule[0] == '('):
                        # check an array of letters
                        if (
                            (len(src) > (i + matches))
                            and src[i + matches].isalpha()
                            and (src[i + matches] in rule[1:])
                        ):
                            matches += 1

                            while rule and rule[0] != ')':
                                rule = rule[1:]

                            # skip the closing ')'
                            rule = rule[1:]

                    if rule:
                        priority0 = ord(rule[0])
                    else:
                        priority0 = 0

                    matches0 = matches

                    # each '-' gives one matched letter back to the input
                    while rule and rule[0] == '-' and matches > 1:
                        matches -= 1
                        rule = rule[1:]

                    if rule and rule[0] == '<':
                        rule = rule[1:]

                    if rule and rule[0].isdigit():
                        # read priority
                        priority = int(rule[0])
                        rule = rule[1:]

                    if rule and rule[0:2] == '^^':
                        rule = rule[1:]

                    # character following the full match ('' at end of src)
                    after = src[i + matches0 : i + matches0 + 1]
                    at_word_end = not after.isalpha() and after != '.'

                    if (
                        not rule
                        or (
                            (rule[0] == '^')
                            and ((i == 0) or not src[i - 1].isalpha())
                            and ((rule[1:2] != '$') or at_word_end)
                        )
                        or (
                            (rule[0] == '$')
                            and (i > 0)
                            and src[i - 1].isalpha()
                            and at_word_end
                        )
                    ):
                        # look for continuation, if:
                        # matches > 1 and NO '-' in first string
                        pos0 = -1

                        start3 = 0
                        start4 = 0
                        end3 = 0
                        end4 = 0

                        if (
                            (matches > 1)
                            and src[i + matches : i + matches + 1]
                            and (priority0 != ord('-'))
                        ):
                            char0 = src[i + matches - 1]
                            pos0 = alpha_pos[char0]

                            if pos0 >= 2 and src[i + matches]:
                                xpos = pos0 - 2
                                pos0 = alpha_pos[src[i + matches]]
                                start3 = phonet_hash_1[xpos, pos0]
                                start4 = phonet_hash_1[xpos, 0]
                                end3 = phonet_hash_2[xpos, pos0]
                                end4 = phonet_hash_2[xpos, 0]

                                # preserve rule priorities
                                if (start4 >= 0) and (
                                    (start3 < 0) or (start4 < start3)
                                ):
                                    start3, start4 = start4, start3
                                    end3, end4 = end4, end3

                                if (end3 >= start4) and (start4 >= 0):
                                    if end4 > end3:
                                        end3 = end4

                                    start4 = -1
                                    end4 = -1
                            else:
                                pos0 = phonet_hash[char0]
                                start3 = pos0
                                end3 = 10000
                                start4 = -1
                                end4 = -1

                            pos0 = start3

                        # check continuation rules for src[i+matches]
                        if pos0 >= 0:
                            while (_phonet_rules[pos0] is None) or (
                                _phonet_rules[pos0][0] == char0
                            ):
                                if pos0 > end3:
                                    if start4 > 0:
                                        pos0 = start4
                                        start3 = start4
                                        start4 = -1
                                        end3 = end4
                                        end4 = -1
                                        continue

                                    # important: marks "nothing found" for
                                    # the priority test after the loop
                                    priority0 = -1
                                    break

                                if (_phonet_rules[pos0] is None) or (
                                    _phonet_rules[pos0 + mode] is None
                                ):
                                    # no conversion rule available
                                    pos0 += 3
                                    continue

                                # check whole string
                                matches0 = matches
                                priority0 = 5
                                rule = _phonet_rules[pos0]
                                rule = rule[1:]

                                # NOTE(review): `or (rule in '(-<^$')`
                                # looks inconsistent with the first-pass
                                # loop above; kept as-is to preserve
                                # existing output.
                                while (
                                    rule
                                    and (
                                        src[i + matches0 : i + matches0 + 1]
                                        == rule[0]
                                    )
                                    and (
                                        not rule[0].isdigit()
                                        or (rule in '(-<^$')
                                    )
                                ):
                                    matches0 += 1
                                    rule = rule[1:]

                                if rule and rule[0] == '(':
                                    # check an array of letters
                                    if src[
                                        i + matches0 : i + matches0 + 1
                                    ].isalpha() and (
                                        src[i + matches0] in rule[1:]
                                    ):
                                        matches0 += 1

                                        while rule and rule[0] != ')':
                                            rule = rule[1:]

                                        # skip the closing ')'
                                        rule = rule[1:]

                                while rule and rule[0] == '-':
                                    # "matches0" is NOT decremented
                                    # because of
                                    # "if (matches0 == matches)"
                                    rule = rule[1:]

                                if rule and rule[0] == '<':
                                    rule = rule[1:]

                                if rule and rule[0].isdigit():
                                    priority0 = int(rule[0])
                                    rule = rule[1:]

                                after0 = src[i + matches0 : i + matches0 + 1]

                                # rule == '^' is not possible here
                                if not rule or (
                                    (rule[0] == '$')
                                    and not after0.isalpha()
                                    and (after0 != '.')
                                ):
                                    if matches0 == matches:
                                        # this is only a partial string
                                        pos0 += 3
                                        continue

                                    if priority0 < priority:
                                        # priority is too low
                                        pos0 += 3
                                        continue

                                    # continuation rule found
                                    break

                                pos0 += 3

                            # end of "while"
                            if (priority0 >= priority) and (
                                (_phonet_rules[pos0] is not None)
                                and (_phonet_rules[pos0][0] == char0)
                            ):
                                # a continuation rule wins; skip this rule
                                pos += 3
                                continue

                        # replace string
                        if _phonet_rules[pos] and (
                            '<' in _phonet_rules[pos][1:]
                        ):
                            priority0 = 1
                        else:
                            priority0 = 0

                        rule = _phonet_rules[pos + mode]

                        if (priority0 == 1) and (zeta == 0):
                            # rule with '<' is applied: the replacement is
                            # written back into src and re-examined
                            if (
                                (j > 0)
                                and rule
                                and (
                                    (dest[j - 1] == char)
                                    or (dest[j - 1] == rule[0])
                                )
                            ):
                                j -= 1

                            zeta0 = 1
                            zeta += 1
                            matches0 = 0

                            # Slice instead of index: stop at the end of
                            # src rather than raising IndexError when the
                            # replacement is longer than the remaining text.
                            while rule and src[i + matches0 : i + matches0 + 1]:
                                src = (
                                    src[0 : i + matches0]
                                    + rule[0]
                                    + src[i + matches0 + 1 :]
                                )
                                matches0 += 1
                                rule = rule[1:]

                            if matches0 < matches:
                                src = (
                                    src[0 : i + matches0]
                                    + src[i + matches :]
                                )

                            # '' if the rewrite removed the tail of src
                            char = src[i : i + 1]
                        else:
                            i = i + matches - 1
                            zeta = 0

                            # emit all but the last replacement char,
                            # collapsing immediate repeats
                            while len(rule) > 1:
                                if (j == 0) or (dest[j - 1] != rule[0]):
                                    dest = (
                                        dest[0:j]
                                        + rule[0]
                                        + dest[min(len(dest), j + 1) :]
                                    )
                                    j += 1

                                rule = rule[1:]

                            # new "current char"
                            if not rule:
                                rule = ''
                                char = ''
                            else:
                                char = rule[0]

                            if (
                                _phonet_rules[pos]
                                and '^^' in _phonet_rules[pos][1:]
                            ):
                                if char:
                                    dest = (
                                        dest[0:j]
                                        + char
                                        + dest[min(len(dest), j + 1) :]
                                    )
                                    j += 1

                                # drop the consumed prefix and restart
                                src = src[i + 1 :]
                                i = 0
                                zeta0 = 1

                        break

                    pos += 3

                    if pos > end1 and start2 > 0:
                        pos = start2
                        start1 = start2
                        end1 = end2
                        start2 = -1
                        end2 = -1

            if zeta0 == 0:
                if char and ((j == 0) or (dest[j - 1] != char)):
                    # delete multiple letters only
                    dest = (
                        dest[0:j] + char + dest[min(j + 1, term_length) :]
                    )
                    j += 1

                i += 1
                zeta = 0

        dest = dest[0:j]

        return dest

    _initialize_phonet(lang)

    word = unicode_normalize('NFKC', text_type(word))
    return _phonet(word, mode, lang)
|
1704 | |||
1755 |