Conditions | 142 |
Total Lines | 633 |
Code Lines | 381 |
Lines | 0 |
Ratio | 0 % |
Tests | 269 |
CRAP Score | 142 |
Changes | 0 |
Small methods make your code easier to understand, especially when they are combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, that is usually a good sign that the commented part should be extracted into a new method; the comment then serves as a starting point for coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
Complex classes like abydos.phonetic._Phonet.Phonet.encode() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # -*- coding: utf-8 -*- |
||
def encode(self, word, mode=1, lang='de'):
    """Return the phonet code for a word.

    The rule table is read from ``self._rules_no_lang`` (when ``lang`` is
    ``'none'``) or ``self._rules_german`` (any other value).  It is a flat
    sequence of triples: search pattern, replacement for mode 1,
    replacement for mode 2.  The lookup tables derived from it are rebuilt
    on every call.

    Args:
        word (str): The word to transform
        mode (int): The phonet variant to employ (1 or 2)
        lang (str): 'de' (default) for German, 'none' for no language

    Returns:
        str: The phonet value

    Examples:
        >>> pe = Phonet()
        >>> pe.encode('Christopher')
        'KRISTOFA'
        >>> pe.encode('Niall')
        'NIAL'
        >>> pe.encode('Smith')
        'SMIT'
        >>> pe.encode('Schmidt')
        'SHMIT'

        >>> pe.encode('Christopher', mode=2)
        'KRIZTUFA'
        >>> pe.encode('Niall', mode=2)
        'NIAL'
        >>> pe.encode('Smith', mode=2)
        'ZNIT'
        >>> pe.encode('Schmidt', mode=2)
        'ZNIT'

        >>> pe.encode('Christopher', lang='none')
        'CHRISTOPHER'
        >>> pe.encode('Niall', lang='none')
        'NIAL'
        >>> pe.encode('Smith', lang='none')
        'SMITH'
        >>> pe.encode('Schmidt', lang='none')
        'SCHMIDT'

    """
    # Counters are used as int-valued maps: a missing key reads as 0.
    # phonet_hash: first character -> index of its first rule (-1: none)
    phonet_hash = Counter()
    # alpha_pos: 1 for umlauts/specials, 2..27 for 'A'..'Z', 0 otherwise
    alpha_pos = Counter()

    # (letter index, alpha_pos of following char) -> first / last rule
    # index; column 0 collects rules that apply to any following char.
    phonet_hash_1 = Counter()
    phonet_hash_2 = Counter()

    def _initialize_phonet(lang):
        """Initialize phonet variables.

        Fills ``phonet_hash``, ``alpha_pos``, ``phonet_hash_1`` and
        ``phonet_hash_2`` from the rule table selected by ``lang``.

        Args:
            lang (str): Language to use for rules

        """
        if lang == 'none':
            _phonet_rules = self._rules_no_lang
        else:
            _phonet_rules = self._rules_german

        phonet_hash[''] = -1

        # German and international umlauts
        for j in {
            'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç', 'È', 'É', 'Ê', 'Ë',
            'Ì', 'Í', 'Î', 'Ï', 'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', 'Ø',
            'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß', 'Œ', 'Š', 'Ÿ',
        }:
            alpha_pos[j] = 1
            phonet_hash[j] = -1

        # "normal" letters ('A'-'Z')
        for i, j in enumerate('ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
            alpha_pos[j] = i + 2
            phonet_hash[j] = -1

        for i in range(26):
            for j in range(28):
                phonet_hash_1[i, j] = -1
                phonet_hash_2[i, j] = -1

        # for each phonetic rule (every third entry is a search pattern)
        for i, rule in enumerate(_phonet_rules):
            if rule and i % 3 == 0:
                # calculate first hash value
                k = rule[0]

                if phonet_hash[k] < 0 and (
                    _phonet_rules[i + 1] or _phonet_rules[i + 2]
                ):
                    phonet_hash[k] = i

                # calculate second hash values
                if k and alpha_pos[k] >= 2:
                    k = alpha_pos[k]

                    j = k - 2
                    rule = rule[1:]

                    if not rule:
                        rule = ' '
                    elif rule[0] == '(':
                        rule = rule[1:]
                    else:
                        rule = rule[0]

                    while rule and (rule[0] != ')'):
                        k = alpha_pos[rule[0]]

                        if k > 0:
                            # add hash value for this letter
                            if phonet_hash_1[j, k] < 0:
                                phonet_hash_1[j, k] = i
                                phonet_hash_2[j, k] = i

                            if phonet_hash_2[j, k] >= (i - 30):
                                phonet_hash_2[j, k] = i
                            else:
                                k = -1

                        if k <= 0:
                            # add hash value for all letters
                            if phonet_hash_1[j, 0] < 0:
                                phonet_hash_1[j, 0] = i

                            phonet_hash_2[j, 0] = i

                        rule = rule[1:]

    def _phonet(term, mode, lang):
        """Return the phonet coded form of a term.

        Args:
            term (str): Term to transform
            mode (int): The phonet variant to employ (1 or 2)
            lang (str): 'de' (default) for German, 'none' for no language

        Returns:
            str: The phonet value

        """
        if lang == 'none':
            _phonet_rules = self._rules_no_lang
        else:
            _phonet_rules = self._rules_german

        char0 = ''
        dest = term

        if not term:
            return ''

        term_length = len(term)

        # convert input string to upper-case
        src = term.translate(self._upper_trans)

        # check "src"
        i = 0  # read position in src
        j = 0  # write position in dest
        zeta = 0  # non-zero right after a '<' rule rewrote src in place

        while i < len(src):
            char = src[i]

            pos = alpha_pos[char]

            if pos >= 2:
                xpos = pos - 2

                if i + 1 == len(src):
                    pos = alpha_pos['']
                else:
                    pos = alpha_pos[src[i + 1]]

                start1 = phonet_hash_1[xpos, pos]
                start2 = phonet_hash_1[xpos, 0]
                end1 = phonet_hash_2[xpos, pos]
                end2 = phonet_hash_2[xpos, 0]

                # preserve rule priorities
                if (start2 >= 0) and ((start1 < 0) or (start2 < start1)):
                    start1, start2 = start2, start1
                    end1, end2 = end2, end1

                # merge the two ranges when they touch or overlap
                if (end1 >= start2) and (start2 >= 0):
                    if end2 > end1:
                        end1 = end2

                    start2 = -1
                    end2 = -1
            else:
                pos = phonet_hash[char]
                start1 = pos
                end1 = 10000
                start2 = -1
                end2 = -1

            pos = start1
            zeta0 = 0

            if pos >= 0:
                # check rules for this char
                while (_phonet_rules[pos] is None) or (
                    _phonet_rules[pos][0] == char
                ):
                    if pos > end1:
                        if start2 > 0:
                            pos = start2
                            start1 = start2
                            start2 = -1
                            end1 = end2
                            end2 = -1
                            continue

                        break

                    if (_phonet_rules[pos] is None) or (
                        _phonet_rules[pos + mode] is None
                    ):
                        # no conversion rule available
                        pos += 3
                        continue

                    # check whole string
                    matches = 1  # number of matching letters
                    priority = 5  # default priority
                    rule = _phonet_rules[pos]
                    rule = rule[1:]

                    # Match literal pattern characters.  The stop test
                    # looks at the current pattern character (rule[0]);
                    # the previous substring test on the whole remainder
                    # let control characters be consumed as literals.
                    while (
                        rule
                        and (len(src) > (i + matches))
                        and (src[i + matches] == rule[0])
                        and not rule[0].isdigit()
                        and (rule[0] not in '(-<^$')
                    ):
                        matches += 1
                        rule = rule[1:]

                    if rule and (rule[0] == '('):
                        # check an array of letters
                        if (
                            (len(src) > (i + matches))
                            and src[i + matches].isalpha()
                            and (src[i + matches] in rule[1:])
                        ):
                            matches += 1

                        while rule and rule[0] != ')':
                            rule = rule[1:]

                        # skip the closing ')'
                        rule = rule[1:]

                    if rule:
                        priority0 = ord(rule[0])
                    else:
                        priority0 = 0

                    matches0 = matches

                    while rule and rule[0] == '-' and matches > 1:
                        matches -= 1
                        rule = rule[1:]

                    if rule and rule[0] == '<':
                        rule = rule[1:]

                    if rule and rule[0].isdigit():
                        # read priority
                        priority = int(rule[0])
                        rule = rule[1:]

                    if rule and rule[0:2] == '^^':
                        rule = rule[1:]

                    # character following the full match ('' at the end)
                    following = src[i + matches0 : i + matches0 + 1]
                    at_word_end = (
                        not following.isalpha() and following != '.'
                    )

                    if (
                        not rule
                        or (
                            (rule[0] == '^')
                            and ((i == 0) or not src[i - 1].isalpha())
                            and ((rule[1:2] != '$') or at_word_end)
                        )
                        or (
                            (rule[0] == '$')
                            and (i > 0)
                            and src[i - 1].isalpha()
                            and at_word_end
                        )
                    ):
                        # look for continuation, if:
                        # matches > 1 and NO '-' in first string
                        pos0 = -1

                        start3 = 0
                        start4 = 0
                        end3 = 0
                        end4 = 0

                        if (
                            (matches > 1)
                            and src[i + matches : i + matches + 1]
                            and (priority0 != ord('-'))
                        ):
                            char0 = src[i + matches - 1]
                            pos0 = alpha_pos[char0]

                            if pos0 >= 2 and src[i + matches]:
                                xpos = pos0 - 2
                                pos0 = alpha_pos[src[i + matches]]
                                start3 = phonet_hash_1[xpos, pos0]
                                start4 = phonet_hash_1[xpos, 0]
                                end3 = phonet_hash_2[xpos, pos0]
                                end4 = phonet_hash_2[xpos, 0]

                                # preserve rule priorities
                                if (start4 >= 0) and (
                                    (start3 < 0) or (start4 < start3)
                                ):
                                    start3, start4 = start4, start3
                                    end3, end4 = end4, end3

                                if (end3 >= start4) and (start4 >= 0):
                                    if end4 > end3:
                                        end3 = end4

                                    start4 = -1
                                    end4 = -1
                            else:
                                pos0 = phonet_hash[char0]
                                start3 = pos0
                                end3 = 10000
                                start4 = -1
                                end4 = -1

                            pos0 = start3

                        # check continuation rules for src[i+matches]
                        if pos0 >= 0:
                            while (_phonet_rules[pos0] is None) or (
                                _phonet_rules[pos0][0] == char0
                            ):
                                if pos0 > end3:
                                    if start4 > 0:
                                        pos0 = start4
                                        start3 = start4
                                        start4 = -1
                                        end3 = end4
                                        end4 = -1
                                        continue

                                    priority0 = -1

                                    # important
                                    break

                                if (_phonet_rules[pos0] is None) or (
                                    _phonet_rules[pos0 + mode] is None
                                ):
                                    # no conversion rule available
                                    pos0 += 3
                                    continue

                                # check whole string
                                matches0 = matches
                                priority0 = 5
                                rule = _phonet_rules[pos0]
                                rule = rule[1:]

                                # Same literal-matching loop as above;
                                # the stop condition is made consistent
                                # with it (it used "or" and never stopped
                                # on control characters).
                                while (
                                    rule
                                    and (
                                        src[i + matches0 : i + matches0 + 1]
                                        == rule[0]
                                    )
                                    and not rule[0].isdigit()
                                    and (rule[0] not in '(-<^$')
                                ):
                                    matches0 += 1
                                    rule = rule[1:]

                                if rule and rule[0] == '(':
                                    # check an array of letters
                                    if src[
                                        i + matches0 : i + matches0 + 1
                                    ].isalpha() and (
                                        src[i + matches0] in rule[1:]
                                    ):
                                        matches0 += 1

                                    while rule and rule[0] != ')':
                                        rule = rule[1:]

                                    # skip the closing ')'
                                    rule = rule[1:]

                                while rule and rule[0] == '-':
                                    # "matches0" is NOT decremented
                                    # because of
                                    # "if (matches0 == matches)"
                                    rule = rule[1:]

                                if rule and rule[0] == '<':
                                    rule = rule[1:]

                                if rule and rule[0].isdigit():
                                    priority0 = int(rule[0])
                                    rule = rule[1:]

                                following0 = src[
                                    i + matches0 : i + matches0 + 1
                                ]

                                if (
                                    not rule
                                    or
                                    # rule == '^' is not possible here
                                    (
                                        (rule[0] == '$')
                                        and not following0.isalpha()
                                        and (following0 != '.')
                                    )
                                ):
                                    if matches0 == matches:
                                        # this is only a partial string
                                        pos0 += 3
                                        continue

                                    if priority0 < priority:
                                        # priority is too low
                                        pos0 += 3
                                        continue

                                    # continuation rule found
                                    break

                                pos0 += 3

                            # end of "while"
                            if (priority0 >= priority) and (
                                (_phonet_rules[pos0] is not None)
                                and (_phonet_rules[pos0][0] == char0)
                            ):
                                # a continuation rule wins; skip this one
                                pos += 3
                                continue

                        # replace string
                        if _phonet_rules[pos] and (
                            '<' in _phonet_rules[pos][1:]
                        ):
                            priority0 = 1
                        else:
                            priority0 = 0

                        rule = _phonet_rules[pos + mode]

                        if (priority0 == 1) and (zeta == 0):
                            # rule with '<' is applied
                            if (
                                (j > 0)
                                and rule
                                and (
                                    (dest[j - 1] == char)
                                    or (dest[j - 1] == rule[0])
                                )
                            ):
                                j -= 1

                            zeta0 = 1
                            zeta += 1
                            matches0 = 0

                            # Overwrite src in place with the replacement.
                            # The slice test stops at the end of src;
                            # plain indexing raised IndexError when the
                            # replacement ran past the end of the input.
                            while (
                                rule and src[i + matches0 : i + matches0 + 1]
                            ):
                                src = (
                                    src[0 : i + matches0]
                                    + rule[0]
                                    + src[i + matches0 + 1 :]
                                )
                                matches0 += 1
                                rule = rule[1:]

                            if matches0 < matches:
                                src = (
                                    src[0 : i + matches0]
                                    + src[i + matches :]
                                )

                            char = src[i]
                        else:
                            i = i + matches - 1
                            zeta = 0

                            while len(rule) > 1:
                                if (j == 0) or (dest[j - 1] != rule[0]):
                                    dest = (
                                        dest[0:j]
                                        + rule[0]
                                        + dest[min(len(dest), j + 1) :]
                                    )
                                    j += 1

                                rule = rule[1:]

                            # new "current char"
                            if not rule:
                                rule = ''
                                char = ''
                            else:
                                char = rule[0]

                            if (
                                _phonet_rules[pos]
                                and '^^' in _phonet_rules[pos][1:]
                            ):
                                if char:
                                    dest = (
                                        dest[0:j]
                                        + char
                                        + dest[min(len(dest), j + 1) :]
                                    )
                                    j += 1

                                src = src[i + 1 :]
                                i = 0
                                zeta0 = 1

                        break

                    pos += 3

                    if pos > end1 and start2 > 0:
                        pos = start2
                        start1 = start2
                        end1 = end2
                        start2 = -1
                        end2 = -1

            if zeta0 == 0:
                if char and ((j == 0) or (dest[j - 1] != char)):
                    # delete multiple letters only
                    dest = (
                        dest[0:j] + char + dest[min(j + 1, term_length) :]
                    )
                    j += 1

                i += 1
                zeta = 0

        dest = dest[0:j]

        return dest

    _initialize_phonet(lang)

    word = unicode_normalize('NFKC', text_type(word))
    return _phonet(word, mode, lang)
1708 | |||
1759 |