| Conditions | 32 |
| Total Lines | 211 |
| Code Lines | 96 |
| Lines | 0 |
| Ratio | 0 % |
| Tests | 76 |
| CRAP Score | 32 |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
- Extract Method

If many parameters/temporary variables are present:
- Replace Temp with Query
- Introduce Parameter Object
- Preserve Whole Object
Complex methods like abydos.phonetic._sfinx_bis.SfinxBis.encode() often do a lot of different things. To break the enclosing class down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | # Copyright 2014-2020 by Christopher C. Little. |
||
def encode(self, word: str) -> str:
    """Return the SfinxBis code for a word.

    The input is upper-cased and NFC-normalized, hyphens are turned into
    spaces, and every entry of ``self._adelstitler`` is stripped out. Each
    remaining whitespace-separated token is then encoded on its own.

    Parameters
    ----------
    word : str
        The word to transform

    Returns
    -------
    str
        The SfinxBis value: one code per token, joined with commas, or the
        empty string when no token is left after preprocessing

    Examples
    --------
    >>> pe = SfinxBis()
    >>> pe.encode('Christopher')
    'K68376'
    >>> pe.encode('Niall')
    'N4'
    >>> pe.encode('Smith')
    'S53'
    >>> pe.encode('Schmidt')
    'S53'

    >>> pe.encode('Johansson')
    'J585'
    >>> pe.encode('Sjöberg')
    '#162'


    .. versionadded:: 0.1.0
    .. versionchanged:: 0.3.6
        Encapsulated in class
    .. versionchanged:: 0.6.0
        Made return a str only (comma-separated)

    """

    def _swedify(token: str) -> str:
        """Return the Swedish-ized spelling of a token.

        Parameters
        ----------
        token : str
            Token to transform

        Returns
        -------
        str
            Transformed token

        .. versionadded:: 0.1.0

        """
        # Fixed spelling rewrites; they are applied in this exact order.
        for old, new in (
            ('STIERN', 'STJÄRN'),
            ('HIE', 'HJ'),
            ('SIÖ', 'SJÖ'),
            ('SCH', 'SH'),
            ('QU', 'KV'),
            ('IO', 'JO'),
            ('PH', 'F'),
        ):
            token = token.replace(old, new)

        # After any vowel, a following Ü/Y/I becomes J. The hard vowels
        # are processed before the soft ones.
        for vowels in (self._harde_vokaler, self._mjuka_vokaler):
            for vowel in vowels:
                for glide in ('Ü', 'Y', 'I'):
                    token = token.replace(vowel + glide, vowel + 'J')

        # Drop an 'H' that directly precedes a character of _uc_c_set.
        if 'H' in token:
            for char in self._uc_c_set:
                token = token.replace('H' + char, char)

        token = token.translate(self._substitutions)

        return token.replace('Ð', 'ETH').replace('Þ', 'TH')

    def _encode_first_sound(token: str) -> str:
        """Return the token with its first sound coded.

        The rules are tried in order and the first match wins; a token
        matching no rule is returned unchanged.

        Parameters
        ----------
        token : str
            Token to transform

        Returns
        -------
        str
            Transformed token

        .. versionadded:: 0.1.0

        """
        soft = self._mjuka_vokaler
        hard = self._harde_vokaler
        # Slices (not indexing) so that short or empty tokens are safe.
        first, second, third = token[0:1], token[1:2], token[2:3]
        pair, triple = token[0:2], token[0:3]

        if first in soft or first in hard:
            return '$' + token[1:]
        if pair in ('DJ', 'GJ', 'HJ', 'LJ'):
            return 'J' + token[2:]
        if first == 'G' and second in soft:
            return 'J' + token[1:]
        if first == 'Q':
            return 'K' + token[1:]
        if pair == 'CH' and third in frozenset(soft | hard):
            return '#' + token[2:]
        if first == 'C' and second in hard:
            return 'K' + token[1:]
        if first == 'C' and second in self._uc_c_set:
            return 'K' + token[1:]
        if first == 'X':
            return 'S' + token[1:]
        if first == 'C' and second in soft:
            return 'S' + token[1:]
        if triple in ('SKJ', 'STJ', 'SCH'):
            return '#' + token[3:]
        if pair in ('SH', 'KJ', 'TJ', 'SJ'):
            return '#' + token[2:]
        if pair == 'SK' and third in soft:
            return '#' + token[2:]
        if first == 'K' and second in soft:
            return '#' + token[1:]
        return token

    # Step 1: upper-case (and treat hyphens as token separators)
    word = unicode_normalize('NFC', word.upper()).replace('-', ' ')

    # Step 2: remove nobility prefixes
    for title in self._adelstitler:
        # A replacement can expose a new occurrence, hence the loop.
        while title in word:
            word = word.replace(title, ' ')
        # At the very start of the word the title is matched without the
        # first character of its stored form (presumably a leading space).
        bare_title = title[1:]
        if word.startswith(bare_title):
            word = word[len(bare_title):]

    # Split into tokens, then
    # Step 3: remove doubled letters (via _delete_consecutive_repeats)
    tokens = [
        self._delete_consecutive_repeats(token) for token in word.split()
    ]
    if not tokens:
        return ''

    heads = []
    tails = []
    for token in tokens:
        # Step 4: Swedification
        token = _swedify(token)
        # Step 5: drop every character that is not in _uc_set (A-Ö)
        token = ''.join(char for char in token if char in self._uc_set)
        # Step 6: code the first sound
        token = _encode_first_sound(token)
        # Step 7: split the name into two parts
        heads.append(token[0:1])
        # Step 8: phonetic transformation of the remainder
        tail = token[1:].replace('DT', 'T').replace('X', 'KS')
        # Step 9: turn the remainder into a digit code
        for vowel in self._mjuka_vokaler:
            tail = tail.replace('C' + vowel, '8' + vowel)
        tails.append(tail.translate(self._trans))

    # Step 10: remove adjacent duplicates
    tails = [self._delete_consecutive_repeats(tail) for tail in tails]

    # Step 11: remove every "9"; Step 12: put the two parts back together
    codes = [
        head + tail.replace('9', '') for head, tail in zip(heads, tails)
    ]

    # truncate, if max_length is set
    if self._max_length > 0:
        codes = [code[: self._max_length] for code in codes]

    return ','.join(codes)
| 410 | |||
| 416 |