| Conditions | 51 |
| Total Lines | 332 |
| Code Lines | 293 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.
Complex classes like abydos.fingerprint.synoname_toolcode() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | # -*- coding: utf-8 -*- |
||
def synoname_toolcode(lname, fname='', qual='', normalize=0):
    """Build the Synoname toolcode.

    The toolcode is the concatenation of ten fields:

    0. qualifier class (``'0'``-``'3'``)
    1. punctuation flag (``'2'`` if the name contains a period, ``'1'`` if
       it contains other punctuation, else ``'0'``)
    2. generation flag (``'1'`` elder, ``'2'`` younger, else ``'0'``)
    3. three-digit index of the first Roman numeral found (``'000'`` if none)
    4. two-digit length of the first name
    5. two-digit length of the last name
    6. the literal separator ``'$'``
    7. codes of every special string found (index + position letter)
    8. the literal separator ``'$'``
    9. search range (distinct initial letters of the name's words)

    :param lname: last name
    :param fname: first name (can be blank)
    :param qual: qualifier
    :param normalize: normalization mode: ``0`` leaves the names as given;
        any non-zero value moves whatever follows a comma in ``lname`` to
        the front of ``fname``; ``2`` additionally moves an elder/younger
        marker and a trailing Roman numeral from ``fname`` to ``lname``
    :return: a ``(lname, fname, toolcode)`` tuple, where the names reflect
        the requested normalization and ``toolcode`` is a string
    """
    # Bit flags describing where in the name a special string may occur.
    method_dict = {'end': 1, 'middle': 2, 'beginning': 4,
                   'beginning_no_space': 8}
    special_table = (
        # Roman, string, extra, method
        (False, 'NONE', '', 0),
        (False, 'aine', '', 3),
        (False, 'also erroneously', '', 4),
        (False, 'also identified with the', '', 2),
        (False, 'also identified with', '', 2),
        (False, 'archbishop', '', 7),
        (False, 'atelier', '', 7),
        (False, 'baron', '', 7),
        (False, 'cadet', '', 3),
        (False, 'cardinal', '', 7),
        (False, 'circle of', '', 5),
        (False, 'circle', '', 5),
        (False, 'class of', '', 5),
        (False, 'conde de', '', 7),
        (False, 'countess', '', 7),
        (False, 'count', '', 7),
        (False, "d'", " d'", 15),
        (False, 'dai', '', 15),
        (False, "dall'", " dall'", 15),
        (False, 'dalla', '', 15),
        (False, 'dalle', '', 15),
        (False, 'dal', '', 15),
        (False, 'da', '', 15),
        (False, 'degli', '', 15),
        (False, 'della', '', 15),
        (False, 'del', '', 15),
        (False, 'den', '', 15),
        (False, 'der altere', '', 3),
        (False, 'der jungere', '', 3),
        (False, 'der', '', 15),
        (False, 'de la', '', 15),
        (False, 'des', '', 15),
        (False, "de'", " de'", 15),
        (False, 'de', '', 15),
        (False, 'di ser', '', 7),
        (False, 'di', '', 15),
        (False, 'dos', '', 15),
        (False, 'du', '', 15),
        (False, 'duke of', '', 7),
        (False, 'earl of', '', 7),
        (False, 'el', '', 15),
        (False, 'fils', '', 3),
        (False, 'florentine follower of', '', 5),
        (False, 'follower of', '', 5),
        (False, 'fra', '', 7),
        (False, 'freiherr von', '', 7),
        (False, 'giovane', '', 7),
        (False, 'group', '', 5),
        (True, 'iii', '', 3),
        (True, 'ii', '', 3),
        (False, 'il giovane', '', 7),
        (False, 'il vecchio', '', 7),
        (False, 'il', '', 15),
        (False, "in't", '', 7),
        (False, 'in het', '', 7),
        (True, 'iv', '', 3),
        (True, 'ix', '', 3),
        (True, 'i', '', 3),
        (False, 'jr.', '', 3),
        (False, 'jr', '', 3),
        (False, 'juniore', '', 3),
        (False, 'junior', '', 3),
        (False, 'king of', '', 7),
        (False, "l'", " l'", 15),
        (False, "l'aine", '', 3),
        (False, 'la', '', 15),
        (False, 'le jeune', '', 3),
        (False, 'le', '', 15),
        (False, 'lo', '', 15),
        (False, 'maestro', '', 7),
        (False, 'maitre', '', 7),
        (False, 'marchioness', '', 7),
        (False, 'markgrafin von', '', 7),
        (False, 'marquess', '', 7),
        (False, 'marquis', '', 7),
        (False, 'master of the', '', 7),
        (False, 'master of', '', 7),
        (False, 'master known as the', '', 7),
        (False, 'master with the', '', 7),
        (False, 'master with', '', 7),
        (False, 'masters', '', 7),
        (False, 'master', '', 7),
        (False, 'meister', '', 7),
        (False, 'met de', '', 7),
        (False, 'met', '', 7),
        (False, 'mlle.', '', 7),
        (False, 'mlle', '', 7),
        (False, 'monogrammist', '', 7),
        (False, 'monsu', '', 7),
        (False, 'nee', '', 2),
        (False, 'of', '', 3),
        (False, 'oncle', '', 3),
        (False, 'op den', '', 15),
        (False, 'op de', '', 15),
        (False, 'or', '', 2),
        (False, 'over den', '', 15),
        (False, 'over de', '', 15),
        (False, 'over', '', 7),
        (False, 'p.re', '', 7),
        (False, 'p.r.a.', '', 1),
        (False, 'padre', '', 7),
        (False, 'painter', '', 7),
        (False, 'pere', '', 3),
        (False, 'possibly identified with', '', 6),
        (False, 'possibly', '', 6),
        (False, 'pseudo', '', 15),
        (False, 'r.a.', '', 1),
        (False, 'reichsgraf von', '', 7),
        (False, 'ritter von', '', 7),
        (False, 'sainte-', ' sainte-', 8),
        (False, 'sainte', '', 7),
        (False, 'saint-', ' saint-', 8),
        (False, 'saint', '', 7),
        (False, 'santa', '', 15),
        (False, "sant'", " sant'", 15),
        (False, 'san', '', 15),
        (False, 'ser', '', 7),
        (False, 'seniore', '', 3),
        (False, 'senior', '', 3),
        (False, 'sir', '', 5),
        (False, 'sr.', '', 3),
        (False, 'sr', '', 3),
        (False, 'ss.', ' ss.', 14),
        (False, 'ss', '', 6),
        (False, 'st-', ' st-', 8),
        (False, 'st.', ' st.', 15),
        (False, 'ste-', ' ste-', 8),
        (False, 'ste.', ' ste.', 15),
        (False, 'studio', '', 7),
        (False, 'sub-group', '', 5),
        (False, 'sultan of', '', 7),
        (False, 'ten', '', 15),
        (False, 'ter', '', 15),
        (False, 'the elder', '', 3),
        (False, 'the younger', '', 3),
        (False, 'the', '', 7),
        (False, 'tot', '', 15),
        (False, 'unidentified', '', 1),
        (False, 'van den', '', 15),
        (False, 'van der', '', 15),
        (False, 'van de', '', 15),
        (False, 'vanden', '', 15),
        (False, 'vander', '', 15),
        (False, 'van', '', 15),
        (False, 'vecchia', '', 7),
        (False, 'vecchio', '', 7),
        (True, 'viii', '', 3),
        (True, 'vii', '', 3),
        (True, 'vi', '', 3),
        (True, 'v', '', 3),
        (False, 'vom', '', 7),
        (False, 'von', '', 15),
        (False, 'workshop', '', 7),
        (True, 'xiii', '', 3),
        (True, 'xii', '', 3),
        (True, 'xiv', '', 3),
        (True, 'xix', '', 3),
        (True, 'xi', '', 3),
        (True, 'xviii', '', 3),
        (True, 'xvii', '', 3),
        (True, 'xvi', '', 3),
        (True, 'xv', '', 3),
        (True, 'xx', '', 3),
        (True, 'x', '', 3),
        (False, 'y', '', 7),
    )

    # Start with the basic code
    toolcode = ['0', '0', '0', '000', '00', '00', '$', '', '$', '']

    # The full name is built from the names as passed in, i.e. before any
    # normalization below rewrites lname/fname.
    full_name = ' '.join((lname, fname))

    # Fill field 0 (qualifier)
    qual_3 = {'adaptation after', 'after', 'assistant of', 'assistants of',
              'circle of', 'follower of', 'imitator of', 'in the style of',
              'manner of', 'pupil of', 'school of', 'studio of',
              'style of', 'workshop of'}
    qual_2 = {'copy after', 'copy after?', 'copy of'}
    qual_1 = {'ascribed to', 'attributed to or copy after',
              'attributed to', 'possibly'}

    if qual in qual_3:
        toolcode[0] = '3'
    elif qual in qual_2:
        toolcode[0] = '2'
    elif qual in qual_1:
        toolcode[0] = '1'

    # Fill field 1 (punctuation)
    if '.' in full_name:
        toolcode[1] = '2'
    elif any(punct in full_name
             for punct in ',-/:;"&\'()!{|}?$%*+<=>[\\]^_`~'):
        toolcode[1] = '1'

    # Fill field 2 (generation)
    gen_1 = ('the elder', ' sr.', ' sr', 'senior', 'der altere', 'il vecchio',
             "l'aine", 'p.re', 'padre', 'seniore', 'vecchia', 'vecchio')
    gen_2 = (' jr.', ' jr', 'der jungere', 'il giovane', 'giovane', 'juniore',
             'junior', 'le jeune', 'the younger')

    elderyounger = ''  # save elder/younger for possible movement later
    for gen in gen_1:
        if gen in full_name:
            toolcode[2] = '1'
            elderyounger = gen
            break
    else:
        # Only look for a "younger" marker when no "elder" marker matched.
        for gen in gen_2:
            if gen in full_name:
                toolcode[2] = '2'
                elderyounger = gen
                break

    # do comma flip
    if normalize:
        comma = lname.find(',')
        if comma != -1:
            # lstrip (instead of indexing [0] in a loop) so that a last name
            # with nothing after the comma no longer raises IndexError.
            lname_end = lname[comma + 1:].lstrip(' ,')
            if lname_end:
                fname = lname_end + ' ' + fname
            lname = lname[:comma].strip()

    # do elder/younger move
    if normalize == 2 and elderyounger:
        elderyounger_loc = fname.find(elderyounger)
        if elderyounger_loc != -1:
            lname = lname + ' ' + elderyounger.strip()
            fname = (fname[:elderyounger_loc].strip() + ' ' +
                     fname[elderyounger_loc + len(elderyounger):])

    # Fill fields 4 & 5 (name lengths, measured after comma flip and
    # elder/younger move but before any Roman numeral move)
    toolcode[4] = '{:02d}'.format(len(fname))
    toolcode[5] = '{:02d}'.format(len(lname))

    # strip punctuation (periods and apostrophes are deliberately kept)
    for char in ',/:;"&()!{|}?$%*+<=>[\\]^_`~':
        full_name = full_name.replace(char, '')
    # Hyphens become spaces, except in the sequence 'b-g'. The replacement
    # is length-preserving, so positions from enumerate() stay valid.
    for pos, char in enumerate(full_name):
        if char == '-' and full_name[pos - 1:pos + 2] != 'b-g':
            full_name = full_name[:pos] + ' ' + full_name[pos + 1:]

    # Fill field 9 (search range)
    for letter in [word[0] for word in full_name.split()]:
        if letter not in toolcode[9]:
            toolcode[9] += letter
        if len(toolcode[9]) == 15:
            break

    def roman_check(numeral, fname, lname):
        """Move Roman numerals from first name to last."""
        loc = fname.find(numeral)
        if loc != -1:
            end = loc + len(numeral)
            # Test for end-of-string first: indexing fname[end] when the
            # numeral is the last thing in fname would raise IndexError.
            if end == len(fname) or fname[end] in {' ', ','}:
                lname += ' ' + numeral
                # rstrip is safe on an empty remainder, unlike fname[-1].
                fname = fname[:loc].strip().rstrip(' ,')
        return fname, lname

    # Fill fields 7 (specials) and 3 (roman numerals)
    for num, special in enumerate(special_table):
        roman, string, extra, method = special
        code = '{:03d}'.format(num)

        if method & method_dict['end']:
            string_context = ' ' + string
            loc = full_name.find(string_context)
            if ((len(full_name) > len(string_context)) and
                    (loc == len(full_name) - len(string_context))):
                if roman:
                    # Skip when the first name contains an abbreviated
                    # initial that could be mistaken for a numeral.
                    if not any(abbr in fname for abbr in ('i.', 'v.', 'x.')):
                        full_name = full_name[:loc]
                        toolcode[7] += code + 'a'
                        # Field 3 starts out as '000' (a truthy string), so
                        # it must be compared against that placeholder.
                        if toolcode[3] == '000':
                            toolcode[3] = code
                        if normalize == 2:
                            fname, lname = roman_check(string, fname, lname)
                else:
                    full_name = full_name[:loc]
                    toolcode[7] += code + 'a'

        if method & method_dict['middle']:
            string_context = ' ' + string + ' '
            loc = full_name.find(string_context)
            if loc > 0:
                if roman:
                    if not any(abbr in fname for abbr in ('i.', 'v.', 'x.')):
                        full_name = (full_name[:loc] +
                                     full_name[loc + len(string) + 1:])
                        toolcode[7] += code + 'b'
                        if toolcode[3] == '000':
                            toolcode[3] = code
                        if normalize == 2:
                            fname, lname = roman_check(string, fname, lname)
                else:
                    full_name = (full_name[:loc] +
                                 full_name[loc + len(string) + 1:])
                    toolcode[7] += code + 'b'

        if method & method_dict['beginning']:
            if full_name.startswith(string + ' '):
                full_name = full_name[len(string) + 1:]
                toolcode[7] += code + 'c'

        if method & method_dict['beginning_no_space']:
            if full_name.startswith(string):
                toolcode[7] += code + 'd'
                # Guard against the name consisting of the prefix alone.
                if len(full_name) > len(string):
                    following = full_name[len(string)]
                    if following not in toolcode[9]:
                        toolcode[9] += following

        if extra:
            loc = full_name.find(extra)
            if loc != -1:
                toolcode[7] += code + 'X'
                after = loc + len(extra)
                # NOTE(review): the character tested (the one following
                # `extra`) differs from the character appended (the last
                # character of `extra`). This is kept as in the original;
                # confirm against the Synoname specification.
                if (after < len(full_name) and
                        full_name[after] not in toolcode[9]):
                    toolcode[9] += full_name[loc + len(string)]

    return lname, fname, ''.join(toolcode)
||
| 731 | |||
| 736 |