Conditions | 66 |
Total Lines | 163 |
Code Lines | 112 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Method with Method Object.
Complex classes like abydos.phonetic.nysiis.nysiis() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # -*- coding: utf-8 -*- |
||
# Letters treated as vowels by every NYSIIS rule below.
_NYSIIS_VOWELS = frozenset({'A', 'E', 'I', 'O', 'U'})


def _nysiis_recode_prefix(word, modified):
    """Return ``word`` with its leading letters recoded (first match wins)."""
    if word[:3] == 'MAC':
        return 'MCC' + word[3:]
    if word[:2] == 'KN':
        return 'NN' + word[2:]
    if word[:1] == 'K':
        return 'C' + word[1:]
    if word[:2] in {'PH', 'PF'}:
        return 'FF' + word[2:]
    if word[:3] == 'SCH':
        return 'SSS' + word[3:]
    if modified:
        # These extra prefix rules are only tried when no standard rule hit.
        if word[:2] in {'WR', 'RH'}:
            return 'RR' + word[2:]
        if word[:2] == 'DG':
            return 'GG' + word[2:]
        if word[:1] in _NYSIIS_VOWELS:
            return 'A' + word[1:]
    return word


def _nysiis_recode_suffix(word, modified):
    """Return ``word`` with its trailing letters recoded (first match wins)."""
    # The modified variant drops one final S or Z before looking at the
    # two-letter ending.
    if modified and word[-1:] in {'S', 'Z'}:
        word = word[:-1]

    tail = word[-2:]
    if tail in {'EE', 'IE'} or (modified and tail == 'YE'):
        return word[:-2] + 'Y'
    if tail in {'DT', 'RT', 'RD'}:
        return word[:-2] + 'D'
    if tail in {'NT', 'ND'}:
        return word[:-2] + ('N' if modified else 'D')
    if modified:
        if tail == 'IX':
            return word[:-2] + 'ICK'
        if tail == 'EX':
            return word[:-2] + 'ECK'
    return word


def _nysiis_scan_body(word, modified):
    """Recode every letter after the first and return the raw, unpruned key."""
    key = word[:1]

    # Number of upcoming positions already rewritten by a multi-letter rule;
    # they are stepped over instead of being examined again.
    skip = 0
    for i in range(1, len(word)):
        if i >= len(word):
            # The 'KN' rule shortens word, so the precomputed range can
            # run past the new end.
            continue
        elif skip:
            skip -= 1
            continue
        elif word[i:i+2] == 'EV':
            word = word[:i] + 'AF' + word[i+2:]
            skip = 1
        elif word[i] in _NYSIIS_VOWELS:
            word = word[:i] + 'A' + word[i+1:]
        elif modified and i != len(word)-1 and word[i] == 'Y':
            # A non-final Y is treated like a vowel in the modified variant.
            word = word[:i] + 'A' + word[i+1:]
        elif word[i] == 'Q':
            word = word[:i] + 'G' + word[i+1:]
        elif word[i] == 'Z':
            word = word[:i] + 'S' + word[i+1:]
        elif word[i] == 'M':
            word = word[:i] + 'N' + word[i+1:]
        elif word[i:i+2] == 'KN':
            word = word[:i] + 'N' + word[i+2:]
        elif word[i] == 'K':
            word = word[:i] + 'C' + word[i+1:]
        elif modified and i == len(word)-3 and word[i:i+3] == 'SCH':
            # Word-final SCH
            word = word[:i] + 'SSA'
            skip = 2
        elif word[i:i+3] == 'SCH':
            word = word[:i] + 'SSS' + word[i+3:]
            skip = 2
        elif modified and i == len(word)-2 and word[i:i+2] == 'SH':
            # Word-final SH
            word = word[:i] + 'SA'
            skip = 1
        elif word[i:i+2] == 'SH':
            word = word[:i] + 'SS' + word[i+2:]
            skip = 1
        elif word[i:i+2] == 'PH':
            word = word[:i] + 'FF' + word[i+2:]
            skip = 1
        elif modified and word[i:i+3] == 'GHT':
            word = word[:i] + 'TTT' + word[i+3:]
            skip = 2
        elif modified and word[i:i+2] == 'DG':
            word = word[:i] + 'GG' + word[i+2:]
            skip = 1
        elif modified and word[i:i+2] == 'WR':
            word = word[:i] + 'RR' + word[i+2:]
            skip = 1
        elif word[i] == 'H' and (word[i-1] not in _NYSIIS_VOWELS or
                                 word[i+1:i+2] not in _NYSIIS_VOWELS):
            # H survives only between two vowels; otherwise it becomes a
            # copy of the preceding letter.
            word = word[:i] + word[i-1] + word[i+1:]
        elif word[i] == 'W' and word[i-1] in _NYSIIS_VOWELS:
            # W after a vowel becomes a copy of that vowel.
            word = word[:i] + word[i-1] + word[i+1:]

        # Append the chunk just processed (including the letters that will
        # be skipped) unless it equals the last letter already in the key.
        if word[i:i+skip+1] != key[-1:]:
            key += word[i:i+skip+1]

    return key


def _nysiis_trim_key(key):
    """Drop a final 'S', turn a final 'AY' into 'Y', then drop a final 'A'."""
    if key[-1:] == 'S':
        key = key[:-1]
    if key[-2:] == 'AY':
        key = key[:-2] + 'Y'
    if key[-1:] == 'A':
        key = key[:-1]
    return key


def nysiis(word, max_length=6, modified=False):
    """Return the NYSIIS code for a word.

    The New York State Identification and Intelligence System algorithm is
    defined in :cite:`Taft:1970`.

    The modified version of this algorithm is described in Appendix B of
    :cite:`Lynch:1977`.

    The word is upper-cased and stripped of non-alphabetic characters before
    encoding. A non-negative ``max_length`` below 6 is raised to 6; a
    negative ``max_length`` disables truncation.

    :param str word: the word to transform
    :param int max_length: the maximum length (default 6) of the code to return
    :param bool modified: indicates whether to use USDA modified NYSIIS
    :returns: the NYSIIS value; an empty string if ``word`` contains no
        alphabetic characters; the literal string ``'ERROR'`` if ``modified``
        is set and the word (after a final S or Z is dropped) ends in JR or SR
    :rtype: str

    >>> nysiis('Christopher')
    'CRASTA'
    >>> nysiis('Niall')
    'NAL'
    >>> nysiis('Smith')
    'SNAT'
    >>> nysiis('Schmidt')
    'SNAD'

    >>> nysiis('Christopher', max_length=-1)
    'CRASTAFAR'

    >>> nysiis('Christopher', max_length=8, modified=True)
    'CRASTAFA'
    >>> nysiis('Niall', max_length=8, modified=True)
    'NAL'
    >>> nysiis('Smith', max_length=8, modified=True)
    'SNAT'
    >>> nysiis('Schmidt', max_length=8, modified=True)
    'SNAD'
    """
    # Require a max_length of at least 6
    if max_length > -1:
        max_length = max(6, max_length)

    word = ''.join(c for c in word.upper() if c.isalpha())
    # Expand any eszett that survived upper-casing.
    word = word.replace('ß', 'SS')

    # exit early if there are no alphas
    if not word:
        return ''

    # Remembered so the modified variant can restore a leading vowel that
    # the prefix rules recode to 'A'.
    original_first_char = word[0]

    word = _nysiis_recode_prefix(word, modified)
    word = _nysiis_recode_suffix(word, modified)

    # The suffix recoding never produces or alters a JR/SR ending, so
    # testing for it afterwards matches testing it as the last suffix rule.
    if modified and word[-2:] in {'JR', 'SR'}:
        return 'ERROR'

    key = _nysiis_scan_body(word, modified)
    key = _delete_consecutive_repeats(key)
    key = _nysiis_trim_key(key)

    if modified and key[:1] == 'A':
        key = original_first_char + key[1:]

    if max_length > 0:
        key = key[:max_length]

    return key
||
197 | |||
202 |