Conditions | 21 |
Total Lines | 138 |
Code Lines | 70 |
Lines | 0 |
Ratio | 0 % |
Tests | 61 |
CRAP Score | 21 |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present, also consider: Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.
Complex units like the method abydos.phonetic._spfc.SPFC.encode() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within its enclosing class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # -*- coding: utf-8 -*- |
||
def encode(self, word):
    """Return the Standardized Phonetic Frequency Code (SPFC) of a word.

    :param word: the name to transform: either a single string in which a
        period or a space divides the first and last names, or an iterable
        (e.g. a tuple or list) of exactly two strings, first name then
        last name
    :returns: the SPFC value; the empty string if ``word`` is falsy
    :rtype: str
    :raises AttributeError: if ``word`` is neither a string that splits
        into two names nor an iterable of exactly two items

    >>> pe = SPFC()
    >>> pe.encode('Christopher Smith')
    '01160'
    >>> pe.encode('Christopher Schmidt')
    '01160'
    >>> pe.encode('Niall Smith')
    '01660'
    >>> pe.encode('Niall Schmidt')
    '01660'

    >>> pe.encode('L.Smith')
    '01960'
    >>> pe.encode('R.Miller')
    '65490'

    >>> pe.encode(('L', 'Smith'))
    '01960'
    >>> pe.encode(('R', 'Miller'))
    '65490'
    """
    if not word:
        return ''

    # Split the input into [first name, last name].
    if isinstance(word, (str, text_type)):
        # A period takes precedence over a space as the divider.
        names = word.split('.', 1)
        if len(names) != 2:
            names = word.split(' ', 1)
    elif hasattr(word, '__iter__'):
        # Materialize first: iterators and generators have no len(), so
        # measuring them directly would leak a TypeError to the caller.
        names = list(word)
    else:
        names = []

    if len(names) != 2:
        raise AttributeError(
            'word attribute must be a string with a space or period '
            'dividing the first and last names or a tuple/list '
            'consisting of the first and last names'
        )

    # Uppercase, expand eszett, and decompose accented characters so that
    # the A-Z filter below keeps their base letters.
    names = [
        unicode_normalize(
            'NFKD', text_type(name.strip().replace('ß', 'SS').upper())
        )
        for name in names
    ]

    dropped_letters = {'A', 'E', 'H', 'I', 'O', 'U', 'W', 'Y'}

    # Step 5 lookup: (suffix, digit) pairs, tried in order. 'STN' must
    # precede 'TN' and 'STR' must precede 'TR' so that the longer suffix
    # wins when both match.
    suffix_codes = (
        ('STN', '8'),
        ('PRS', '8'),
        ('SN', '8'),
        ('STR', '9'),
        ('SR', '9'),
        ('TN', '9'),
        ('TD', '9'),
        ('DRS', '7'),
        ('TR', '7'),
        ('MN', '7'),
    )

    def _steps_one_to_three(name):
        """Perform the first three steps of SPFC."""
        # filter out non A-Z
        name = ''.join(char for char in name if char in self._uc_set)

        # 1. In the field, convert DK to K, DT to T, SC to S, KN to N,
        # and MN to N
        for subst in self._substitutions:
            name = name.replace(subst[0], subst[1])

        # 2. In the name field, replace multiple letters with a single
        # letter
        name = self._delete_consecutive_repeats(name)

        # 3. Remove vowels, W, H, and Y, but keep the first letter in the
        # name field.
        if name:
            name = name[0] + ''.join(
                char for char in name[1:] if char not in dropped_letters
            )
        return name

    first, last = [_steps_one_to_three(name) for name in names]
    code = ''

    # 4. The first digit of the code is obtained using PF1 and the first
    # letter of the name field. Remove this letter after coding.
    if last:
        code += last[0].translate(self._pf1)
        last = last[1:]

    # 5. Using the last letters of the name, use Table PF3 to obtain the
    # second digit of the code. Use as many letters as possible and remove
    # after coding.
    if last:
        for suffix, digit in suffix_codes:
            if last.endswith(suffix):
                code += digit
                last = last[: -len(suffix)]
                break
        else:
            # No multi-letter suffix matched: code the final letter alone.
            code += last[-1].translate(self._pf3)
            last = last[:-1]

    # 6. The third digit is found using Table PF2 and the first character
    # of the first name. Remove after coding.
    if first:
        code += first[0].translate(self._pf2)

    # 7. The fourth digit is found using Table PF2 and the first character
    # of the name field. If no letters remain use zero. After coding remove
    # the letter.
    # 8. The fifth digit is found in the same manner as the fourth using
    # the remaining characters of the name field if any.
    for _ in range(2):
        if last:
            code += last[0].translate(self._pf2)
            last = last[1:]
        else:
            code += '0'

    return code
|
209 | |||
246 |