| Conditions | 54 | 
| Total Lines | 386 | 
| Code Lines | 257 | 
| Lines | 0 | 
| Ratio | 0 % | 
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.
Complex classes like bm_php2py._run_script() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | #!/usr/bin/env python  | 
            ||
def _run_script():
    """Convert the BMPM PHP reference data into a Python data module.

    The BMPM directory is taken from ``sys.argv[1]`` (default
    ``../../bmpm/``). Every PHP file in its ``gen``, ``sep`` and ``ash``
    subdirectories whose name starts with a known prefix is converted line by
    line and written to ``../abydos/phonetic/_beider_morse_data.py``. The
    ``BMDATA[...]`` assignments are collected and appended at the end of the
    file. The output is then re-read and rewritten once to collapse blank
    lines, join a line ending in ``=`` with the following line, and append a
    ``noqa`` marker to long lines.
    """
    # The list of languages from BMPM to support (might need to be updated or
    # tuned as BMPM is updated)
    lang_tuple = (
        'any',
        'arabic',
        'cyrillic',
        'czech',
        'dutch',
        'english',
        'french',
        'german',
        'greek',
        'greeklatin',
        'hebrew',
        'hungarian',
        'italian',
        'latvian',
        'polish',
        'portuguese',
        'romanian',
        'russian',
        'spanish',
        'turkish',
    )

    # Each language gets its own bit so that language sets can be summed.
    lang_dict = {}
    for i, l in enumerate(lang_tuple):
        lang_dict[l] = 2 ** i
    lang_dict['common'] = "'common'"

    # State shared between this function and pythonize(). A dict is used
    # (rather than ``global``, which would not bind to these function-local
    # values) so that the per-file reset below is visible to pythonize().
    state = {'nl': False, 'array_seen': False}

    tail_parts = []

    def c2u(name):
        """Convert camelCase (used in PHP) to Python-standard snake_case.

        Src:
        https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case

        Parameters
        ----------
        name: A function or variable name in camelCase

        Returns
        -------
        str: The name in snake_case

        """
        s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
        s1 = re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
        return s1

    def pythonize(line, fn='', subdir='gen'):
        """Convert a line of BMPM code from PHP to Python.

        Parameters
        ----------
        line : str
            A line of code
        fn : str
            A filename
        subdir : str
            The file's subdirectory

        Returns
        -------
        The code in Python: the converted line followed by a newline, a
        single newline for the first empty result after a non-empty one, or
        an empty string for further consecutive empty results.

        """
        if '$all' in line:
            return ''
        if 'make the sum of all languages be visible in the function' in line:
            return ''

        line = line.strip()

        if 'array' in line and not line.startswith('//'):
            state['array_seen'] = True

        line = re.sub('//+', '#', line)
        # Lines holding a single-string array("...") are dropped entirely.
        if line and re.search(r'array\("[^"]+?"\)', line):
            line = ''
        line = line.replace('array', '')
        line = re.sub(r'^\s*', '', line)
        line = re.sub(';$', '', line)
        line = re.sub('^include_.+', '', line)

        subdir_upper = subdir.upper()

        def const(name):
            # Name of the generated constant for a PHP variable.
            return '_' + subdir_upper + '_' + c2u(name).upper()

        def target(kind, lang_key):
            # Left-hand side of a BMDATA assignment.
            return (
                "BMDATA['" + subdir + "']['" + kind + "'][L_" + lang_key + ']'
            )

        line = re.sub(
            r'\$(approx|rules|exact)\[LanguageIndex\("([^"]+)", '
            r'\$languages\)\] = \$([a-zA-Z]+)',
            lambda m: (
                target(m.group(1), m.group(2).upper())
                + ' = '
                + const(m.group(3))
            ),
            line,
        )

        line = re.sub(
            r'\$(approx|rules|exact|hebrew)([A-Za-z]+) = _merge'
            r'\(\$([a-zA-Z]+), \$([a-zA-Z]+)\)',
            lambda m: (
                target(m.group(1), c2u(m.group(2)).upper())
                + ' = '
                + const(m.group(3))
                + ' + '
                + const(m.group(4))
            ),
            line,
        )

        line = re.sub(
            r'\$(approx|rules|exact)\[LanguageIndex\("([^"]+)", '
            r'\$languages\)\] = _merge\(\$([a-zA-Z]+), \$([a-zA-Z]+)\)',
            lambda m: (
                target(m.group(1), c2u(m.group(2)).upper())
                + ' = '
                + const(m.group(3))
                + ' + '
                + const(m.group(4))
            ),
            line,
        )

        # A leading PHP variable becomes a generated constant name. This uses
        # the subdir argument rather than a loop variable of the caller.
        line = re.sub(r'^\$([a-zA-Z]+)', lambda m: const(m.group(1)), line)

        # NOTE(review): '$' is unescaped in this pattern, so it is an
        # end-of-string anchor and the substitution never matches. Left as-is
        # because escaping it would change the generated output -- confirm
        # the original intent before touching it.
        for _ in range(len(lang_tuple)):
            line = re.sub(r'($[a-zA-Z]+) *\+ *($[a-zA-Z]+)', r'\1\+\2', line)

        # Remaining $name references that are language names become L_NAME.
        line = re.sub(
            r'\$([a-zA-Z]+)',
            lambda m: (
                'L_' + m.group(1).upper()
                if m.group(1) in lang_dict
                else '$' + m.group(1)
            ),
            line,
        )
        line = re.sub(r'\[\"\.\((L_[A-Z_+]+)\)\.\"\]', r'[\1]', line)

        # Replace each L_NAME by its numeric value ...
        line = re.sub(
            'L_([A-Z]+)', lambda m: str(lang_dict[m.group(1).lower()]), line
        )
        # ... and fold sums of integers (one pairwise addition per pass).
        for _ in range(4):
            line = re.sub(
                r'([0-9]+) *\+ *([0-9]+)',
                lambda m: str(int(m.group(1)) + int(m.group(2))),
                line,
            )

        if fn == 'lang':
            parts = line.split(',')
            if len(parts) >= 3:
                parts[0] = re.sub('/(.+?)/', r'\1', parts[0])
                parts[2] = parts[2].title()
                line = ','.join(parts)

        if 'languagenames' in fn:
            line = line.replace('"', "'")
            line = line.replace("','", "', '")
            if line and line[0] == "'":
                line = ' ' * 14 + line

        # Split the line into code and a trailing comment at the first '#'.
        hashsign = line.find('#')
        if hashsign >= 0:
            comment = line[hashsign:]
            code = line[:hashsign]
        else:
            comment = ''
            code = line

        code = code.rstrip()
        comment = comment.strip()
        if not re.match(r'^\s*$', code):
            comment = ' ' + comment

        if '(' in code and ')' in code:
            prefix = code[: code.find('(') + 1]
            suffix = code[code.rfind(')') :]
            tuplecontent = code[len(prefix) : len(code) - len(suffix)]

            elts = []
            for elt in tuplecontent.split(','):
                elt = elt.strip()
                # startswith/endswith are safe on an empty element (empty
                # parentheses or a trailing comma); indexing is not.
                if elt.startswith('"') and elt.endswith('"'):
                    elt = "'" + elt[1:-1].replace("'", "\\'") + "'"
                elts.append(elt)

            code = prefix + ', '.join(elts) + suffix

        line = code + comment
        line = re.sub('# *', '# ', line)

        if line:
            state['nl'] = False
            if state['array_seen'] and not (
                line[0] == '_' or line.startswith('BMDATA')
            ):
                line = ' ' * 4 + line
            return line + '\n'
        if not state['nl']:
            state['nl'] = True
            return '\n'
        return ''

    if len(sys.argv) > 1:
        bmdir = sys.argv[1].rstrip('/') + '/'
    else:
        bmdir = '../../bmpm/'

    outfilename = '../abydos/phonetic/_beider_morse_data.py'

    # A normal (non-raw) string: the '\n' sequences must be real newlines,
    # otherwise the generated module starts with unparseable text.
    header = (
        '# -*- coding: utf-8 -*-\n\n'
        '# Copyright 2014-2018 by Christopher C. Little.\n'
        '# This file is part of Abydos.\n'
        '#\n'
        '# This file is based on Alexander Beider and Stephen P. Morse\'s '
        'implementation\n'
        '# of the Beider-Morse Phonetic Matching (BMPM) System, available at\n'
        '# http://stevemorse.org/phonetics/bmpm.htm.\n'
        '#\n'
        '# Abydos is free software: you can redistribute it and/or modify\n'
        '# it under the terms of the GNU General Public License as published '
        'by\n'
        '# the Free Software Foundation, either version 3 of the License, or\n'
        '# (at your option) any later version.\n'
        '#\n'
        '# Abydos is distributed in the hope that it will be useful,\n'
        '# but WITHOUT ANY WARRANTY; without even the implied warranty of\n'
        '# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n'
        '# GNU General Public License for more details.\n'
        '#\n'
        '# You should have received a copy of the GNU General Public License\n'
        '# along with Abydos. If not, see <http://www.gnu.org/licenses/>.\n\n'
        '"""abydos.phonetic._beider_morse_data.\n\n'
        'Behind-the-scenes constants, rules, etc. for the Beider-Morse '
        'Phonentic\n'
        'Matching (BMPM) algorithm\n\n'
        'DO NOT EDIT - This document is automatically generated from the '
        'reference\n'
        'implementation in PHP.\n'
        '"""\n'
        '# pylint: disable=line-too-long\n\n'
        'from __future__ import (\n'
        ' absolute_import,\n'
        ' division,\n'
        ' print_function,unicode_literals,\n'
        ')\n'
    )

    subdirs = ('gen', 'sep', 'ash')
    prefixes = ('rules', 'approx', 'exact', 'hebrew', 'language', 'lang')

    with codecs.open(outfilename, 'w', 'utf-8') as outfile:
        outfile.write(header)

        outfile.write('L_NONE = 0\n')
        for i, l in enumerate(lang_tuple):
            outfile.write('L_' + l.upper() + ' = 2**' + str(i) + '\n')
        outfile.write('\n\n')

        tail_parts.append('\nBMDATA = {}\n')

        for s in subdirs:
            tail_parts.append('\nBMDATA[\'' + s + '\'] = {}\n')
            tail_parts.append('BMDATA[\'' + s + '\'][\'approx\'] = {}\n')
            tail_parts.append('BMDATA[\'' + s + '\'][\'exact\'] = {}\n')
            tail_parts.append('BMDATA[\'' + s + '\'][\'rules\'] = {}\n')
            tail_parts.append('BMDATA[\'' + s + '\'][\'hebrew\'] = {}\n\n')
            tail_parts.append(
                'BMDATA[\''
                + s
                + '\'][\'language_rules\'] = _'
                + s.upper()
                + '_LANGUAGE_RULES\n'
            )
            tail_parts.append(
                'BMDATA[\''
                + s
                + '\'][\'languages\'] = _'
                + s.upper()
                + '_LANGUAGES\n'
            )

            phps = [
                f
                for f in sorted(listdir(bmdir + s + '/'))
                if (isfile(bmdir + s + '/' + f) and f.endswith('.php'))
            ]
            for infilename in phps:
                if not infilename.startswith(prefixes):
                    continue

                # Every input file starts outside of any array.
                state['array_seen'] = False
                infilepath = bmdir + s + '/' + infilename
                with open(infilepath, 'rb') as rawfile:
                    infileenc = chardet.detect(rawfile.read())['encoding']
                print(s + '/' + infilename)  # noqa: T001

                with codecs.open(infilepath, 'r', infileenc) as infile:
                    outfile.write('# ' + s + '/' + infilename + '\n')

                    # Skip everything up to the end of the first block
                    # comment, and again after a closing '?>'.
                    ignore = True
                    for line in infile:
                        if 'function Language' in line:
                            break
                        if not ignore:
                            if re.search(r'\?>', line):
                                ignore = True
                            else:
                                line = pythonize(line, infilename[:-4], s)
                                if line.startswith('BMDATA'):
                                    tail_parts.append(line)
                                else:
                                    outfile.write(line)
                        if '*/' in line:
                            ignore = False

                outfile.write('\n\n')

        outfile.write(''.join(tail_parts))

    # Second pass: re-read the generated file and tidy it up.
    with codecs.open(outfilename, 'r', 'utf-8') as outfile:
        outfilelines = outfile.readlines()

    sep_lang = (
        "('any', 'french', 'hebrew', 'italian', 'portuguese', 'spanish')"
    )

    with codecs.open(outfilename, 'w', 'utf-8') as outfile:
        nl = False
        fixlanguagesarray = False

        for line in outfilelines:
            line = line.rstrip()
            if line:
                if fixlanguagesarray:
                    # Continuation of a line that ended with '='.
                    line = ' ' + line.strip()
                    fixlanguagesarray = False
                if len(line) > 79 or sep_lang in line:
                    line += ' # noqa: E501'
                outfile.write(line)
                if not line.endswith('='):
                    outfile.write('\n')
                else:
                    # No newline: the next non-empty line is joined on.
                    fixlanguagesarray = True
                nl = False
            else:
                # Collapse runs of blank lines into a single one.
                if not nl:
                    outfile.write('\n')
                nl = True
| 439 | |||
| 443 |