bm_php2py._run_script() - Code Metrics - Inspection of "0.3.6" - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#141)

by Chris

created 2018-11-17 06:08 UTC

bm_php2py._run_script() F

↳ Parent: bm_php2py

Complexity

Conditions

Size

Total Lines	386
Code Lines	257

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	54
eloc	257
nop	0
dl	0
loc	386
rs	0
c	0
b	0
f	0

How to fix Long Method Complexity

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright 2014-2018 by Christopher C. Little.
# This file is part of Abydos.
#
# Abydos is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Abydos is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.


"""bm_php2py.py.

This helper script converts Beider-Morse Phonetic Matching Algorithm (BMPM)
code from PHP to Python.

It assumes that the BMPM code is located at ../../bmpm (relative to this
directory in the abydos repository).

It reads the BMPM reference implementation and generates the file
../abydos/phonetic/_beider_morse_data.py.

The file _beider_morse.py may still need manual changes to be made after this
script is run.
"""

from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals,
)

import codecs
import re
import sys
from os import listdir
from os.path import isfile

# noinspection PyPackageRequirements
import chardet


def _run_script():
    """Convert the BMPM PHP reference data into a Python data module.

    The PHP files in the ``gen``, ``sep`` and ``ash`` subdirectories of the
    BMPM directory (first command-line argument, ``../../bmpm/`` by default)
    are converted line by line and written to
    ``../abydos/phonetic/_beider_morse_data.py``. The generated file is then
    re-read and rewritten to collapse runs of blank lines, join assignments
    whose value starts on the following line, and append ``noqa`` markers to
    over-long lines.
    """
    # The list of languages from BMPM to support (might need to be updated or
    # tuned as BMPM is updated)
    lang_tuple = (
        'any',
        'arabic',
        'cyrillic',
        'czech',
        'dutch',
        'english',
        'french',
        'german',
        'greek',
        'greeklatin',
        'hebrew',
        'hungarian',
        'italian',
        'latvian',
        'polish',
        'portuguese',
        'romanian',
        'russian',
        'spanish',
        'turkish',
    )

    # Every language gets its own power of two so language sets can be summed.
    lang_dict = {lang: 2 ** i for i, lang in enumerate(lang_tuple)}
    lang_dict['common'] = "'common'"

    # State shared between this function and pythonize(). A dict is used
    # because ``global`` would refer to (undefined) module-level names and
    # ``nonlocal`` is unavailable on Python 2, which this script still
    # supports via its __future__ imports.
    state = {'nl': False, 'array_seen': False}

    tail_text = ''

    def c2u(name):
        """Convert camelCase (used in PHP) to Python-standard snake_case.

        Src:
        https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case

        Parameters
        ----------
        name : str
            A function or variable name in camelCase

        Returns
        -------
        str
            The name in snake_case

        """
        s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
        return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()

    def pythonize(line, fn='', subdir='gen'):
        """Convert a line of BMPM code from PHP to Python.

        Parameters
        ----------
        line : str
            A line of code
        fn : str
            A filename (without the ``.php`` extension)
        subdir : str
            The file's subdirectory

        Returns
        -------
        str
            The code in Python, terminated by a newline; a lone newline for
            the first of a run of empty lines; or the empty string when the
            line is dropped.

        """
        if '$all' in line:
            return ''
        if 'make the sum of all languages be visible in the function' in line:
            return ''

        line = line.strip()

        if 'array' in line and not line.startswith('//'):
            state['array_seen'] = True

        line = re.sub('//+', '#', line)
        # Lines consisting of a single-string array are dropped entirely.
        if line and re.search(r'array\("[^"]+?"\)', line):
            line = ''
        line = line.replace('array', '')
        line = re.sub(r'^\s*', '', line)
        line = re.sub(';$', '', line)
        line = re.sub('^include_.+', '', line)

        def const_name(php_name):
            # e.g. approxCommon in subdir 'gen' -> _GEN_APPROX_COMMON
            return '_' + subdir.upper() + '_' + c2u(php_name).upper()

        def bmdata_slot(kind, lang):
            # e.g. BMDATA['gen']['approx'][L_FRENCH]
            return "BMDATA['" + subdir + "']['" + kind + "'][L_" + lang + ']'

        line = re.sub(
            r'\$(approx|rules|exact)\[LanguageIndex\("([^"]+)", '
            + r'\$languages\)\] = \$([a-zA-Z]+)',
            lambda m: (
                bmdata_slot(m.group(1), m.group(2).upper())
                + ' = '
                + const_name(m.group(3))
            ),
            line,
        )

        line = re.sub(
            r'\$(approx|rules|exact|hebrew)([A-Za-z]+) = _merge'
            + r'\(\$([a-zA-Z]+), \$([a-zA-Z]+)\)',
            lambda m: (
                bmdata_slot(m.group(1), c2u(m.group(2)).upper())
                + ' = '
                + const_name(m.group(3))
                + ' + '
                + const_name(m.group(4))
            ),
            line,
        )

        line = re.sub(
            r'\$(approx|rules|exact)\[LanguageIndex\("([^"]+)", '
            + r'\$languages\)\] = _merge\(\$([a-zA-Z]+), \$([a-zA-Z]+)\)',
            lambda m: (
                bmdata_slot(m.group(1), c2u(m.group(2)).upper())
                + ' = '
                + const_name(m.group(3))
                + ' + '
                + const_name(m.group(4))
            ),
            line,
        )

        # A leading PHP variable becomes a module-level constant name.
        line = re.sub(
            r'^\$([a-zA-Z]+)', lambda m: const_name(m.group(1)), line
        )

        # NOTE(review): ``$`` is unescaped in this pattern, so it can only
        # match at the end of the (already stripped) line and therefore never
        # fires. It is left unchanged because escaping it would alter the
        # generated output; the numeric folding below tolerates the spaces.
        for _ in range(len(lang_tuple)):
            line = re.sub(r'($[a-zA-Z]+) *\+ *($[a-zA-Z]+)', r'\1\+\2', line)

        # Known language variables become L_<LANG>; other variables are kept.
        line = re.sub(
            r'\$([a-zA-Z]+)',
            lambda m: (
                'L_' + m.group(1).upper()
                if m.group(1) in lang_dict
                else '$' + m.group(1)
            ),
            line,
        )
        line = re.sub(r'\[\"\.\((L_[A-Z_+]+)\)\.\"\]', r'[\1]', line)

        # Replace every L_<LANG> token with its numeric value ...
        line = re.sub(
            'L_([A-Z]+)', lambda m: str(lang_dict[m.group(1).lower()]), line
        )
        # ... and fold sums of those values into a single number.
        for _ in range(4):
            line = re.sub(
                r'([0-9]+) *\+ *([0-9]+)',
                lambda m: str(int(m.group(1)) + int(m.group(2))),
                line,
            )

        if fn == 'lang':
            parts = line.split(',')
            if len(parts) >= 3:
                parts[0] = re.sub('/(.+?)/', r'\1', parts[0])
                parts[2] = parts[2].title()
                line = ','.join(parts)

        if 'languagenames' in fn:
            line = line.replace('"', "'")
            line = line.replace("','", "', '")
            if line and line[0] == "'":
                line = ' ' * 14 + line

        # Split the line into its code part and its trailing comment.
        comment = ''
        if '#' in line:
            hashsign = line.find('#')
            comment = line[hashsign:]
            code = line[:hashsign]
        else:
            code = line

        code = code.rstrip()
        comment = comment.strip()
        if not re.match(r'^\s*$', code):
            comment = '  ' + comment

        if '(' in code and ')' in code:
            prefix = code[: code.find('(') + 1]
            suffix = code[code.rfind(')') :]
            tuplecontent = code[len(prefix) : len(code) - len(suffix)]

            elts = []
            for elt in tuplecontent.split(','):
                elt = elt.strip()
                # startswith/endswith are safe on empty elements (e.g. "()"
                # or a trailing comma), where indexing would raise IndexError.
                if elt.startswith('"') and elt.endswith('"'):
                    elt = "'" + elt[1:-1].replace("'", "\\'") + "'"
                elts.append(elt)

            code = prefix + ', '.join(elts) + suffix

        line = code + comment
        line = re.sub('# *', '# ', line)

        if line:
            state['nl'] = False
            # Inside an array, everything but assignments is indented.
            if state['array_seen'] and not (
                line[0] == '_' or line.startswith('BMDATA')
            ):
                line = ' ' * 4 + line
            return line + '\n'
        if not state['nl']:
            # Emit at most one blank line for a run of empty lines.
            state['nl'] = True
            return '\n'
        return ''

    if len(sys.argv) > 1:
        bmdir = sys.argv[1].rstrip('/') + '/'
    else:
        bmdir = '../../bmpm/'

    outfilename = '../abydos/phonetic/_beider_morse_data.py'

    # Header of the generated module. This must not be a raw string: the
    # newlines have to be real newlines for the output to be valid Python.
    header = '\n'.join(
        (
            '# -*- coding: utf-8 -*-',
            '',
            '# Copyright 2014-2018 by Christopher C. Little.',
            '# This file is part of Abydos.',
            '#',
            '# This file is based on Alexander Beider and Stephen P. Morse\'s '
            'implementation',
            '# of the Beider-Morse Phonetic Matching (BMPM) System, '
            'available at',
            '# http://stevemorse.org/phonetics/bmpm.htm.',
            '#',
            '# Abydos is free software: you can redistribute it and/or modify',
            '# it under the terms of the GNU General Public License as '
            'published by',
            '# the Free Software Foundation, either version 3 of the '
            'License, or',
            '# (at your option) any later version.',
            '#',
            '# Abydos is distributed in the hope that it will be useful,',
            '# but WITHOUT ANY WARRANTY; without even the implied warranty of',
            '# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the',
            '# GNU General Public License for more details.',
            '#',
            '# You should have received a copy of the GNU General Public '
            'License',
            '# along with Abydos. If not, see <http://www.gnu.org/licenses/>.',
            '',
            '"""abydos.phonetic._beider_morse_data.',
            '',
            'Behind-the-scenes constants, rules, etc. for the Beider-Morse '
            'Phonentic',
            'Matching (BMPM) algorithm',
            '',
            'DO NOT EDIT - This document is automatically generated from the '
            'reference',
            'implementation in PHP.',
            '"""',
            '# pylint: disable=line-too-long',
            '',
            'from __future__ import (',
            '    absolute_import,',
            '    division,',
            '    print_function,',
            '    unicode_literals,',
            ')',
            '',
        )
    )

    subdirs = ('gen', 'sep', 'ash')
    prefixes = ('rules', 'approx', 'exact', 'hebrew', 'language', 'lang')

    with codecs.open(outfilename, 'w', 'utf-8') as outfile:
        outfile.write(header)

        outfile.write('L_NONE = 0\n')
        for i, lang in enumerate(lang_tuple):
            outfile.write('L_' + lang.upper() + ' = 2**' + str(i) + '\n')
        outfile.write('\n\n')

        # BMDATA assignments are collected here and written after all of the
        # constants they refer to.
        tail_text += '\nBMDATA = {}\n'

        for s in subdirs:
            tail_text += '\nBMDATA[\'' + s + '\'] = {}\n'
            tail_text += 'BMDATA[\'' + s + '\'][\'approx\'] = {}\n'
            tail_text += 'BMDATA[\'' + s + '\'][\'exact\'] = {}\n'
            tail_text += 'BMDATA[\'' + s + '\'][\'rules\'] = {}\n'
            tail_text += 'BMDATA[\'' + s + '\'][\'hebrew\'] = {}\n\n'
            tail_text += (
                'BMDATA[\''
                + s
                + '\'][\'language_rules\'] = _'
                + s.upper()
                + '_LANGUAGE_RULES\n'
            )
            tail_text += (
                'BMDATA[\''
                + s
                + '\'][\'languages\'] = _'
                + s.upper()
                + '_LANGUAGES\n'
            )

            phps = [
                f
                for f in sorted(listdir(bmdir + s + '/'))
                if (isfile(bmdir + s + '/' + f) and f.endswith('.php'))
            ]
            for infilename in phps:
                if not infilename.startswith(prefixes):
                    continue

                state['array_seen'] = False
                infilepath = bmdir + s + '/' + infilename
                with open(infilepath, 'rb') as rawfile:
                    infileenc = chardet.detect(rawfile.read())['encoding']
                print(s + '/' + infilename)  # noqa: T001

                outfile.write('# ' + s + '/' + infilename + '\n')

                # Skip everything up to the end of the leading block comment
                # and everything after a closing PHP tag.
                ignore = True
                with codecs.open(infilepath, 'r', infileenc) as infile:
                    for line in infile:
                        if 'function Language' in line:
                            break
                        if not ignore:
                            if re.search(r'\?>', line):
                                ignore = True
                            else:
                                line = pythonize(line, infilename[:-4], s)
                                if line.startswith('BMDATA'):
                                    tail_text += line
                                else:
                                    outfile.write(line)
                        if '*/' in line:
                            ignore = False

                outfile.write('\n\n')

        outfile.write(tail_text)

    # Second pass: tidy up the file that was just generated.
    with codecs.open(outfilename, 'r', 'utf-8') as generated:
        outfilelines = generated.readlines()

    sep_lang = (
        "('any', 'french', 'hebrew', 'italian', 'portuguese', 'spanish')"
    )

    with codecs.open(outfilename, 'w', 'utf-8') as outfile:
        nl = False
        fixlanguagesarray = False

        for line in outfilelines:
            line = line.rstrip()
            if line:
                if fixlanguagesarray:
                    # Continuation of an assignment whose line ended in '='.
                    line = ' ' + line.strip()
                    fixlanguagesarray = False
                if len(line) > 79 or sep_lang in line:
                    line += '  # noqa: E501'
                outfile.write(line)
                if not line.endswith('='):
                    outfile.write('\n')
                else:
                    # Keep the value on the same line as the '='.
                    fixlanguagesarray = True
                nl = False
            else:
                if not nl:
                    outfile.write('\n')
                nl = True


# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    _run_script()


1			#!/usr/bin/env python
2			# -*- coding: utf-8 -*-
3
4			# Copyright 2014-2018 by Christopher C. Little.
5			# This file is part of Abydos.
6			#
7			# Abydos is free software: you can redistribute it and/or modify
8			# it under the terms of the GNU General Public License as published by
9			# the Free Software Foundation, either version 3 of the License, or
10			# (at your option) any later version.
11			#
12			# Abydos is distributed in the hope that it will be useful,
13			# but WITHOUT ANY WARRANTY; without even the implied warranty of
14			# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15			# GNU General Public License for more details.
16			#
17			# You should have received a copy of the GNU General Public License
18			# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
19
20
21			"""bm_php2py.py.
22
23			This helper script converts Beider-Morse Phonetic Matching Algorithm (BMPM)
24			code from PHP to Python.
25
26			It assumes that the BMPM code is located at ../../bmpm (relative to this
27			directory in the abydos repository).
28
29			It reads the BMPM reference implementation and generates the file
30			../abydos/_beider_morse_data.py.
31
32			The file _beider_morse.py may still need manual changes to be made after this
33			script is run.
34			"""
35
36			from __future__ import (
37			absolute_import,
38			division,
39			print_function,
40			unicode_literals,
41			)
42
43			import codecs
44			import re
45			import sys
46			from os import listdir
47			from os.path import isfile
48
49			# noinspection PyPackageRequirements
50			import chardet
51
52
53			def _run_script():
54			# The list of languages from BMPM to support (might need to be updated or
55			# tuned as BMPM is updated)
56			lang_tuple = (
57			'any',
58			'arabic',
59			'cyrillic',
60			'czech',
61			'dutch',
62			'english',
63			'french',
64			'german',
65			'greek',
66			'greeklatin',
67			'hebrew',
68			'hungarian',
69			'italian',
70			'latvian',
71			'polish',
72			'portuguese',
73			'romanian',
74			'russian',
75			'spanish',
76			'turkish',
77			)
78
79			lang_dict = {}
80			for i, l in enumerate(lang_tuple):
81			lang_dict[l] = 2 ** i
82			lang_dict['common'] = "'common'"
83
84			nl = False
85			array_seen = False
86
87			tail_text = ''
88
89			def c2u(name):
90			"""Convert camelCase (used in PHP) to Python-standard snake_case.
91
92			Src:
93			https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case
94
95			Parameters
96			----------
97			name: A function or variable name in camelCase
98
99			Returns
100			-------
101			str: The name in snake_case
102
103			"""
104			s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
105			s1 = re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
106			return s1
107
108			def pythonize(line, fn='', subdir='gen'):
109			"""Convert a line of BMPM code from PHP to Python.
110
111			Parameters
112			----------
113			line : str
114			A line of code
115			fn : str
116			A filename
117			subdir : str
118			The file's subdirectory
119
120			Returns
121			-------
122			The code in Python
123
124			"""
125			global nl, array_seen
126
127			if '$all' in line:
128			return ''
129			if 'make the sum of all languages be visible in the function' in line:
130			return ''
131
132			line = line.strip()
133
134			if 'array' in line and not line.startswith('//'):
135			array_seen = True
136
137			line = re.sub('//+', '#', line)
138			# line = re.sub('"\.\((\$.+?)\)\."', r'\1', line)
139			if line and re.search(r'array\("[^"]+?"\)', line):
140			# print("### " + line)
141			line = ''
142			line = line.replace('array', '')
143			line = re.sub(r'^\s*', '', line)
144			line = re.sub(';$', '', line)
145			line = re.sub('^include_.+', '', line)
146
147			line = re.sub(
148			r'\$(approx|rules|exact)\[LanguageIndex\("([^"]+)", '
149			+ r'\$languages\)\] = \$([a-zA-Z]+)',
150			lambda m: (
151			"BMDATA['"
152			+ subdir
153			+ "']['"
154			+ m.group(1)
155			+ "'][L_"
156			+ m.group(2).upper()
157			+ '] = _'
158			+ subdir.upper()
159			+ '_'
160			+ c2u(m.group(3)).upper()
161			),
162			line,
163			)
164
165			line = re.sub(
166			r'\$(approx|rules|exact|hebrew)([A-Za-z]+) = _merge'
167			+ r'\(\$([a-zA-Z]+), \$([a-zA-Z]+)\)',
168			lambda m: (
169			"BMDATA['"
170			+ subdir
171			+ "']['"
172			+ m.group(1)
173			+ "'][L_"
174			+ c2u(m.group(2)).upper()
175			+ '] = _'
176			+ subdir.upper()
177			+ '_'
178			+ c2u(m.group(3)).upper()
179			+ ' + _'
180			+ subdir.upper()
181			+ '_'
182			+ c2u(m.group(4)).upper()
183			),
184			line,
185			)
186
187			line = re.sub(
188			r'\$(approx|rules|exact)\[LanguageIndex\("([^"]+)", '
189			+ r'\$languages\)\] = _merge\(\$([a-zA-Z]+), \$([a-zA-Z]+)\)',
190			lambda m: (
191			"BMDATA['"
192			+ subdir
193			+ "']['"
194			+ m.group(1)
195			+ "'][L_"
196			+ c2u(m.group(2)).upper()
197			+ '] = _'
198			+ subdir.upper()
199			+ '_'
200			+ c2u(m.group(3)).upper()
201			+ ' + _'
202			+ subdir.upper()
203			+ '_'
204			+ c2u(m.group(4)).upper()
205			),
206			line,
207			)
208
209			line = re.sub(
210			r'^\$([a-zA-Z]+)',
211			lambda m: '_' + s.upper() + '_' + c2u(m.group(1)).upper(),
212			line,
213			)
214
215			for _ in range(len(lang_tuple)):
216			line = re.sub(r'($[a-zA-Z]+) *\+ *($[a-zA-Z]+)', r'\1\+\2', line)
217
218			line = re.sub(
219			r'\$([a-zA-Z]+)',
220			lambda m: (
221			'L_' + m.group(1).upper()
222			if m.group(1) in lang_dict
223			else '$' + m.group(1)
224			),
225			line,
226			)
227			line = re.sub(r'\[\"\.\((L_[A-Z_+]+)\)\.\"\]', r'[\1]', line)
228
229			line = re.sub(
230			'L_([A-Z]+)', lambda m: str(lang_dict[m.group(1).lower()]), line
231			)
232			for _ in range(4):
233			line = re.sub(
234			r'([0-9]+) *\+ *([0-9]+)',
235			lambda m: str(int(m.group(1)) + int(m.group(2))),
236			line,
237			)
238
239			if fn == 'lang':
240			if len(line.split(',')) >= 3:
241			parts = line.split(',')
242			parts[0] = re.sub('/(.+?)/', r'\1', parts[0])
243			# parts[1] = re.sub('\$', 'L_', parts[1])
244			# parts[1] = re.sub(' *\+ *', '|', parts[1])
245			parts[2] = parts[2].title()
246			line = ','.join(parts)
247
248			if 'languagenames' in fn:
249			line = line.replace('"', "'")
250			line = line.replace("','", "', '")
251			if line and line[0] == "'":
252			line = ' ' * 14 + line
253
254			# fix upstream
255			# line = line.replace('ë', 'ü')
256
257			comment = ''
258			if '#' in line:
259			hashsign = line.find('#')
260			comment = line[hashsign:]
261			code = line[:hashsign]
262			else:
263			code = line
264
265			code = code.rstrip()
266			comment = comment.strip()
267			if not re.match(r'^\s*$', code):
268			comment = ' ' + comment
269
270			if '(' in code and ')' in code:
271			prefix = code[: code.find('(') + 1]
272			suffix = code[code.rfind(')') :]
273			tuplecontent = code[len(prefix) : len(code) - len(suffix)]
274
275			elts = tuplecontent.split(',')
276			for i in range(len(elts)):
277			elts[i] = elts[i].strip()
278			if elts[i][0] == '"' and elts[i][-1] == '"':
279			elts[i] = "'" + elts[i][1:-1].replace("'", "\\'") + "'"
280			tuplecontent = ', '.join(elts)
281
282			code = prefix + tuplecontent + suffix
283
284			line = code + comment
285			line = re.sub('# *', '# ', line)
286
287			if line:
288			nl = False
289			if array_seen and not (
290			line[0] == '_' or line.startswith('BMDATA')
291			):
292			line = ' ' * 4 + line
293			return line + '\n'
294			elif not nl:
295			nl = True
296			return '\n'
297			else:
298			return ''
299
300			if len(sys.argv) > 1:
301			bmdir = sys.argv[1].rstrip('/') + '/'
302			else:
303			bmdir = '../../bmpm/'
304
305			outfilename = '../abydos/phonetic/_beider_morse_data.py'
306			outfile = codecs.open(outfilename, 'w', 'utf-8')
307
308			outfile.write(
309			r'# -*- coding: utf-8 -*-\n\n# Copyright 2014-2018 by \
310			Christopher C. Little.\n# This file is part of Abydos.\n#\n# This file is \
311			based on Alexander Beider and Stephen P. Morse\'s implementation\n# of the \
312			Beider-Morse Phonetic Matching (BMPM) System, available at\n# \
313			http://stevemorse.org/phonetics/bmpm.htm.\n#\n# Abydos is free software: \
314			you can redistribute it and/or modify\n# it under the terms of the GNU \
315			General Public License as published by\n# the Free Software Foundation, \
316			either version 3 of the License, or\n# (at your option) any later version.\n\
317			#\n# Abydos is distributed in the hope that it will be useful,\n# but WITHOUT \
318			ANY WARRANTY; without even the implied warranty of\n# MERCHANTABILITY or \
319			FITNESS FOR A PARTICULAR PURPOSE. See the\n# GNU General Public License for \
320			more details.\n#\n# You should have received a copy of the GNU General Public \
321			License\n# along with Abydos. If not, see <http://www.gnu.org/licenses/>.\n\n\
322			"""abydos.phonetic._beider_morse_data.\n\nBehind-the-scenes constants, \
323			rules, etc. for the Beider-Morse Phonentic\nMatching (BMPM) algorithm\n\nDO \
324			NOT EDIT - This document is automatically generated from the reference\n\
325			implementation in PHP.\n"""\n# pylint: disable=line-too-long\n\nfrom \
326			__future__ import (\n absolute_import,\n division,\n print_function,\
327			unicode_literals,\n)\n'
328			)
329
330			outfile.write('L_NONE = 0\n')
331			for i, l in enumerate(lang_tuple):
332			outfile.write('L_' + l.upper() + ' = 2**' + str(i) + '\n')
333			outfile.write('\n\n')
334
335			tail_text += '\nBMDATA = {}\n'
336
337			subdirs = ('gen', 'sep', 'ash')
338
339			for s in subdirs:
340			tail_text += '\nBMDATA[\'' + s + '\'] = {}\n'
341			tail_text += 'BMDATA[\'' + s + '\'][\'approx\'] = {}\n'
342			tail_text += 'BMDATA[\'' + s + '\'][\'exact\'] = {}\n'
343			tail_text += 'BMDATA[\'' + s + '\'][\'rules\'] = {}\n'
344			tail_text += 'BMDATA[\'' + s + '\'][\'hebrew\'] = {}\n\n'
345			tail_text += (
346			'BMDATA[\''
347			+ s
348			+ '\'][\'language_rules\'] = _'
349			+ s.upper()
350			+ '_LANGUAGE_RULES\n'
351			)
352			tail_text += (
353			'BMDATA[\''
354			+ s
355			+ '\'][\'languages\'] = _'
356			+ s.upper()
357			+ '_LANGUAGES\n'
358			)
359
360			phps = [
361			f
362			for f in sorted(listdir(bmdir + s + '/'))
363			if (isfile(bmdir + s + '/' + f) and f.endswith('.php'))
364			]
365			for infilename in phps:
366			for pfx in (
367			'rules',
368			'approx',
369			'exact',
370			'hebrew',
371			'language',
372			'lang',
373			):
374			if infilename.startswith(pfx):
375			array_seen = False
376			infilepath = bmdir + s + '/' + infilename
377			infileenc = chardet.detect(open(infilepath, 'rb').read())[
378			'encoding'
379			]
380			print(s + '/' + infilename) # noqa: T001
381			infile = codecs.open(infilepath, 'r', infileenc)
382			# if infilename.startswith('lang'):
383			# tuplename = infilename[:-4]
384			# else:
385			# tuplename = pfx + '_' + infilename[len(pfx) : -4]
386			# indent = len(tuplename) + 21
387
388			outfile.write('# ' + s + '/' + infilename + '\n')
389
390			ignore = True
391			for line in infile:
392			if 'function Language' in line:
393			break
394			if not ignore:
395			if re.search(r'\?>', line):
396			ignore = True
397			else:
398			line = pythonize(line, infilename[:-4], s)
399			if line.startswith('BMDATA'):
400			tail_text += line
401			else:
402			outfile.write(line)
403			if '*/' in line:
404			ignore = False
405
406			outfile.write('\n\n')
407			break
408
409			outfile.write(tail_text)
410
411			outfile.close()
412			outfilelines = codecs.open(outfilename, 'r', 'utf-8').readlines()
413			outfile = codecs.open(outfilename, 'w', 'utf-8')
414			nl = False
415			fixlanguagesarray = False
416
417			sep_lang = (
418			"('any', 'french', 'hebrew', 'italian', 'portuguese', 'spanish')"
419			)
420
421			for line in outfilelines:
422			line = line.rstrip()
423			if line:
424			if fixlanguagesarray:
425			line = ' ' + line.strip()
426			fixlanguagesarray = False
427			if len(line) > 79 or sep_lang in line:
428			line += ' # noqa: E501'
429			outfile.write(line)
430			if not line.endswith('='):
431			outfile.write('\n')
432			else:
433			fixlanguagesarray = True
434			nl = False
435			else:
436			if not nl:
437			outfile.write('\n')
438			nl = True
439
440
441			if __name__ == '__main__':
442			_run_script()
443

chrislit / abydos

Pull Request — master (#141)

bm_php2py._run_script() F

Complexity

Size

Duplication

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like