Completed
Push — master ( 892a3d...ce0b1b )
by Gonzalo
8s
created

PythonFormatter   B

Complexity

Total Complexity 38

Size/Duplication

Total Lines 141
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 141
rs 8.3999
wmc 38

6 Methods

Rating   Name   Duplication   Size   Complexity  
A _setup_headers() 0 11 3
A __init__() 0 6 1
B _add_missing_init_py() 0 24 6
F _add_headers() 0 46 13
F run() 0 37 14
A format_string() 0 3 1
1
# -*- coding: utf-8 -*-
2
# -----------------------------------------------------------------------------
3
# Copyright (c) 2016 Continuum Analytics, Inc.
4
#
5
# Licensed under the terms of the MIT License
6
# (see LICENSE.txt for details)
7
# -----------------------------------------------------------------------------
8
"""Generic and custom code formatters."""
9
10
# Standard library imports
11
import codecs
12
import json
13
import os
14
import platform
15
import re
16
import subprocess
17
import sys
18
19
# Third party imports
20
from yapf.yapflib.yapf_api import FormatCode
21
import autopep8
22
import isort
23
24
# Local imports
25
from ciocheck.config import DEFAULT_COPYRIGHT_HEADER
26
from ciocheck.tools import Tool
27
from ciocheck.utils import atomic_replace, cpu_count, diff
28
29
# Directory containing this module; realpath() resolves symlinks first, so the
# value is the real on-disk location. Used to locate the format_task.py helper.
HERE = os.path.dirname(os.path.realpath(__file__))
30
31
32
class Formatter(Tool):
    """Generic formatter tool.

    Subclasses implement `format_string` (and `run`); `format_task` and
    `format_file` provide the shared read/format/write plumbing.
    """

    @classmethod
    def format_task(cls, path):
        """Format task executed by the parallel script helper.

        Reads `path`, formats it through `format_file` and, when the
        contents changed, atomically rewrites the file.

        Returns an empty dict when the file did not need any change.
        Otherwise returns a dict with the keys 'path', 'error', 'diff' and
        'created'. When the formatter raises, the dict carries the error
        message and the file is left untouched.
        """
        try:
            old_contents, new_contents, encoding = cls.format_file(path)
        except Exception as err:
            # Surface the crash instead of silently reporting "no change":
            # the error message must reach whoever consumes the results.
            error = "{name} crashed on {path}: {error}".format(
                name=cls.name, path=path, error=err)
            return {
                'path': path,
                'error': error,
                'diff': diff('', ''),
                'created': False,
            }

        if new_contents == old_contents:
            return {}

        result = {
            'path': path,
            'error': None,
            'diff': diff(old_contents, new_contents),
            'created': False,  # pyformat might create new init files.
        }
        # Only touch the file once the result (including the diff) is built.
        atomic_replace(path, new_contents, encoding)
        return result

    @classmethod
    def format_string(cls, old_contents):
        """Format content of a file.

        Subclasses must return a `(old_contents, new_contents, encoding)`
        tuple, which is what `format_task` unpacks.
        """
        raise NotImplementedError

    @classmethod
    def format_file(cls, path):
        """Format file for use with task queue.

        Reads `path` in text mode and returns whatever `format_string`
        returns for its contents.
        """
        # NOTE(review): the file is read with the platform default encoding
        # while the concrete formatters report 'utf-8' for writing - confirm.
        with open(path, 'r') as file_obj:
            old_contents = file_obj.read()
        return cls.format_string(old_contents)

    def run(self, paths):
        """Format paths."""
        raise NotImplementedError
78
79
80
class IsortFormatter(Formatter):
    """Formatter that sorts the imports of Python files with isort."""

    name = 'isort'
    language = 'python'
    extensions = ('py', )

    # Config
    config_sections = [('isort', 'settings')]
    config_file = '.isort.cfg'

    @classmethod
    def format_string(cls, old_contents):
        """Sort the imports found in `old_contents`.

        Returns the tuple `(old_contents, new_contents, 'utf-8')` where
        `new_contents` is the output produced by isort.
        """
        sorter = isort.SortImports(file_contents=old_contents)
        return old_contents, sorter.output, 'utf-8'

    def run(self, paths):
        """Format paths (no-op for this formatter)."""
        return None
100
101
102
class YapfFormatter(Formatter):
    """Formatter that reformats Python code with yapf."""

    name = 'yapf'
    language = 'python'
    extensions = ('py', )

    # Config
    config_sections = [('yapf:style', 'style')]
    config_file = '.style.yapf'

    @classmethod
    def format_string(cls, old_contents):
        """Format the content of a file with yapf.

        Returns the tuple `(old_contents, new_contents, 'utf-8')`.
        """
        # cmd_root is assigned to formatter inside format_task... ugly!
        style_path = os.path.join(cls.cmd_root, cls.config_file)

        # FormatCode is used rather than the "inplace" option of FormatFile
        # because the latter does not do an atomic replace, which is
        # dangerous; keep it that way unless yapf gets fixed.
        new_contents, _ = FormatCode(old_contents, style_config=style_path)

        if platform.system() != 'Windows':
            return old_contents, new_contents, 'utf-8'

        # yapf screws up line endings on windows
        new_contents = new_contents.replace("\r\n", "\n")
        if not old_contents:
            # Windows yapf seems to force a newline on empty input; undo it.
            new_contents = ""
        return old_contents, new_contents, 'utf-8'

    def run(self, paths):
        """Format paths (no-op for this formatter)."""
        return None
136
137
138
class Autopep8Formatter(Formatter):
    """Formatter that fixes PEP 8 issues in Python code with autopep8."""

    name = 'autopep8'
    language = 'python'
    extensions = ('py', )

    # Config
    config_sections = [('autopep8', 'pep8')]
    config_file = '.autopep8'

    @classmethod
    def format_string(cls, old_contents):
        """Format the content of a file with autopep8.

        Returns the tuple `(old_contents, new_contents, 'utf-8')`.
        """
        # NOTE(review): the configuration dictionary is built but its value
        # is discarded; autopep8 always receives empty options. Confirm
        # whether the project configuration is meant to be ignored here.
        cls.make_config_dictionary()
        fixed = autopep8.fix_code(old_contents, options={})
        return old_contents, fixed, 'utf-8'

    def run(self, paths):
        """Format paths (no-op for this formatter)."""
        return None
160
161
162
class MultiFormatter(object):
    """Formatter handling multiple formatters in parallel."""

    language = 'generic'
    name = 'multiformatter'

    def __init__(self, cmd_root, check):
        """Store the project root and the check flag passed to the workers."""
        self.cmd_root = cmd_root
        self.check = check

    def _format_files(self, paths):
        """Helper method to start a separate subprocess.

        Launches `format_task.py` (located next to this module) with the
        current interpreter on the given `paths` and returns the `Popen`
        object. The project root and the check flag are handed over through
        the CIOCHECK_PROJECT_ROOT and CIOCHECK_CHECK environment variables.
        """
        cmd = [sys.executable, os.path.join(HERE, 'format_task.py')]
        env = os.environ.copy()
        env['CIOCHECK_PROJECT_ROOT'] = self.cmd_root
        env['CIOCHECK_CHECK'] = str(self.check)
        return subprocess.Popen(
            cmd + paths,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)

    def _format_results(self, results):
        """Rearrange results for standard consumption.

        `results` is an iterable of dicts; the values found under each key
        are gathered in a list and every list is sorted by the 'path' entry
        of its items. Returns the resulting dict of lists.
        """
        grouped = {}
        for item in results:
            for key, value in item.items():
                grouped.setdefault(key, []).append(value)

        # Sort by path
        return {
            key: sorted(values, key=lambda dic: dic['path'])
            for key, values in grouped.items()
        }

    @property
    def extensions(self):
        """Return all extensions of the used multiformatters."""
        all_extensions = []
        for formatter in MULTI_FORMATTERS:
            all_extensions += list(formatter.extensions)
        return all_extensions

    def run(self, paths):
        """
        Run formatters.

        This uses some silly multi-process stuff because Yapf is very slow and
        CPU-bound.

        Not using a multiprocessing because not sure how its "magic" (pickling,
        __main__ import) really works.

        `paths` may be a list of paths or a dict keyed by path (the keys are
        then processed in sorted order). The argument is never modified.
        Returns the worker results rearranged by `_format_results`.
        """
        # Work on a private copy so the caller's container is left intact.
        if isinstance(paths, dict):
            pending = sorted(paths.keys())
        else:
            pending = list(paths)

        if not pending:
            return {}

        processes = []
        results = []

        def await_one_process():
            """Wait for the oldest process and collect its parsed output."""
            # We pop(0) because the first process is the oldest
            proc = processes.pop(0)
            output, error = proc.communicate()

            if isinstance(output, bytes):
                output = output.decode()

            if isinstance(error, bytes):
                error = error.decode()

            # Show stderr before parsing stdout: a crashed worker emits no
            # valid JSON and its traceback would otherwise never be seen.
            if error:
                print(error)

            results.extend(item for item in json.loads(output) if item)

        max_running = cpu_count()
        batch_size = 3

        # We send a few files to each process to try to reduce per-process
        # setup time
        for start in range(0, len(pending), batch_size):
            some_files = sorted(pending[start:start + batch_size])
            processes.append(self._format_files(some_files))

            # Don't run too many at once, this is a goofy algorithm
            if len(processes) > (max_running * 3):
                while len(processes) > max_running:
                    # Results gathered while throttling are kept as well.
                    await_one_process()

        while processes:
            await_one_process()

        return self._format_results(results)
277
278
279
class PythonFormatter(Formatter):
    """Handle __init__.py addition and headers (copyright and encoding)."""

    language = 'python'
    name = 'pyformat'
    extensions = ('py', )

    COPYRIGHT_RE = re.compile('# *Copyright ')

    def __init__(self, cmd_root):
        """Handle __init__.py addition and headers (copyright and encoding)."""
        super(PythonFormatter, self).__init__(cmd_root)
        # `config` is expected to be assigned before `run` is called; the
        # headers are loaded lazily by `_setup_headers`.
        self.config = None
        self.copyright_header = None
        self.encoding_header = None

    def _setup_headers(self):
        """Load custom encoding and copyright headers if defined.

        The encoding header comes from the 'header' config value. The
        copyright header is read from the configured 'copyright_file'
        (relative to the project root) when that file exists, otherwise
        DEFAULT_COPYRIGHT_HEADER is used.
        """
        self.encoding_header = self.config.get_value('header')

        copyright_file = self.config.get_value('copyright_file')
        copyright_path = os.path.join(self.cmd_root, copyright_file)
        if os.path.isfile(copyright_path):
            with open(copyright_path, 'r') as file_obj:
                self.copyright_header = file_obj.read()
        else:
            self.copyright_header = DEFAULT_COPYRIGHT_HEADER

    def _add_headers(self, path, header, copy):
        """Add headers as needed in file.

        `header` and `copy` tell whether a missing encoding header and a
        missing copyright header, respectively, should be added.

        Returns an empty dict when the file is left untouched, otherwise a
        result dict ('path', 'diff', 'created', 'error', 'added-copy',
        'added-header') after atomically rewriting the file.
        """
        with codecs.open(path, 'r', 'utf-8') as file_obj:
            original_contents = file_obj.read()

        have_encoding = self.encoding_header in original_contents
        have_copyright = (
            self.COPYRIGHT_RE.search(original_contents) is not None)

        if have_encoding and have_copyright:
            return {}

        # Note: do NOT automatically change the copyright owner or date. The
        # copyright owner/date is a statement of legal reality, not a way to
        # create legal reality. All we do here is add an owner/date if there
        # is none; if it's incorrect, the person creating/reviewing the pull
        # request will need to fix it. If there's already an owner/date then
        # we leave it as-is assuming someone has manually chosen it.
        body = original_contents
        prefix = ''
        if have_encoding and not have_copyright:
            # Remove the header from the body so that it is positioned
            # correctly (before the copyright) when re-added below.
            # FIXME: Is this safe on win and linux?
            lines = body.splitlines(True)
            body = ''.join(
                line for line in lines if self.encoding_header not in line)
            prefix = self.encoding_header

        add_header = bool(header) and not have_encoding
        add_copy = bool(copy) and not have_copyright

        if add_header:
            prefix += self.encoding_header

        if add_copy:
            prefix += self.copyright_header

        new_contents = prefix + body

        # Compare (and diff) against what is actually on disk, not against
        # the body that already had the encoding line stripped.
        if new_contents == original_contents:
            return {}

        results = {
            'path': path,
            'diff': diff(original_contents, new_contents),
            'created': False,
            'error': None,
            'added-copy': add_copy,
            'added-header': add_header,
        }
        atomic_replace(path, new_contents, 'utf-8')
        return results

    def _add_missing_init_py(self, paths):
        """Add missing __init__.py files in the module subdirectories.

        Creates an empty __init__.py in the directory of every path that
        lacks one and returns one result dict per created file.
        """
        results = []

        # De-duplicate the folders so that every directory is handled once.
        folders = sorted({os.path.dirname(p) for p in paths})

        for folder in folders:
            # Avoid adding an init on repo level if setup.py or other script
            # on the top level has changed (however many of them did).
            if folder == self.cmd_root:
                continue

            init_py = os.path.join(folder, "__init__.py")
            if os.path.exists(init_py):
                continue

            # Opening for writing is enough to create the empty file.
            with codecs.open(init_py, 'w', 'utf-8'):
                pass

            results.append({
                'path': init_py,
                'created': True,
                'diff': diff('', ''),
                'error': None,
            })
        return results

    def format_string(self, string):
        """Format content of a file (not used by this formatter)."""
        pass

    def run(self, paths):
        """Run pyformat formatter.

        Depending on the 'add_init', 'add_header' and 'add_copyright' config
        values, creates missing __init__.py files and/or adds the encoding
        and copyright headers. Returns the list of result dicts: the header
        results when headers are enabled, else the init results, else [].
        """
        paths = sorted(paths)
        add_copyright = self.config.get_value('add_copyright')
        add_header = self.config.get_value('add_header')
        add_init = self.config.get_value('add_init')

        results_init = []
        if add_init:
            results_init = self._add_missing_init_py(paths)
            # Newly created files must get their headers as well.
            paths = sorted(paths + [item['path'] for item in results_init])

        results_header_copyright = []
        if add_header or add_copyright:
            self._setup_headers()
            for path in paths:
                result = self._add_headers(
                    path, header=add_header, copy=add_copyright)
                if result:
                    results_header_copyright.append(result)

        # Flag as created only the results whose file was created above.
        created_paths = set(
            item['path'] for item in results_init if item['created'])
        for result in results_header_copyright:
            if result['path'] in created_paths:
                result['created'] = True

        if add_copyright or add_header:
            results = results_header_copyright
        elif add_init:
            results = results_init
        else:
            results = []
        return results
420
421
422
# Formatters exposed through MultiFormatter (see MultiFormatter.extensions).
MULTI_FORMATTERS = [IsortFormatter, YapfFormatter, Autopep8Formatter]

# Every formatter defined in this module, PythonFormatter first.
FORMATTERS = [
    PythonFormatter, IsortFormatter, YapfFormatter, Autopep8Formatter
]
433
434
435
def test():
    """Main local test (currently does nothing)."""
    return None


if __name__ == '__main__':
    test()
442