GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.

MultiFormatter.await_all_processes()   B
last analyzed

Complexity

Conditions 6

Size

Total Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 6
c 1
b 0
f 0
dl 0
loc 11
rs 8
1
# -*- coding: utf-8 -*-
2
# -----------------------------------------------------------------------------
3
# Copyright (c) 2016 Continuum Analytics, Inc.
4
#
5
# Licensed under the terms of the MIT License
6
# (see LICENSE.txt for details)
7
# -----------------------------------------------------------------------------
8
"""Generic and custom code formatters."""
9
10
# Standard library imports
11
import codecs
12
import json
13
import os
14
import platform
15
import re
16
import subprocess
17
import sys
18
19
# Third party imports
20
from yapf.yapflib.yapf_api import FormatCode
21
import autopep8
22
import isort
23
24
# Local imports
25
from ciocheck.config import DEFAULT_COPYRIGHT_HEADER
26
from ciocheck.tools import Tool
27
from ciocheck.utils import atomic_replace, cpu_count, diff
28
29
HERE = os.path.dirname(os.path.realpath(__file__))
30
31
32
class Formatter(Tool):
    """Generic formatter tool."""

    @classmethod
    def format_task(cls, path):
        """
        Format task executed by the parallel script helper.

        Formats the file at `path` with `cls.format_file` and, when the
        contents changed, atomically replaces the file on disk.

        Returns an empty dict when the file is left untouched. Otherwise
        returns a dict with the keys `path`, `error`, `diff` and `created`.
        If the formatter raises, the file is not modified and the returned
        dict carries the crash message under `error`.
        """
        old_contents, new_contents = '', ''
        try:
            old_contents, new_contents, encoding = cls.format_file(path)
        except Exception as err:
            # Report the crash instead of dropping it: without this early
            # return a crashing formatter is indistinguishable from a file
            # that needed no changes.
            error = "{name} crashed on {path}: {error}".format(
                name=cls.name, path=path, error=err)
            return {
                'path': path,
                'error': error,
                'diff': diff(old_contents, new_contents),
                'created': False,
            }

        if new_contents == old_contents:
            return {}

        result = {
            'path': path,
            'error': None,
            'diff': diff(old_contents, new_contents),
            'created': False,  # pyformat might create new init files.
        }
        atomic_replace(path, new_contents, encoding)
        return result

    @classmethod
    def format_string(cls, old_contents):
        """
        Format content of a file.

        Subclasses are expected to return a tuple
        `(old_contents, new_contents, encoding)`, which is what
        `format_task` unpacks.
        """
        raise NotImplementedError

    @classmethod
    def format_file(cls, path):
        """Read `path` and format its contents with `format_string`."""
        # NOTE(review): the file is read with the platform default encoding
        # while results are written back using the encoding reported by
        # `format_string` -- confirm this is intended.
        with open(path, 'r') as file_obj:
            old_contents = file_obj.read()
        return cls.format_string(old_contents)

    def run(self, paths):
        """Format paths."""
        raise NotImplementedError
78
79
80
class IsortFormatter(Formatter):
    """Formatter that sorts import statements with isort."""

    language = 'python'
    name = 'isort'
    extensions = ('py', )

    # Configuration file name and its (section, alias) pairs
    config_file = '.isort.cfg'
    config_sections = [('isort', 'settings')]

    def run(self, paths):
        """Do nothing; formatting happens through `format_string`."""

    @classmethod
    def format_string(cls, old_contents):
        """Return `(old_contents, isort-sorted contents, 'utf-8')`."""
        sorter = isort.SortImports(file_contents=old_contents)
        return old_contents, sorter.output, 'utf-8'
100
101
102
class YapfFormatter(Formatter):
    """Formatter that reformats code with yapf."""

    language = 'python'
    name = 'yapf'
    extensions = ('py', )

    # Configuration file name and its (section, alias) pairs
    config_file = '.style.yapf'
    config_sections = [('yapf:style', 'style')]

    def run(self, paths):
        """Do nothing; formatting happens through `format_string`."""

    @classmethod
    def format_string(cls, old_contents):
        """Return `(old_contents, yapf-formatted contents, 'utf-8')`."""
        # `cmd_root` is not defined on this class; per the original author's
        # note it is assigned to the formatter inside format_task.
        style_path = os.path.join(cls.cmd_root, cls.config_file)

        # FormatFile's "inplace" option is deliberately avoided: it does not
        # perform an atomic replace, which is dangerous.
        new_contents, _changed = FormatCode(
            old_contents, style_config=style_path)

        on_windows = platform.system() == 'Windows'
        if on_windows:
            # yapf emits CRLF line endings on Windows; normalise them.
            new_contents = new_contents.replace("\r\n", "\n")

            # Windows yapf seems to force a newline on empty input, so keep
            # empty files empty.
            if len(old_contents) == 0:
                new_contents = ""

        return old_contents, new_contents, 'utf-8'
136
137
138
class Autopep8Formatter(Formatter):
    """Formatter that fixes PEP 8 issues with autopep8."""

    language = 'python'
    name = 'autopep8'
    extensions = ('py', )

    # Configuration file name and its (section, alias) pairs
    config_file = '.autopep8'
    config_sections = [('autopep8', 'pep8')]

    def run(self, paths):
        """Do nothing; formatting happens through `format_string`."""

    @classmethod
    def format_string(cls, old_contents):
        """Return `(old_contents, autopep8-fixed contents, 'utf-8')`."""
        # NOTE(review): the configuration dictionary is built but its value
        # is not used -- autopep8 always receives an empty options dict.
        # This looks like a debugging leftover; confirm before passing the
        # real options through.
        cls.make_config_dictionary()
        fixed_contents = autopep8.fix_code(old_contents, options={})
        return old_contents, fixed_contents, 'utf-8'
160
161
162
class MultiFormatter(object):
    """Formatter handling multiple formatters in parallel."""

    language = 'generic'
    name = 'multiformatter'

    def __init__(self, cmd_root, check):
        """
        Store the settings handed to every formatting subprocess.

        `cmd_root` is exported to the subprocess as CIOCHECK_PROJECT_ROOT
        and `check` (converted with `str`) as CIOCHECK_CHECK.
        """
        self.cmd_root = cmd_root
        self.check = check

    def _format_files(self, paths):
        """
        Start a separate subprocess running format_task.py on `paths`.

        Returns the `subprocess.Popen` object; stdout and stderr are piped
        so that they can be collected later with `communicate`.
        """
        cmd = [sys.executable, os.path.join(HERE, 'format_task.py')]
        env = os.environ.copy()
        env['CIOCHECK_PROJECT_ROOT'] = self.cmd_root
        env['CIOCHECK_CHECK'] = str(self.check)
        return subprocess.Popen(
            cmd + paths,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)

    def _format_results(self, results):
        """
        Rearrange results for standard consumption.

        `results` is an iterable of dicts. The values found under each key
        are gathered into one list per key, and every list is sorted by the
        `'path'` entry of its items.
        """
        grouped = {}
        for item in results:
            for key, value in item.items():
                grouped.setdefault(key, []).append(value)

        # Sort by path
        return {
            key: sorted(values, key=lambda dic: dic['path'])
            for key, values in grouped.items()
        }

    @property
    def extensions(self):
        """Return all extensions of the used multiformatters."""
        all_extensions = []
        for formatter in MULTI_FORMATTERS:
            all_extensions += list(formatter.extensions)
        return all_extensions

    def run(self, paths):
        """
        Run formatters.

        This uses some silly multi-process stuff because Yapf is very slow
        and CPU-bound.

        Not using multiprocessing because not sure how its "magic"
        (pickling, __main__ import) really works.

        `paths` may be a dict (its sorted keys are used) or any iterable of
        paths (order is kept). The argument itself is never modified.
        Returns the output of every subprocess, regrouped by
        `_format_results`.

        Raises whatever `json.loads` raises when a subprocess writes invalid
        JSON to stdout; the stderr of that subprocess is printed first.
        """
        if isinstance(paths, dict):
            pending = sorted(paths.keys())
        else:
            # Work on a copy so the caller's list is not emptied.
            pending = list(paths)

        processes = []
        results = []

        def await_one_process():
            """Wait for the oldest process and collect its parsed output."""
            # We pop(0) because the first process is the oldest
            proc = processes.pop(0)
            output, error = proc.communicate()

            if isinstance(output, bytes):
                output = output.decode()

            if isinstance(error, bytes):
                error = error.decode()

            # Print stderr before parsing so that it is still shown when
            # the subprocess crashed and left invalid JSON on stdout.
            if error:
                print(error)

            results.extend(item for item in json.loads(output) if item)

        # We send a few files to each process to try to reduce per-process
        # setup time
        batch_size = 3
        for start in range(0, len(pending), batch_size):
            some_files = sorted(pending[start:start + batch_size])
            processes.append(self._format_files(some_files))

            # Don't run too many at once, this is a goofy algorithm. The
            # output of the processes awaited here is kept in `results`.
            if len(processes) > (cpu_count() * 3):
                while len(processes) > cpu_count():
                    await_one_process()

        while processes:
            await_one_process()

        return self._format_results(results)
277
278
279
class PythonFormatter(Formatter):
    """Handle __init__.py addition and headers (copyright and encoding)."""

    language = 'python'
    name = 'pyformat'
    extensions = ('py', )

    # Matches an existing copyright comment line
    COPYRIGHT_RE = re.compile('# *Copyright ')

    def __init__(self, cmd_root):
        """Handle __init__.py addition and headers (copyright and encoding)."""
        super(PythonFormatter, self).__init__(cmd_root)
        # NOTE(review): `config` is not assigned anywhere in this class but
        # is read by `run` and `_setup_headers`; presumably it is set by the
        # caller before `run` -- confirm.
        self.config = None
        self.copyright_header = None
        self.encoding_header = None

    def _setup_headers(self):
        """
        Load custom encoding and copyright headers if defined.

        The encoding header comes from the `header` config value. The
        copyright header is read from the `copyright_file` config value
        (relative to `cmd_root`) when that file exists, otherwise
        DEFAULT_COPYRIGHT_HEADER is used.
        """
        self.encoding_header = self.config.get_value('header')

        copyright_file = self.config.get_value('copyright_file')
        copyright_path = os.path.join(self.cmd_root, copyright_file)
        if os.path.isfile(copyright_path):
            with open(copyright_path, 'r') as file_obj:
                self.copyright_header = file_obj.read()
        else:
            self.copyright_header = DEFAULT_COPYRIGHT_HEADER

    def _add_headers(self, path, header, copy):
        """
        Add headers as needed in file.

        `header` enables adding the encoding header and `copy` enables
        adding the copyright header. Returns an empty dict when the file
        content does not change; otherwise the file is atomically replaced
        and a result dict with the keys `path`, `diff`, `created`, `error`,
        `added-copy` and `added-header` is returned.
        """
        with codecs.open(path, 'r', 'utf-8') as file_obj:
            old_contents = file_obj.read()

        have_encoding = (self.encoding_header in old_contents)
        have_copyright = (self.COPYRIGHT_RE.search(old_contents) is not None)

        if have_encoding and have_copyright:
            return {}

        # Note: do NOT automatically change the copyright owner or date. The
        # copyright owner/date is a statement of legal reality, not a way to
        # create legal reality. All we do here is add an owner/date if there
        # is none; if it's incorrect, the person creating/reviewing the pull
        # request will need to fix it. If there's already an owner/date then
        # we leave it as-is assuming someone has manually chosen it.
        body = old_contents
        prefix = ''
        if have_encoding:
            # The copyright is missing (see early return above). Remove the
            # encoding header from the body and put it back in front so
            # that it is positioned before the copyright header.
            # FIXME: Is this safe on win and linux?
            lines = old_contents.splitlines(True)
            body = ''.join(
                line for line in lines if self.encoding_header not in line)
            prefix = self.encoding_header
        elif header:
            prefix = self.encoding_header

        if not have_copyright and copy:
            prefix += self.copyright_header

        new_contents = prefix + body

        # Compare and diff against what is actually on disk, not against
        # the body with the encoding line stripped out; otherwise unchanged
        # files get rewritten and reported with a misleading diff.
        if new_contents == old_contents:
            return {}

        results = {
            'path': path,
            'diff': diff(old_contents, new_contents),
            'created': False,
            'error': None,
            'added-copy': not have_copyright and copy,
            'added-header': not have_encoding and header,
        }
        atomic_replace(path, new_contents, 'utf-8')
        return results

    def _add_missing_init_py(self, paths):
        """
        Add missing __init__.py files in the module subdirectories.

        Returns one result dict per file that was created.
        """
        results = []
        visited = set()
        for path in paths:
            folder = os.path.dirname(path)

            # Avoid adding an init on repo level if setup.py or other
            # script on the top level has changed. Every occurrence of the
            # root is skipped, not just the first one.
            if folder == self.cmd_root or folder in visited:
                continue
            visited.add(folder)

            init_py = os.path.join(folder, "__init__.py")
            if os.path.exists(init_py):
                continue

            # Opening in write mode is enough to create the empty file.
            with codecs.open(init_py, 'w', 'utf-8'):
                pass

            results.append({
                'path': init_py,
                'created': True,
                'diff': diff('', ''),
                'error': None,
            })
        return results

    def format_string(self, string):
        """Format content of a file (unused by this formatter)."""

    def run(self, paths):
        """
        Run pyformat formatter.

        Depending on the `add_init`, `add_header` and `add_copyright`
        config values, creates missing __init__.py files and adds the
        encoding/copyright headers. Returns a list of result dicts: the
        header results when headers are enabled, otherwise the results of
        the created init files.
        """
        paths = sorted(paths)
        add_copyright = self.config.get_value('add_copyright')
        add_header = self.config.get_value('add_header')
        add_init = self.config.get_value('add_init')

        results_init = []
        if add_init:
            results_init = self._add_missing_init_py(paths)
            paths = sorted(paths + [item['path'] for item in results_init])

        if not (add_header or add_copyright):
            # Only init handling was requested (empty list if it was not).
            return results_init

        self._setup_headers()
        created_paths = {item['path'] for item in results_init}
        results = []
        for path in paths:
            result = self._add_headers(
                path, header=add_header, copy=add_copyright)
            if not result:
                continue
            # Only flag the files that were really created by this run.
            if path in created_paths:
                result['created'] = True
            results.append(result)
        return results
420
421
422
# Formatters listed by `MultiFormatter.extensions`
MULTI_FORMATTERS = [IsortFormatter, YapfFormatter, Autopep8Formatter]

# All formatters: the Python header/init formatter followed by the
# multi-formatters, in the same order.
FORMATTERS = [PythonFormatter] + MULTI_FORMATTERS
433
434
435
def test():
    """Main local test (currently a no-op)."""
    return None


if __name__ == '__main__':
    test()
442