TravisData   F
last analyzed

Complexity

Total Complexity 70

Size/Duplication

Total Lines 429
Duplicated Lines 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
c 3
b 0
f 0
dl 0
loc 429
rs 2.7272
wmc 70

18 Methods

Rating   Name   Duplication   Size   Complexity  
A __init__() 0 22 2
A parse_job_log_file() 0 17 3
A parse_travis_worker_tag() 0 22 3
A get_build_data() 0 23 2
C set_build_matrix() 0 63 8
A parse_job_log() 0 8 1
A get_job_data() 0 18 1
D parse_travis_time_tag() 0 34 8
B process_build_job() 0 24 2
A has_timing_tags() 0 13 3
B parse_job_log_stream() 0 20 6
A get_started_at() 0 6 2
D get_substage_name() 0 34 8
B process_pull_request_data() 0 28 6
B process_job_data() 0 42 2
A get_finished_at() 0 6 2
B get_job_duration() 0 11 5
B process_build_jobs() 0 16 6

How to fix   Complexity   

Complex Class

Complex classes like TravisData often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# vim: set expandtab sw=4 ts=4:
2
"""
3
Functions and classes to retrieve and parse Travis CI build data.
4
5
Copyright (C) 2014-2016 Dieter Adriaenssens <[email protected]>
6
7
This file is part of buildtimetrend/python-lib
8
<https://github.com/buildtimetrend/python-lib/>
9
10
This program is free software: you can redistribute it and/or modify
11
it under the terms of the GNU Affero General Public License as published by
12
the Free Software Foundation, either version 3 of the License, or
13
any later version.
14
15
This program is distributed in the hope that it will be useful,
16
but WITHOUT ANY WARRANTY; without even the implied warranty of
17
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
GNU Affero General Public License for more details.
19
20
You should have received a copy of the GNU Affero General Public License
21
along with this program. If not, see <http://www.gnu.org/licenses/>.
22
"""
23
from builtins import str
24
from builtins import object
25
import re
26
import json
27
from buildtimetrend import logger
28
from buildtimetrend import tools
29
from buildtimetrend.buildjob import BuildJob
30
from buildtimetrend.collection import Collection
31
from buildtimetrend.travis.connector import TravisOrgConnector
32
from buildtimetrend.travis.connector import TravisConnector
33
from buildtimetrend.travis.substage import TravisSubstage
34
try:
35
    # For Python 3.0 and later
36
    from urllib.error import HTTPError, URLError
37
except ImportError:
38
    # Fall back to Python 2's urllib2
39
    from urllib2 import HTTPError, URLError
40
41
42
# Regular expressions that recognise Travis CI timing information in a
# line of a job log. Each entry captures its values in named groups.
TRAVIS_LOG_PARSE_TIMING_STRINGS = [
    # travis_time:end tag : hash, start/finish timestamps and duration
    (
        r'travis_time:end:(?P<end_hash>.*)'
        r':start=(?P<start_timestamp>\d+)'
        r',finish=(?P<finish_timestamp>\d+)'
        r',duration=(?P<duration>\d+)\x0d\x1b'
    ),
    # travis_fold:end tag : stage name and substage number
    r'travis_fold:end:(?P<end_stage>\w+)\.(?P<end_substage>\d+)\x0d\x1b',
    # travis_fold:start tag : stage name and substage number
    r'travis_fold:start:(?P<start_stage>\w+)\.(?P<start_substage>\d+)\x0d\x1b',
    # travis_time:start tag : hash
    r'travis_time:start:(?P<start_hash>.*)\x0d\x1b\[0K',
    # command that follows a '$ ' prompt, up to the carriage return
    r'\$\ (?P<command>.*)\r',
]

# Regular expression that recognises the 'Using worker' line of a job log,
# capturing the worker hostname and operating system.
TRAVIS_LOG_PARSE_WORKER_STRING = r'Using worker:\ (?P<hostname>.*):(?P<os>.*)'
52
53
54
class TravisData(object):
55
56
    """Gather data from Travis CI using the API."""
57
58
    def __init__(self, repo, build_id, connector=None):
        """
        Set up an empty data container for one Travis CI build.

        Parameters:
        - repo : github repository slug (e.g. buildtimetrend/python-lib)
        - build_id : Travis CI build id (e.g. 158), stored as a string
        - connector : Travis Connector instance, a TravisOrgConnector is
          created if the argument is not a TravisConnector instance
        """
        # build identification
        self.repo = repo
        self.build_id = str(build_id)

        # use the given TravisConnector, Travis Org connector by default
        self.connector = (
            connector if isinstance(connector, TravisConnector)
            else TravisOrgConnector()
        )

        # data retrieved from the API and the jobs derived from it
        self.builds_data = {}
        self.current_build_data = {}
        self.build_jobs = {}

        # job and substage that are currently being processed
        self.current_job = BuildJob()
        self.travis_substage = None
80
81
    def get_build_data(self):
        """
        Request the data of this build from the Travis CI API.

        The response is stored in builds_data and written to the debug log.

        Returns True if retrieving the data was successful,
        False if the request raised an HTTPError or URLError.
        """
        request = 'repos/{repo}/builds?number={build_id}'.format(
            repo=self.repo, build_id=self.build_id
        )

        try:
            response = self.connector.json_request(request)
        except (HTTPError, URLError) as error:
            logger.error("Error getting build data from Travis CI: %s", error)
            return False

        self.builds_data = response

        # log builds_data
        data_dump = json.dumps(self.builds_data, sort_keys=True, indent=2)
        logger.debug("Build #%s data : %s", str(self.build_id), data_dump)

        return True
104
105
    def get_substage_name(self, command):
        """
        Look up the Travis CI substage name of a cli command.

        The command is searched in the lists of the config of the current
        build. The name is formatted as '<stage>.<position>', where the
        position of the command in its list is counted from 1.

        Parameters:
        - command : cli command

        Returns the substage name, or an empty string if the command is not
        a string, the build config is not set or the command is not found.
        """
        if not tools.is_string(command):
            return ""

        build_data = self.current_build_data
        if len(build_data) == 0 or "config" not in build_data:
            logger.warning(
                "Travis CI build config is not set"
            )
            return ""

        build_config = build_data["config"]
        # nothing to look up in an empty build config
        if not build_config:
            return ""

        for stage_name, commands in build_config.items():
            if not tools.is_list(commands) or command not in commands:
                continue

            substage_name = "{stage}.{substage:d}".format(
                stage=stage_name, substage=commands.index(command) + 1
            )
            logger.debug(
                "Substage %s corresponds to '%s'",
                substage_name, command
            )
            return substage_name

        return ""
139
140
    def process_build_jobs(self):
141
        """
142
        Retrieve Travis CI build job data.
143
144
        Method is a generator, iterate result to get each processed build job.
145
        """
146
        if len(self.builds_data) > 0 and "builds" in self.builds_data:
147
            for build in self.builds_data['builds']:
148
                self.current_build_data = build
149
150
                if "job_ids" in build:
151
                    for job_id in build['job_ids']:
152
                        yield self.process_build_job(job_id)
153
154
            # reset current_build_data after builds are processed
155
            self.current_build_data = {}
156
157
    def process_build_job(self, job_id):
        """
        Retrieve and process the data and the log of a Travis CI build job.

        The processed job is stored in build_jobs, using the job ID
        (as a string) as key, and current_job is replaced by a new
        BuildJob instance.

        Parameters:
        - job_id : ID of the job to process

        Returns the processed build job, or None if job_id is None.
        """
        if job_id is None:
            return None

        # retrieve job data from Travis CI and process the build/job data
        self.process_job_data(self.get_job_data(job_id))
        # parse Travis CI job log file
        self.parse_job_log(job_id)

        # store build job and continue with a new build job instance
        processed_job = self.current_job
        self.build_jobs[str(job_id)] = processed_job
        self.current_job = BuildJob()

        # return processed build job
        return processed_job
181
182
    def get_job_data(self, job_id):
        """
        Request the data of a job from the Travis CI API.

        The response is written to the debug log.

        Parameters:
        - job_id : ID of the job to process

        Returns the job data as returned by the connector.
        """
        job_id_string = str(job_id)
        job_data = self.connector.json_request(
            'jobs/{:s}'.format(job_id_string)
        )

        # log job_data
        logger.debug(
            "Job #%s data : %s",
            job_id_string,
            json.dumps(job_data, sort_keys=True, indent=2)
        )

        return job_data
200
201
    def process_job_data(self, job_data):
        """
        Copy the data of a Travis CI job to the current build job.

        Set build/job properties :
        - Build/job ID
        - build result : passed, failed, errored
        - git repo
        - git branch
        - CI platform : Travis
        - build matrix (language, language version, compiler, ...)
        - build_trigger : push, pull_request
        - pull_request (is_pull_request, title, number)
        - started_at and finished_at timestamps
        - duration, only if the job log has no timing tags

        Parameters:
        - job_data : dictionary with Travis CI job data
        """
        job = job_data['job']
        current_job = self.current_job

        # buildnumber is part before "." of job number
        job_number = job['number']
        current_job.add_property("build", job_number.split(".")[0])
        current_job.add_property("job", job_number)
        current_job.add_property("branch", job_data['commit']['branch'])
        current_job.add_property("repo", job['repository_slug'])
        current_job.add_property("ci_platform", 'travis')
        current_job.add_property("result", job['state'])

        self.set_build_matrix(job_data)
        self.process_pull_request_data()

        # the timestamps have to be set before checking for timing tags,
        # because that check reads the started_at property
        current_job.set_started_at(job['started_at'])
        current_job.set_finished_at(job['finished_at'])

        if self.has_timing_tags():
            return

        # calculate job duration from start and finished timestamps
        # if no timing tags are available
        current_job.add_property("duration", self.get_job_duration())
243
244
    def set_build_matrix(self, job_data):
        """
        Retrieve build matrix data from job data and store in properties.

        The collected items are stored as the 'build_matrix' property of the
        current job. Nothing is stored if the job data has no job config.

        Properties :
        - language
        - language version (if applicable)
        - language components (Android, if the config lists components)
        - compiler (if applicable)
        - operating system
        - environment parameters

        Parameters:
        - job_data : dictionary with Travis CI job data
        """
        # check if job config data exists
        if 'job' not in job_data or 'config' not in job_data['job']:
            logger.warning("Job config data doesn't exist")
            return

        build_matrix = Collection()
        job_config = job_data['job']['config']

        if 'language' in job_config:
            language = job_config['language']
            build_matrix.add_item('language', language)

            # set language version
            # ('d', 'dart', 'go', 'perl', 'php', 'python', 'rust')
            if language in job_config:
                language_config = job_config[language]
                if language == 'android':
                    # 'components' is not guaranteed to be present in the
                    # android section (and the section is not guaranteed to
                    # be a mapping), so look it up defensively instead of
                    # failing the whole job with a KeyError/TypeError
                    components = None
                    if isinstance(language_config, dict):
                        components = language_config.get("components")
                    if components is not None:
                        build_matrix.add_item(
                            "language_components",
                            " ".join(components)
                        )
                else:
                    build_matrix.add_item(
                        'language_version',
                        str(language_config)
                    )

        # language specific build matrix parameters,
        # maps the job config key to the build matrix item name
        parameters = {
            'ghc': 'ghc',  # Haskell
            'jdk': 'jdk',  # Java, Android, Groovy, Ruby, Scala
            'lein': 'lein',  # Clojure
            'mono': 'mono',  # C#, F#, Visual Basic
            'node_js': 'node_js',  # Javascript
            'otp_release': 'otp_release',  # Erlang
            'rvm': 'rvm',  # Ruby, Objective-C
            'gemfile': 'gemfile',  # Ruby, Objective-C
            'xcode_sdk': 'xcode_sdk',  # Objective-C
            'xcode_scheme': 'xcode_scheme',  # Objective-C
            'compiler': 'compiler',  # C, C++
            'os': 'os',
            'env': 'parameters'
        }
        for parameter, name in parameters.items():
            if parameter in job_config:
                build_matrix.add_item(name, str(job_config[parameter]))

        self.current_job.add_property(
            "build_matrix",
            build_matrix.get_items_with_summary()
        )
308
309
    def process_pull_request_data(self):
310
        """Retrieve pull request data from Travis CI API."""
311
        # check if collection is empty
312
        if self.current_build_data:
313
            if "event_type" in self.current_build_data:
314
                # build trigger (push or pull_request)
315
                self.current_job.add_property(
316
                    "build_trigger",
317
                    self.current_build_data["event_type"]
318
                )
319
320
            # pull_request
321
            pull_request_data = {}
322
            if "pull_request" in self.current_build_data:
323
                pull_request_data["is_pull_request"] = \
324
                    self.current_build_data["pull_request"]
325
            else:
326
                pull_request_data["is_pull_request"] = False
327
328
            if "pull_request_title" in self.current_build_data:
329
                pull_request_data["title"] = \
330
                    self.current_build_data["pull_request_title"]
331
332
            if "pull_request_number" in self.current_build_data:
333
                pull_request_data["number"] = \
334
                    self.current_build_data["pull_request_number"]
335
336
            self.current_job.add_property("pull_request", pull_request_data)
337
338
    def parse_job_log(self, job_id):
        """
        Download the log of a Travis CI job and parse it.

        Parameters:
        - job_id : ID of the job to process
        """
        log_stream = self.connector.download_job_log(job_id)
        self.parse_job_log_stream(log_stream)
346
347
    def parse_job_log_file(self, filename):
        """
        Open a Travis CI log file and parse it.

        Parameters :
        - filename : filename of Travis CI log

        Returns False if the file check fails,
        True if the file was read and parsed.
        """
        if tools.check_file(filename):
            # the file is opened in binary mode and handed to the
            # stream parser
            with open(filename, 'rb') as log_file:
                self.parse_job_log_stream(log_file)
            return True

        return False
364
365
    def parse_job_log_stream(self, stream):
        """
        Parse Travis CI job log stream.

        Timing tags are only parsed if has_timing_tags() reports that the
        current job can have them, the worker tag is always parsed.

        Parameters:
        - stream : stream of job log file, it is iterated line by line
          and the lines can be of type str or bytes
        """
        self.travis_substage = TravisSubstage()
        check_timing_tags = self.has_timing_tags()

        for line in stream:
            # convert to str if line is bytes type
            # Build output may contain bytes that are not valid UTF-8;
            # replace those instead of aborting the parsing of the complete
            # log with a UnicodeDecodeError. The tags that are parsed below
            # are plain ASCII, so they are not affected by the replacement.
            if isinstance(line, bytes):
                line = line.decode('utf-8', 'replace')
            # parse Travis CI timing tags
            if check_timing_tags and 'travis_' in line:
                self.parse_travis_time_tag(line)
            # parse Travis CI worker tag
            if 'Using worker:' in line:
                self.parse_travis_worker_tag(line)
385
386
    def parse_travis_time_tag(self, line):
        """
        Parse a log line with Travis CI timing tags and process the result.

        Every pattern of TRAVIS_LOG_PARSE_TIMING_STRINGS that matches the
        line is passed to the current substage. When the substage has
        finished it is added as a stage to the current job (only if it is
        complete) and a new substage is started.

        Parameters:
        - line : line from logfile containing Travis CI tags
        """
        if self.travis_substage is None:
            self.travis_substage = TravisSubstage()

        # make the control characters visible in the debug log
        logger.debug(
            'line : %s',
            line.replace('\x0d', '*').replace('\x1b', 'ESC')
        )

        # parse Travis CI timing tags
        for pattern in TRAVIS_LOG_PARSE_TIMING_STRINGS:
            match = re.search(pattern, line)
            if not match:
                continue

            substage = self.travis_substage
            substage.process_parsed_tags(match.groupdict())

            # keep collecting tags until the substage has finished
            if not substage.has_finished():
                continue

            # set substage name, if it is not set
            if not substage.has_name() and substage.has_command():
                substage.set_name(
                    self.get_substage_name(substage.get_command())
                )

            # only log complete substages
            if not substage.finished_incomplete:
                self.current_job.add_stage(substage.stage)

            # the finished substage is replaced by a new instance
            self.travis_substage = TravisSubstage()
420
421
    def parse_travis_worker_tag(self, line):
        """
        Parse a log line with the Travis CI worker tag.

        If the line matches TRAVIS_LOG_PARSE_WORKER_STRING and the parsed
        tags pass the check, they are stored as the 'worker' property of
        the current job.

        Parameters:
        - line : line from logfile containing Travis CI tags
        """
        logger.debug('line : %s', line)

        # parse Travis CI worker tags
        match = re.search(TRAVIS_LOG_PARSE_WORKER_STRING, line)
        if match is None:
            return

        worker_tags = match.groupdict()

        # check if worker_tags is a dictionary and
        # if it contains all required tags
        required_tags = list({'hostname', 'os'})
        if not tools.check_dict(worker_tags, "worker_tags", required_tags):
            return

        logger.debug("Worker tags : %s", worker_tags)
        self.current_job.add_property("worker", worker_tags)
443
444
    def has_timing_tags(self):
445
        """
446
        Check if Travis CI job log has timing tags.
447
448
        Timing tags were introduced on Travis CI starting 2014-08-07,
449
        check if started_at is more recent.
450
        """
451
        started_at = self.current_job.get_property("started_at")
452
        if started_at is None or "timestamp_seconds" not in started_at:
453
            return False
454
455
        # 1407369600 is epoch timestamp of 2014-08-07T00:00:00Z
456
        return started_at["timestamp_seconds"] > 1407369600
457
458
    def get_job_duration(self):
459
        """Calculate build job duration."""
460
        started_at = self.current_job.get_property("started_at")
461
        finished_at = self.current_job.get_property("finished_at")
462
        if started_at is None or "timestamp_seconds" not in started_at or \
463
                finished_at is None or "timestamp_seconds" not in finished_at:
464
            return 0.0
465
466
        timestamp_start = float(started_at["timestamp_seconds"])
467
        timestamp_end = float(finished_at["timestamp_seconds"])
468
        return timestamp_end - timestamp_start
469
470
    def get_started_at(self):
        """
        Retrieve timestamp when build was started.

        Returns the 'started_at' value of the current build data,
        or None if the build data doesn't pass the check for that key.
        """
        build_data = self.current_build_data
        if not tools.check_dict(build_data, key_list=["started_at"]):
            return None

        return build_data['started_at']
476
477
    def get_finished_at(self):
        """
        Retrieve timestamp when build finished.

        Returns the 'finished_at' value of the current build data,
        or None if the build data doesn't pass the check for that key.
        """
        build_data = self.current_build_data
        if not tools.check_dict(build_data, key_list=["finished_at"]):
            return None

        return build_data['finished_at']
483