Completed
Push — master ( 219e97...2114cf )
by Dieter
01:21
created

process_pull_request_data()   B

Complexity

Conditions 6

Size

Total Lines 28

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 6
dl 0
loc 28
rs 7.5385
1
# vim: set expandtab sw=4 ts=4:
2
"""
3
Functions and classes to retrieve and parse Travis CI build data.
4
5
Copyright (C) 2014-2015 Dieter Adriaenssens <[email protected]>
6
7
This file is part of buildtimetrend/python-lib
8
<https://github.com/buildtimetrend/python-lib/>
9
10
This program is free software: you can redistribute it and/or modify
11
it under the terms of the GNU Affero General Public License as published by
12
the Free Software Foundation, either version 3 of the License, or
13
any later version.
14
15
This program is distributed in the hope that it will be useful,
16
but WITHOUT ANY WARRANTY; without even the implied warranty of
17
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
GNU Affero General Public License for more details.
19
20
You should have received a copy of the GNU Affero General Public License
21
along with this program. If not, see <http://www.gnu.org/licenses/>.
22
"""
23
from builtins import str
24
from builtins import object
25
import re
26
import json
27
from buildtimetrend import logger
28
from buildtimetrend.tools import check_file
29
from buildtimetrend.tools import check_dict
30
from buildtimetrend.tools import is_string
31
from buildtimetrend.buildjob import BuildJob
32
from buildtimetrend.collection import Collection
33
from buildtimetrend.travis.connector import TravisOrgConnector
34
from buildtimetrend.travis.connector import TravisConnector
35
from buildtimetrend.travis.substage import TravisSubstage
36
try:
37
    # For Python 3.0 and later
38
    from urllib.error import HTTPError, URLError
39
except ImportError:
40
    # Fall back to Python 2's urllib2
41
    from urllib2 import HTTPError, URLError
42
43
44
# Regular expressions to parse timing tags in a Travis CI log file.
# Every pattern is tried, in this order, against each log line that is
# handed to TravisData.parse_travis_time_tag(); the named groups of a match
# are passed on as parsed tags.
TRAVIS_LOG_PARSE_TIMING_STRINGS = [
    # end of a timed section : hash, start/finish timestamp and duration
    r'travis_time:end:(?P<end_hash>.*)'
    r':start=(?P<start_timestamp>\d+)'
    r',finish=(?P<finish_timestamp>\d+)'
    r',duration=(?P<duration>\d+)'
    r'\x0d\x1b',
    # end of a fold : stage name and substage number
    r'travis_fold:end:(?P<end_stage>\w+)\.(?P<end_substage>\d+)\x0d\x1b',
    # start of a fold : stage name and substage number
    r'travis_fold:start:(?P<start_stage>\w+)\.(?P<start_substage>\d+)\x0d\x1b',
    # start of a timed section : hash
    r'travis_time:start:(?P<start_hash>.*)\x0d\x1b\[0K',
    # command line that was executed, echoed as '$ <command>'
    r'\$\ (?P<command>.*)\r',
]

# Regular expression to parse the worker tag ('Using worker: host:os')
TRAVIS_LOG_PARSE_WORKER_STRING = r'Using worker:\ (?P<hostname>.*):(?P<os>.*)'
54
55
56
class TravisData(object):

    """
    Gather data from Travis CI using the API.

    Typical use : call get_build_data() to load the build data and iterate
    process_build_jobs() to get a processed build job for every job ID that
    is listed in that build data.
    """

    def __init__(self, repo, build_id, connector=None):
        """
        Retrieve Travis CI build data using the API.

        Parameters:
        - repo : github repository slug (fe. buildtimetrend/python-lib)
        - build_id : Travis CI build id (fe. 158)
        - connector : Travis Connector instance, a TravisOrgConnector
          is created when this is not a TravisConnector instance
        """
        self.builds_data = {}
        self.build_jobs = {}
        self.current_build_data = {}
        self.current_job = BuildJob()
        self.travis_substage = None
        self.repo = repo
        self.build_id = str(build_id)
        # set TravisConnector if it is defined
        if isinstance(connector, TravisConnector):
            self.connector = connector
        # use Travis Org connector by default
        else:
            self.connector = TravisOrgConnector()

    def get_build_data(self):
        """
        Retrieve Travis CI build data.

        The result of the API request is stored in self.builds_data.

        Returns true if retrieving data was successful, false on error.
        """
        request = 'repos/{repo}/builds?number={build_id}'.format(
            repo=self.repo, build_id=self.build_id
        )
        try:
            self.builds_data = self.connector.json_request(request)
        except (HTTPError, URLError) as msg:
            logger.error("Error getting build data from Travis CI: %s", msg)
            return False

        # log builds_data
        logger.debug(
            "Build #%s data : %s",
            str(self.build_id),
            json.dumps(self.builds_data, sort_keys=True, indent=2)
        )

        return True

    def get_substage_name(self, command):
        """
        Resolve Travis CI substage name that corresponds to a cli command.

        The build config of the current build is searched for a list that
        contains the command. The name is '<config key>.<position>', where
        position is the 1-based index of the command in that list.

        Parameters:
        - command : cli command

        Returns the substage name, or an empty string if the command is not
        a string, the build config is not set or the command is not found.
        """
        if not is_string(command):
            return ""

        if "config" not in self.current_build_data:
            logger.warning(
                "Travis CI build config is not set"
            )
            return ""

        build_config = self.current_build_data["config"]

        # nothing to look up if the build config is empty
        if not build_config:
            return ""

        for stage_name, commands in build_config.items():
            # only config entries that are a list can hold commands
            if isinstance(commands, list) and command in commands:
                substage_name = "{stage}.{substage:d}".format(
                    stage=stage_name, substage=commands.index(command) + 1
                )
                logger.debug(
                    "Substage %s corresponds to '%s'",
                    substage_name, command
                )
                return substage_name

        return ""

    def process_build_jobs(self):
        """
        Retrieve Travis CI build job data.

        Method is a generator, iterate result to get each processed build job.
        While a build is being processed, it is available as
        self.current_build_data, this is reset to an empty dictionary
        once all builds have been iterated.
        """
        if "builds" not in self.builds_data:
            return

        for build in self.builds_data['builds']:
            self.current_build_data = build

            for job_id in build.get('job_ids', ()):
                yield self.process_build_job(job_id)

        # reset current_build_data after builds are processed
        self.current_build_data = {}

    def process_build_job(self, job_id):
        """
        Retrieve Travis CI build job data.

        The job data is retrieved and processed, the job log is parsed
        and the resulting build job is stored in self.build_jobs,
        using the job ID (as a string) as key.

        Parameters:
        - job_id : ID of the job to process

        Returns the processed build job, or None if job_id is None.
        """
        if job_id is None:
            return None

        # retrieve job data from Travis CI
        job_data = self.get_job_data(job_id)
        # process build/job data
        self.process_job_data(job_data)
        # parse Travis CI job log file
        self.parse_job_log(job_id)

        # store build job and start with a new build job instance
        processed_job = self.current_job
        self.build_jobs[str(job_id)] = processed_job
        self.current_job = BuildJob()

        # return processed build job
        return processed_job

    def get_job_data(self, job_id):
        """
        Retrieve Travis CI job data.

        Errors raised by the connector are not caught here.

        Parameters:
        - job_id : ID of the job to process

        Returns the job data as returned by the connector.
        """
        request = 'jobs/{:s}'.format(str(job_id))
        job_data = self.connector.json_request(request)

        # log job_data
        logger.debug(
            "Job #%s data : %s",
            str(job_id),
            json.dumps(job_data, sort_keys=True, indent=2)
        )

        return job_data

    def process_job_data(self, job_data):
        """
        Process Job/build data.

        Set build/job properties :
        - Build/job ID
        - build result : passed, failed, errored
        - git repo
        - git branch
        - CI platform : Travis
        - build matrix (language, language version, compiler, ...)
        - build_trigger : push, pull_request
        - pull_request (is_pull_request, title, number)

        Parameters:
        - job_data : dictionary with Travis CI job data,
          the 'job' and 'commit' keys are read
        """
        job = job_data['job']
        job_number = job['number']

        # buildnumber is part before "." of job number
        self.current_job.add_property("build", job_number.split(".")[0])
        self.current_job.add_property("job", job_number)
        self.current_job.add_property("branch", job_data['commit']['branch'])
        self.current_job.add_property("repo", job['repository_slug'])
        self.current_job.add_property("ci_platform", 'travis')
        self.current_job.add_property("result", job['state'])

        self.set_build_matrix(job_data)

        self.process_pull_request_data()

        self.current_job.set_started_at(job['started_at'])
        self.current_job.set_finished_at(job['finished_at'])

        # calculate job duration from start and finished timestamps
        # if no timing tags are available
        if not self.has_timing_tags():
            self.current_job.add_property("duration", self.get_job_duration())

    def set_build_matrix(self, job_data):
        """
        Retrieve build matrix data from job data and store in properties.

        Properties :
        - language
        - language version (if applicable)
        - compiler (if applicable)
        - operating system
        - environment parameters

        Parameters:
        - job_data : dictionary with Travis CI job data
        """
        build_matrix = Collection()

        job_config = job_data['job']['config']

        language = job_config['language']
        build_matrix.add_item("language", language)

        # set language version
        # ('d', 'dart', 'go', 'perl', 'php', 'python', 'rust')
        if language in job_config:
            language_config = job_config[language]
            if language == "android":
                # the android section holds components instead of a version,
                # skip it if no components are listed
                if "components" in language_config:
                    build_matrix.add_item(
                        "language_components",
                        " ".join(language_config["components"])
                    )
            else:
                build_matrix.add_item(
                    "language_version",
                    str(language_config)
                )

        # language specific build matrix parameters,
        # maps the job config key to the build matrix item name
        parameters = {
            'ghc': 'ghc',  # Haskell
            'jdk': 'jdk',  # Java, Android, Groovy, Ruby, Scala
            'lein': 'lein',  # Clojure
            'mono': 'mono',  # C#, F#, Visual Basic
            'node_js': 'node_js',  # Javascript
            'otp_release': 'otp_release',  # Erlang
            'rvm': 'rvm',  # Ruby, Objective-C
            'gemfile': 'gemfile',  # Ruby, Objective-C
            'xcode_sdk': 'xcode_sdk',  # Objective-C
            'xcode_scheme': 'xcode_scheme',  # Objective-C
            'compiler': 'compiler',  # C, C++
            'os': 'os',
            'env': 'parameters'
        }
        for parameter, name in parameters.items():
            if parameter in job_config:
                build_matrix.add_item(name, str(job_config[parameter]))

        self.current_job.add_property(
            "build_matrix",
            build_matrix.get_items_with_summary()
        )

    def process_pull_request_data(self):
        """
        Retrieve pull request data from Travis CI API.

        Reads the current build data and sets these build job properties :
        - build_trigger : value of 'event_type' (if present)
        - pull_request : dictionary with 'is_pull_request' (False if
          'pull_request' is not in the build data) and, if present,
          'title' and 'number'

        Nothing is set if the current build data is empty.
        """
        build_data = self.current_build_data

        # nothing to process if collection is empty
        if not build_data:
            return

        if "event_type" in build_data:
            # build trigger (push or pull_request)
            self.current_job.add_property(
                "build_trigger",
                build_data["event_type"]
            )

        # pull_request
        pull_request_data = {
            "is_pull_request": build_data.get("pull_request", False)
        }
        # optional fields, only copied if they are in the build data
        optional_fields = (
            ("title", "pull_request_title"),
            ("number", "pull_request_number"),
        )
        for name, key in optional_fields:
            if key in build_data:
                pull_request_data[name] = build_data[key]

        self.current_job.add_property("pull_request", pull_request_data)

    def parse_job_log(self, job_id):
        """
        Parse Travis CI job log.

        The log is downloaded with the connector and parsed as a stream.

        Parameters:
        - job_id : ID of the job to process
        """
        self.parse_job_log_stream(self.connector.download_job_log(job_id))

    def parse_job_log_file(self, filename):
        """
        Open a Travis CI log file and parse it.

        Parameters :
        - filename : filename of Travis CI log
        Returns false if file doesn't exist, true if it was read successfully.
        """
        # check if the log file exists
        if not check_file(filename):
            return False

        # read the log file in binary mode, lines are decoded when parsing
        with open(filename, 'rb') as file_stream:
            self.parse_job_log_stream(file_stream)

        return True

    @staticmethod
    def _decode_log_line(line):
        """
        Convert a log line to text.

        Bytes are decoded as UTF-8, invalid byte sequences are replaced by
        the Unicode replacement character instead of raising an error,
        so a single malformed byte doesn't abort parsing the log.
        Other values are returned unchanged.

        Parameters:
        - line : line from logfile (bytes or text)
        """
        if isinstance(line, bytes):
            return line.decode('utf-8', 'replace')
        return line

    def parse_job_log_stream(self, stream):
        """
        Parse Travis CI job log stream.

        Timing tags are only parsed if has_timing_tags() returns true for
        the current job, worker tags are always parsed.

        Parameters:
        - stream : stream of job log file, iterating it should produce
          the lines of the log (bytes or text)
        """
        self.travis_substage = TravisSubstage()
        check_timing_tags = self.has_timing_tags()

        for line in stream:
            # convert to str if line is bytes type
            line = self._decode_log_line(line)
            # parse Travis CI timing tags
            if check_timing_tags and 'travis_' in line:
                self.parse_travis_time_tag(line)
            # parse Travis CI worker tag
            if 'Using worker:' in line:
                self.parse_travis_worker_tag(line)

    def parse_travis_time_tag(self, line):
        """
        Parse and process Travis CI timing tags.

        Every timing pattern is tried on the line and matches are passed to
        the current substage. When the substage has finished, it is added
        as a stage to the current job (unless it finished incomplete) and
        a new substage is started.

        Parameters:
        - line : line from logfile containing Travis CI tags
        """
        if self.travis_substage is None:
            self.travis_substage = TravisSubstage()

        # make carriage return and escape characters visible in the log
        escaped_line = line.replace('\x0d', '*').replace('\x1b', 'ESC')
        logger.debug('line : %s', escaped_line)

        # parse Travis CI timing tags
        for parse_string in TRAVIS_LOG_PARSE_TIMING_STRINGS:
            result = re.search(parse_string, line)
            if not result:
                continue

            substage = self.travis_substage
            substage.process_parsed_tags(result.groupdict())

            # when finished : log stage and create a new instance
            if not substage.has_finished():
                continue

            # set substage name, if it is not set
            if not substage.has_name() and substage.has_command():
                substage.set_name(
                    self.get_substage_name(substage.get_command())
                )

            # only log complete substages
            if not substage.finished_incomplete:
                self.current_job.add_stage(substage.stage)

            # remaining patterns are matched against a new substage
            self.travis_substage = TravisSubstage()

    def parse_travis_worker_tag(self, line):
        """
        Parse and process Travis CI worker tag.

        If the line matches, the parsed hostname and os are stored in the
        'worker' property of the current job.

        Parameters:
        - line : line from logfile containing Travis CI tags
        """
        logger.debug('line : %s', line)

        # parse Travis CI worker tags
        result = re.search(TRAVIS_LOG_PARSE_WORKER_STRING, line)
        if not result:
            return

        worker_tags = result.groupdict()

        # check if worker_tags contains all required tags
        tag_list = ['hostname', 'os']
        if check_dict(worker_tags, "worker_tags", tag_list):
            logger.debug("Worker tags : %s", worker_tags)
            self.current_job.add_property("worker", worker_tags)

    def has_timing_tags(self):
        """
        Check if Travis CI job log has timing tags.

        Timing tags were introduced on Travis CI starting 2014-08-07,
        check if started_at is more recent.

        Returns false if the started_at property of the current job
        is not set or has no 'timestamp_seconds'.
        """
        started_at = self.current_job.get_property("started_at")
        if started_at is None or "timestamp_seconds" not in started_at:
            return False

        # 1407369600 is epoch timestamp of 2014-08-07T00:00:00Z
        return started_at["timestamp_seconds"] > 1407369600

    def get_job_duration(self):
        """
        Calculate build job duration.

        Returns the difference in seconds (float) between the finished_at
        and started_at timestamps of the current job, or 0.0 if one of
        them is not set or has no 'timestamp_seconds'.
        """
        started_at = self.current_job.get_property("started_at")
        finished_at = self.current_job.get_property("finished_at")

        for timestamp in (started_at, finished_at):
            if timestamp is None or "timestamp_seconds" not in timestamp:
                return 0.0

        timestamp_start = float(started_at["timestamp_seconds"])
        timestamp_end = float(finished_at["timestamp_seconds"])
        return timestamp_end - timestamp_start

    def get_started_at(self):
        """
        Retrieve timestamp when build was started.

        Returns the 'started_at' value of the current build data,
        or None if the check on the current build data fails.
        """
        if check_dict(self.current_build_data, key_list=["started_at"]):
            return self.current_build_data['started_at']

        return None

    def get_finished_at(self):
        """
        Retrieve timestamp when build finished.

        Returns the 'finished_at' value of the current build data,
        or None if the check on the current build data fails.
        """
        if check_dict(self.current_build_data, key_list=["finished_at"]):
            return self.current_build_data['finished_at']

        return None
480