| Total Complexity | 70 |
| Total Lines | 429 |
| Duplicated Lines | 0 % |
Complex classes like buildtimetrend.travis.TravisData often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | # vim: set expandtab sw=4 ts=4: |
||
class TravisData(object):

    """
    Gather data from Travis CI using the API.

    Build data is fetched through a connector. Every job of the build is
    turned into a BuildJob (properties taken from the API response, stages
    taken from the job log) and stored in `build_jobs`, keyed by the job ID
    converted to a string.
    """

    def __init__(self, repo, build_id, connector=None):
        """
        Retrieve Travis CI build data using the API.

        Parameters:
        - repo : github repository slug (fe. buildtimetrend/python-lib)
        - build_id : Travis CI build id (fe. 158)
        - connector : Travis Connector instance
        """
        # raw response of the builds request, see get_build_data()
        self.builds_data = {}
        # processed BuildJob instances, keyed by str(job_id)
        self.build_jobs = {}
        # build entry that is currently being processed
        self.current_build_data = {}
        # job that is currently being populated
        self.current_job = BuildJob()
        # substage that is currently being parsed from the job log
        self.travis_substage = None
        self.repo = repo
        self.build_id = str(build_id)
        # set TravisConnector if it is defined
        if isinstance(connector, TravisConnector):
            self.connector = connector
        # use Travis Org connector by default
        else:
            self.connector = TravisOrgConnector()

    def get_build_data(self):
        """
        Retrieve Travis CI build data.

        The response is stored in `builds_data`.

        Returns True if retrieving data was successful,
        False if the request raised HTTPError or URLError.
        """
        request = 'repos/{repo}/builds?number={build_id}'.format(
            repo=self.repo, build_id=self.build_id
        )
        try:
            self.builds_data = self.connector.json_request(request)
        except (HTTPError, URLError) as msg:
            logger.error("Error getting build data from Travis CI: %s", msg)
            return False

        # log builds_data
        logger.debug(
            "Build #%s data : %s",
            str(self.build_id),
            json.dumps(self.builds_data, sort_keys=True, indent=2)
        )

        return True

    def get_substage_name(self, command):
        """
        Resolve Travis CI substage name that corresponds to a cli command.

        The command is looked up in the lists of the current build config;
        the name has the form "<stage>.<position>", position starting at 1.

        Parameters:
        - command : cli command

        Returns the substage name, or an empty string if the command is not
        a string, no build config is available or the command is not found.
        """
        if not tools.is_string(command):
            return ""

        if not self.current_build_data or \
                "config" not in self.current_build_data:
            logger.warning(
                "Travis CI build config is not set"
            )
            return ""

        build_config = self.current_build_data["config"]

        # an empty build config can't contain the command
        if not build_config:
            return ""

        for stage_name, commands in build_config.items():
            if tools.is_list(commands) and command in commands:
                substage_name = "{stage}.{substage:d}".format(
                    stage=stage_name,
                    # substages are numbered starting from 1
                    substage=commands.index(command) + 1
                )
                logger.debug(
                    "Substage %s corresponds to '%s'",
                    substage_name, command
                )
                return substage_name

        return ""

    def process_build_jobs(self):
        """
        Retrieve Travis CI build job data.

        Method is a generator, iterate result to get each processed build job.
        While the jobs of a build are processed, `current_build_data` holds
        that build; it is reset once all builds were iterated.
        """
        if not self.builds_data or "builds" not in self.builds_data:
            return

        for build in self.builds_data['builds']:
            self.current_build_data = build

            # builds without a job list are skipped
            for job_id in build.get('job_ids', []) \
                    if "job_ids" in build else []:
                yield self.process_build_job(job_id)

        # reset current_build_data after builds are processed
        self.current_build_data = {}

    def process_build_job(self, job_id):
        """
        Retrieve Travis CI build job data.

        Fetches the job data, processes it, parses the job log and stores
        the resulting job in `build_jobs`.

        Parameters:
        - job_id : ID of the job to process

        Returns the processed build job, or None if job_id is None.
        """
        if job_id is None:
            return None

        # retrieve job data from Travis CI
        job_data = self.get_job_data(job_id)
        # process build/job data
        self.process_job_data(job_data)
        # parse Travis CI job log file
        self.parse_job_log(job_id)

        # store build job and start with a fresh instance for the next job
        processed_job = self.current_job
        self.build_jobs[str(job_id)] = processed_job
        self.current_job = BuildJob()

        # return processed build job
        return processed_job

    def get_job_data(self, job_id):
        """
        Retrieve Travis CI job data.

        Errors raised by the connector are not caught here.

        Parameters:
        - job_id : ID of the job to process

        Returns the response of the job request.
        """
        request = 'jobs/{:s}'.format(str(job_id))
        job_data = self.connector.json_request(request)

        # log job_data
        logger.debug(
            "Job #%s data : %s",
            str(job_id),
            json.dumps(job_data, sort_keys=True, indent=2)
        )

        return job_data

    def process_job_data(self, job_data):
        """
        Process Job/build data.

        Set build/job properties :
        - Build/job ID
        - build result : passed, failed, errored
        - git repo
        - git branch
        - CI platform : Travis
        - build matrix (language, language version, compiler, ...)
        - build_trigger : push, pull_request
        - pull_request (is_pull_request, title, number)

        Parameters:
        - job_data : dictionary with Travis CI job data,
          the keys 'job' and 'commit' are required
        """
        job = job_data['job']
        job_number = job['number']

        # buildnumber is part before "." of job number
        self.current_job.add_property("build", job_number.split(".")[0])
        self.current_job.add_property("job", job_number)
        self.current_job.add_property("branch", job_data['commit']['branch'])
        self.current_job.add_property("repo", job['repository_slug'])
        self.current_job.add_property("ci_platform", 'travis')
        self.current_job.add_property("result", job['state'])

        self.set_build_matrix(job_data)

        self.process_pull_request_data()

        self.current_job.set_started_at(job['started_at'])
        self.current_job.set_finished_at(job['finished_at'])

        # calculate job duration from start and finished timestamps
        # if no timing tags are available
        if not self.has_timing_tags():
            self.current_job.add_property("duration", self.get_job_duration())

    def set_build_matrix(self, job_data):
        """
        Retrieve build matrix data from job data and store in properties.

        Properties :
        - language
        - language version (if applicable)
        - compiler (if applicable)
        - operating system
        - environment parameters

        Parameters:
        - job_data : dictionary with Travis CI job data
        """
        # check if job config data exists
        if 'job' not in job_data or 'config' not in job_data['job']:
            logger.warning("Job config data doesn't exist")
            return

        build_matrix = Collection()
        job_config = job_data['job']['config']

        if 'language' in job_config:
            language = job_config['language']
            build_matrix.add_item('language', language)

            # set language version
            # ('d', 'dart', 'go', 'perl', 'php', 'python', 'rust')
            if language in job_config:
                language_config = job_config[language]
                if language == 'android':
                    # the android section doesn't necessarily list
                    # components (presumably it can hold only other
                    # settings), so don't fail when they are missing
                    components = None
                    if isinstance(language_config, dict):
                        components = language_config.get("components")
                    if components is not None:
                        build_matrix.add_item(
                            "language_components",
                            " ".join(components)
                        )
                else:
                    build_matrix.add_item(
                        'language_version',
                        str(language_config)
                    )

        # language specific build matrix parameters,
        # maps the job config key to the build matrix item name
        parameters = {
            'ghc': 'ghc',  # Haskell
            'jdk': 'jdk',  # Java, Android, Groovy, Ruby, Scala
            'lein': 'lein',  # Clojure
            'mono': 'mono',  # C#, F#, Visual Basic
            'node_js': 'node_js',  # Javascript
            'otp_release': 'otp_release',  # Erlang
            'rvm': 'rvm',  # Ruby, Objective-C
            'gemfile': 'gemfile',  # Ruby, Objective-C
            'xcode_sdk': 'xcode_sdk',  # Objective-C
            'xcode_scheme': 'xcode_scheme',  # Objective-C
            'compiler': 'compiler',  # C, C++
            'os': 'os',
            'env': 'parameters'
        }
        for parameter, name in parameters.items():
            if parameter in job_config:
                build_matrix.add_item(name, str(job_config[parameter]))

        self.current_job.add_property(
            "build_matrix",
            build_matrix.get_items_with_summary()
        )

    def process_pull_request_data(self):
        """
        Retrieve pull request data from Travis CI API.

        Reads the current build data and sets the job properties
        'build_trigger' (if the event type is known) and 'pull_request'.
        Nothing is set when there is no current build data.
        """
        build = self.current_build_data
        # check if collection is empty
        if not build:
            return

        if "event_type" in build:
            # build trigger (push or pull_request)
            self.current_job.add_property(
                "build_trigger",
                build["event_type"]
            )

        # pull_request, a build is not a pull request unless stated otherwise
        pull_request_data = {
            "is_pull_request": build.get("pull_request", False)
        }
        optional_fields = (
            ("pull_request_title", "title"),
            ("pull_request_number", "number"),
        )
        for build_key, name in optional_fields:
            if build_key in build:
                pull_request_data[name] = build[build_key]

        self.current_job.add_property("pull_request", pull_request_data)

    def parse_job_log(self, job_id):
        """
        Parse Travis CI job log.

        The log is downloaded using the connector.

        Parameters:
        - job_id : ID of the job to process
        """
        self.parse_job_log_stream(self.connector.download_job_log(job_id))

    def parse_job_log_file(self, filename):
        """
        Open a Travis CI log file and parse it.

        Parameters :
        - filename : filename of Travis CI log
        Returns false if file doesn't exist, true if it was read successfully.
        """
        # load timestamps file
        if not tools.check_file(filename):
            return False

        # read timestamps, calculate stage duration
        with open(filename, 'rb') as file_stream:
            self.parse_job_log_stream(file_stream)

        return True

    @staticmethod
    def _decode_log_line(line):
        """
        Convert a log line to text.

        Bytes are decoded as UTF-8, invalid byte sequences are replaced by
        the Unicode replacement character instead of raising an error, so a
        single malformed line can't abort parsing a complete log.
        Any other value is returned unchanged.

        Parameters:
        - line : line from logfile (bytes or text)
        """
        if isinstance(line, bytes):
            return line.decode('utf-8', 'replace')
        return line

    def parse_job_log_stream(self, stream):
        """
        Parse Travis CI job log stream.

        Timing tags are only parsed if the current job is recent enough to
        have them (see has_timing_tags), worker tags are always parsed.

        Parameters:
        - stream : stream of job log file, an iterable of lines
        """
        self.travis_substage = TravisSubstage()
        check_timing_tags = self.has_timing_tags()

        for line in stream:
            # convert to str if line is bytes type
            line = self._decode_log_line(line)
            # parse Travis CI timing tags
            if check_timing_tags and 'travis_' in line:
                self.parse_travis_time_tag(line)
            # parse Travis CI worker tag
            if 'Using worker:' in line:
                self.parse_travis_worker_tag(line)

    def parse_travis_time_tag(self, line):
        """
        Parse and process Travis CI timing tags.

        A substage that has finished is added to the current job, unless it
        finished incomplete, and a new substage is started.

        Parameters:
        - line : line from logfile containing Travis CI tags
        """
        if self.travis_substage is None:
            self.travis_substage = TravisSubstage()

        # make control characters visible in the debug log
        escaped_line = line.replace('\x0d', '*').replace('\x1b', 'ESC')
        logger.debug('line : %s', escaped_line)

        # parse Travis CI timing tags
        for parse_string in TRAVIS_LOG_PARSE_TIMING_STRINGS:
            result = re.search(parse_string, line)
            if not result:
                continue

            self.travis_substage.process_parsed_tags(result.groupdict())

            # when finished : log stage and create a new instance
            if self.travis_substage.has_finished():
                # set substage name, if it is not set
                if not self.travis_substage.has_name() and \
                        self.travis_substage.has_command():
                    self.travis_substage.set_name(
                        self.get_substage_name(
                            self.travis_substage.get_command()
                        )
                    )

                # only log complete substages
                if not self.travis_substage.finished_incomplete:
                    self.current_job.add_stage(self.travis_substage.stage)
                self.travis_substage = TravisSubstage()

    def parse_travis_worker_tag(self, line):
        """
        Parse and process Travis CI worker tag.

        Sets the job property 'worker' if the line contains the worker tags.

        Parameters:
        - line : line from logfile containing Travis CI tags
        """
        logger.debug('line : %s', line)

        # parse Travis CI worker tags
        result = re.search(TRAVIS_LOG_PARSE_WORKER_STRING, line)
        if not result:
            return

        worker_tags = result.groupdict()

        # check if parameter worker_tags is a dictionary and
        # if it contains all required tags
        tag_list = ['hostname', 'os']
        if tools.check_dict(worker_tags, "worker_tags", tag_list):
            logger.debug("Worker tags : %s", worker_tags)
            self.current_job.add_property("worker", worker_tags)

    def has_timing_tags(self):
        """
        Check if Travis CI job log has timing tags.

        Timing tags were introduced on Travis CI starting 2014-08-07,
        check if started_at is more recent.

        Returns False if the start timestamp of the current job is unknown.
        """
        started_at = self.current_job.get_property("started_at")
        if started_at is None or "timestamp_seconds" not in started_at:
            return False

        # 1407369600 is epoch timestamp of 2014-08-07T00:00:00Z
        return started_at["timestamp_seconds"] > 1407369600

    def get_job_duration(self):
        """
        Calculate build job duration.

        Returns the number of seconds between the start and the finish of
        the current job, 0.0 if one of both timestamps is unknown.
        """
        started_at = self.current_job.get_property("started_at")
        finished_at = self.current_job.get_property("finished_at")
        if started_at is None or "timestamp_seconds" not in started_at or \
                finished_at is None or "timestamp_seconds" not in finished_at:
            return 0.0

        timestamp_start = float(started_at["timestamp_seconds"])
        timestamp_end = float(finished_at["timestamp_seconds"])
        return timestamp_end - timestamp_start

    def get_started_at(self):
        """
        Retrieve timestamp when build was started.

        Returns None if the current build data doesn't have it.
        """
        if tools.check_dict(self.current_build_data, key_list=["started_at"]):
            return self.current_build_data['started_at']
        else:
            return None

    def get_finished_at(self):
        """
        Retrieve timestamp when build finished.

        Returns None if the current build data doesn't have it.
        """
        if tools.check_dict(self.current_build_data, key_list=["finished_at"]):
            return self.current_build_data['finished_at']
        else:
            return None
| 482 | return None |
||
| 483 |