| Total Complexity | 99 |
| Total Lines | 449 |
| Duplicated Lines | 0 % |
Complex classes like lasio.LASFile often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | '''las.py - read Log ASCII Standard files |
||
| 348 | class LASFile(object): |
||
| 349 | |||
| 350 | '''LAS file object. |
||
| 351 | |||
| 352 | Keyword Arguments: |
||
| 353 | file_ref: either a filename, an open file object, or a string of |
||
| 354 | a LAS file contents. |
||
| 355 | encoding (str): character encoding to open file_ref with |
||
| 356 | encoding_errors (str): "strict", "replace" (default), "ignore" - how to |
||
| 357 | handle errors with encodings (see standard library codecs module or |
||
| 358 | Python Unicode HOWTO for more information) |
||
| 359 | autodetect_encoding (bool): use chardet/cchardet to detect encoding |
||
| 360 | autodetect_encoding_chars (int/None): number of chars to read from LAS |
||
| 361 | file for auto-detection of encoding. |
||
| 362 | |||
| 363 | ''' |
||
def __init__(self, file_ref=None, **kwargs):
    '''Set up an empty LAS file object and optionally read ``file_ref``.

    Arguments:
        file_ref: when not None, it is handed to ``read`` immediately.

    Keyword Arguments:
        Any extra keyword arguments are forwarded unchanged to ``read``.

    '''
    import copy

    self._text = ''
    self._use_pandas = "auto"
    self.index_unit = None
    # Deep-copy the module-level defaults: ``write`` and the section
    # setters mutate these objects in place, so sharing the originals
    # would leak header edits from one LASFile into every other one.
    self.sections = {
        "Version": copy.deepcopy(DEFAULT_ITEMS["Version"]),
        "Well": copy.deepcopy(DEFAULT_ITEMS["Well"]),
        "Curves": copy.deepcopy(DEFAULT_ITEMS["Curves"]),
        "Parameter": copy.deepcopy(DEFAULT_ITEMS["Parameter"]),
        "Other": str(DEFAULT_ITEMS["Other"]),
    }

    if file_ref is not None:
        self.read(file_ref, **kwargs)
||
| 379 | |||
def read(self, file_ref, use_pandas="auto", null_subs=True, **kwargs):
    '''Read a LAS file.

    Arguments:
        file_ref: either a filename, an open file object, or a string of
            a LAS file contents.

    Keyword Arguments:
        use_pandas (str): bool or "auto" -- use pandas if available -- provide
            False option for faster loading where pandas functionality is not
            needed. "auto" becomes True if pandas is installed, and False if not.
        null_subs: passed through to the reader's ``read_data`` call.
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" - how to
            handle errors with encodings (see standard library codecs module or
            Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    Raises:
        KeyError: if the ~V section has no VERS item.

    '''
    if use_pandas is not None:
        self._use_pandas = use_pandas

    # NOTE(review): the handle is not closed here; open_file may hand back
    # the caller's own file object -- confirm before adding a close().
    f = open_file(file_ref, **kwargs)

    self._text = f.read()
    logger.debug("LASFile.read LAS content is type %s", type(self._text))

    # The version section has to be parsed first because the declared
    # version and wrap mode steer how the remaining sections are read.
    reader = Reader(self._text, version=1.2)
    self.sections["Version"] = reader.read_section('~V')

    # Set version
    try:
        reader.version = self.version['VERS'].value
    except KeyError:
        raise KeyError("No key VERS in ~V section")

    # Validate version. This is an explicit test rather than an assert so
    # the normalisation still happens when Python runs with -O (which
    # strips assert statements).
    if reader.version not in (1.2, 2):
        logger.warning("LAS spec version is %s -- neither 1.2 nor 2",
                       reader.version)
        reader.version = 1.2 if reader.version < 2 else 2
    reader.wrap = self.version['WRAP'].value == 'YES'

    self.sections["Well"] = reader.read_section('~W')
    self.sections["Curves"] = reader.read_section('~C')
    try:
        self.sections["Parameter"] = reader.read_section('~P')
    except LASHeaderError:
        # A header error in ~P is not fatal: log only the final line of
        # the traceback and keep the existing Parameter section.
        logger.warning(traceback.format_exc().splitlines()[-1])
    self.sections["Other"] = reader.read_raw_text('~O')

    # Set null value
    reader.null = self.well['NULL'].value

    data = reader.read_data(len(self.curves), null_subs=null_subs)

    # Column i of the data array belongs to the i-th curve.
    for i, c in enumerate(self.curves):
        c.data = data[:, i]

    def index_units():
        # Yield the upper-cased units lazily so that all() short-circuits
        # in the same order as a chained ``and`` would.
        for mnemonic in ("STRT", "STOP", "STEP"):
            yield self.well[mnemonic].unit.upper()
        yield self.curves[0].unit.upper()

    # The index unit is only recorded when STRT, STOP, STEP and the first
    # curve all agree; otherwise index_unit keeps its previous value.
    if all(unit == "M" for unit in index_units()):
        self.index_unit = "M"
    elif all(unit in ("F", "FT") for unit in index_units()):
        self.index_unit = "FT"

    self.refresh()
||
| 459 | |||
def refresh(self, use_pandas=None):
    '''Rebuild the pandas DataFrame view of the curve data.

    Arguments:
        use_pandas: when not None, replaces the stored pandas preference
            before the DataFrame is rebuilt.

    '''
    if use_pandas is not None:
        self._use_pandas = use_pandas

    # pandas was explicitly switched off: nothing to import or build.
    if self._use_pandas is False:
        return

    try:
        import pandas
    except ImportError:
        logger.info(
            "pandas not installed - skipping LASFile.df creation")
        self._use_pandas = False
        return

    if not self._use_pandas:
        return

    # The first curve becomes the index of the DataFrame.
    frame = pandas.DataFrame(self.data, columns=self.keys())
    self.df = frame
    frame.set_index(self.curves[0].mnemonic, inplace=True)
||
| 482 | |||
@property
def data(self):
    '''Curve data stacked into one 2D array, one column per curve.'''
    stacked_rows = numpy.vstack([curve.data for curve in self.curves])
    return numpy.transpose(stacked_rows)
||
| 487 | |||
def write(self, file_object, version=None, wrap=None,
          STRT=None, STOP=None, STEP=None, fmt="%10.5g"):
    '''Write to a file.

    Besides writing, this updates the WRAP and VERS items of the version
    section and the STRT/STOP/STEP values of the well section on this
    object so that they match what is written.

    Arguments:
        file_object: a file_like object opening for writing.
        version (float): either 1.2 or 2
        wrap (bool): True, False, or None (last uses WRAP item in version)
        STRT (float): optional override to automatic calculation using
            the first index curve value.
        STOP (float): optional override to automatic calculation using
            the last index curve value.
        STEP (float): optional override to automatic calculation using
            the first step size in the index curve.
        fmt (str): format string for numerical data being written to data
            section.

    Examples:

        >>> with open("test_output.las", mode="w") as f:
        ...     lasfile_obj.write(f, 2.0)    # <-- this method

    '''
    if wrap is None:
        # Compare the item's value, not the header item itself, to find
        # out whether the existing header asks for wrapped output.
        wrap = self.version["WRAP"].value == "YES"
    elif wrap is True:
        self.version["WRAP"] = HeaderItem(
            "WRAP", "", "YES", "Multiple lines per depth step")
    elif wrap is False:
        self.version["WRAP"] = HeaderItem(
            "WRAP", "", "NO", "One line per depth step")

    assert version in (1.2, 2, None)
    if version is None:
        version = self.version["VERS"].value
    if version == 1.2:
        self.version["VERS"] = HeaderItem(
            "VERS", "", 1.2, "CWLS LOG ASCII STANDARD - VERSION 1.2")
    elif version == 2:
        self.version["VERS"] = HeaderItem(
            "VERS", "", 2.0, "CWLS log ASCII Standard -VERSION 2.0")

    # self.index rebuilds the full data array on every access, so fetch
    # it once for all three defaults.
    index = self.index
    if STRT is None:
        STRT = index[0]
    if STOP is None:
        STOP = index[-1]
    if STEP is None:
        # A single-sample index has no second value to difference with.
        STEP = index[1] - index[0] if len(index) > 1 else 0
    self.well["STRT"].value = STRT
    self.well["STOP"].value = STOP
    self.well["STEP"].value = STEP

    # Check for any changes in the pandas dataframe and if there are,
    # create new curves so they are reflected in the output LAS file.
    if self._use_pandas:
        for df_curve_name in list(self.df.columns.values):
            if df_curve_name not in [ci.mnemonic for ci in self.curves]:
                self.add_curve(df_curve_name, self.df[df_curve_name])

    def _section_lines(title, name, section, use_values=True,
                       log_items=False):
        # Format one header section: the title rule followed by one
        # formatted line per header item.
        out = [title.ljust(60, "-")]
        order_func = get_section_order_function(name, version)
        section_widths = get_section_widths(
            name, section, version, order_func)
        items = section.values() if use_values else section
        for header_item in items:
            order = order_func(header_item.original_mnemonic)
            if log_items:
                logger.debug(
                    'LASFile.write %s\norder=%s section_widths=%s',
                    header_item, order, section_widths)
            formatter_func = get_formatter_function(order, **section_widths)
            out.append(formatter_func(header_item))
        return out

    # Write each section.
    logger.debug('LASFile.write Version section')
    lines = _section_lines(
        "~Version ", "Version", self.version, log_items=True)

    logger.debug('LASFile.write Well section')
    lines += _section_lines("~Well ", "Well", self.well, log_items=True)

    logger.debug('LASFile.write Curves section')
    # The curves section is iterated directly rather than via values().
    lines += _section_lines(
        "~Curves ", "Curves", self.curves, use_values=False)

    lines += _section_lines("~Params ", "Parameter", self.params)

    # ~Other is raw text, written back line by line.
    lines.append("~Other ".ljust(60, "-"))
    lines += self.other.splitlines()

    lines.append("~ASCII ".ljust(60, "-"))

    file_object.write("\n".join(lines))
    file_object.write("\n")

    data_arr = numpy.column_stack([c.data for c in self.curves])

    def format_data_section_line(n, fmt, width=10, spacer=" "):
        # NaN is written as the header's NULL value; the lookup is kept
        # lazy so a missing NULL item only matters when a NaN occurs.
        if numpy.isnan(n):
            return spacer + str(self.well["NULL"].value).rjust(width)
        return spacer + (fmt % n).rjust(width)

    twrapper = textwrap.TextWrapper(width=79)
    # Over-long data lines are only reported for version 1.2 output;
    # this does not change inside the loop.
    check_line_length = self.version["VERS"].value == 1.2
    for row in data_arr:
        depth_slice = "".join(
            format_data_section_line(value, fmt) for value in row)

        if wrap:
            row_lines = twrapper.wrap(depth_slice)
            logger.debug("LASFile.write Wrapped %d lines out of %s",
                         len(row_lines), depth_slice)
        else:
            row_lines = [depth_slice]

        if check_line_length:
            for line in row_lines:
                if len(line) > 255:
                    logger.warning(
                        "LASFile.write Data line > 256 chars: %s" % line)

        for line in row_lines:
            file_object.write(line + "\n")
||
| 643 | |||
def get_curve(self, mnemonic):
    '''Look up a curve by its mnemonic.

    Arguments:
        mnemonic (str): the name of the curve

    Returns:
        The first Curve object (not just its data array) whose mnemonic
        equals ``mnemonic``, or None when no curve matches.

    '''
    matching = (candidate for candidate in self.curves
                if candidate.mnemonic == mnemonic)
    return next(matching, None)
||
| 657 | |||
| 658 | # def __getattr__(self, key): |
||
| 659 | # # if hasattr(self, 'sections'): |
||
| 660 | # # if key in self.sections['Curves']: |
||
| 661 | # # return self[key] |
||
| 662 | # # else: |
||
| 663 | # # raise AttributeError |
||
| 664 | # pass |
||
| 665 | |||
def __getitem__(self, key):
    '''Return the data of one curve.

    Arguments:
        key: an integer position (plain or numpy integer) or a curve
            mnemonic.

    Returns:
        The ``data`` attribute of the selected curve.

    Raises:
        KeyError: if ``key`` is not an integer and is not one of the
            mnemonics returned by ``keys()``.

    '''
    # Positional access; numpy integers are accepted as well because
    # they are not instances of the builtin int on Python 3.
    if isinstance(key, (int, numpy.integer)):
        return self.curves[key].data
    if key in self.keys():
        return self.curves[key].data
    # ``object`` provides no __getitem__ to fall back on, so report the
    # missing curve the way a mapping would.
    raise KeyError(key)
||
| 674 | |||
| 675 | # def __setattr__(self, key, value): |
||
| 676 | # assert NotImplementedError('not yet') |
||
| 677 | |||
def __setitem__(self, key, value):
    '''Item assignment is not supported yet.

    Raises:
        NotImplementedError: always.

    '''
    # Raise instead of assert: asserting a freshly built exception object
    # always passes, so assignments used to be dropped silently.
    raise NotImplementedError('not yet')
||
| 680 | |||
def keys(self):
    '''Return the mnemonics of all curves as a list, in curve order.'''
    mnemonics = []
    for curve in self.curves:
        mnemonics.append(curve.mnemonic)
    return mnemonics
||
| 683 | |||
def values(self):
    '''Return the data of all curves as a list, in curve order.'''
    arrays = []
    for curve in self.curves:
        arrays.append(curve.data)
    return arrays
||
| 686 | |||
def items(self):
    '''Return a list of (mnemonic, data) pairs, one per curve.'''
    pairs = []
    for curve in self.curves:
        pairs.append((curve.mnemonic, curve.data))
    return pairs
||
| 689 | |||
def iterkeys(self):
    '''Return an iterator over a snapshot of ``keys()``.'''
    snapshot = list(self.keys())
    return iter(snapshot)
||
| 692 | |||
def itervalues(self):
    '''Return an iterator over a snapshot of ``values()``.'''
    snapshot = list(self.values())
    return iter(snapshot)
||
| 695 | |||
def iteritems(self):
    '''Return an iterator over a snapshot of ``items()``.'''
    snapshot = list(self.items())
    return iter(snapshot)
||
| 698 | |||
@property
def version(self):
    '''The section stored under the "Version" key of ``sections``.'''
    all_sections = self.sections
    return all_sections["Version"]

@version.setter
def version(self, new_section):
    # Replace the whole section object under the "Version" key.
    all_sections = self.sections
    all_sections["Version"] = new_section
||
| 706 | |||
@property
def well(self):
    '''The section stored under the "Well" key of ``sections``.'''
    all_sections = self.sections
    return all_sections["Well"]

@well.setter
def well(self, new_section):
    # Replace the whole section object under the "Well" key.
    all_sections = self.sections
    all_sections["Well"] = new_section
||
| 714 | |||
@property
def curves(self):
    '''The section stored under the "Curves" key of ``sections``.'''
    all_sections = self.sections
    return all_sections["Curves"]

@curves.setter
def curves(self, new_section):
    # Replace the whole section object under the "Curves" key.
    all_sections = self.sections
    all_sections["Curves"] = new_section
||
| 722 | |||
@property
def params(self):
    '''The section stored under the "Parameter" key of ``sections``.'''
    all_sections = self.sections
    return all_sections["Parameter"]

@params.setter
def params(self, new_section):
    # Replace the whole section object under the "Parameter" key.
    all_sections = self.sections
    all_sections["Parameter"] = new_section
||
| 730 | |||
@property
def other(self):
    '''The value stored under the "Other" key of ``sections``.'''
    all_sections = self.sections
    return all_sections["Other"]

@other.setter
def other(self, new_section):
    # Replace the value stored under the "Other" key.
    all_sections = self.sections
    all_sections["Other"] = new_section
||
| 738 | |||
| 739 | |||
@property
def metadata(self):
    '''All header items from every section, collected into one SectionItems.

    Sections stored as plain strings (the raw "Other" text) are skipped,
    because they hold no header items.

    Returns:
        A new SectionItems object; appending to it does not modify the
        sections of this object.

    '''
    s = SectionItems()
    # Iterate over the section objects themselves. Looping over the dict
    # directly yields its string keys, whose "items" would be single
    # characters of the section names.
    for section in self.sections.values():
        if isinstance(section, str):
            continue
        for item in section:
            s.append(item)
    return s

@metadata.setter
def metadata(self, value):
    # Assignment is refused on purpose; header items are meant to be
    # edited through the individual section attributes.
    raise Warning('Set values in the version/well/params attrs directly')
||
| 751 | |||
@property
def df(self):
    '''The stored pandas DataFrame, or None when pandas is not in use.

    When the pandas preference is falsy a warning is logged and None is
    returned instead of raising.

    '''
    if not self._use_pandas:
        logger.warning(
            "pandas is not installed or use_pandas was set to False")
        return None
    return self._df

@df.setter
def df(self, value):
    # Stored on a private attribute so the getter can guard access.
    self._df = value
||
| 764 | |||
@property
def index(self):
    '''First column of ``data``, i.e. the values of the first curve.'''
    table = self.data
    return table[:, 0]
||
| 768 | |||
@property
def depth_m(self):
    '''The index expressed in metres.

    Raises:
        LASUnknownUnitError: if ``index_unit`` is neither "M" nor "FT".

    '''
    unit = self.index_unit
    if unit == "M":
        return self.index
    if unit == "FT":
        # Feet to metres.
        return self.index * 0.3048
    raise LASUnknownUnitError("Unit of depth index not known")
||
| 777 | |||
@property
def depth_ft(self):
    '''The index expressed in feet.

    Raises:
        LASUnknownUnitError: if ``index_unit`` is neither "M" nor "FT".

    '''
    unit = self.index_unit
    if unit == "FT":
        return self.index
    if unit == "M":
        # Metres to feet.
        return self.index / 0.3048
    raise LASUnknownUnitError("Unit of depth index not known")
||
| 786 | |||
def add_curve(self, mnemonic, data, unit="", descr="", value=""):
    '''Create a curve, store it under ``mnemonic`` and refresh.

    Arguments:
        mnemonic: name the new curve is stored under in the curves section.
        data: assigned to the new curve's ``data`` attribute.
        unit, descr, value: passed on to the CurveItem constructor.

    '''
    # CurveItem takes (mnemonic, unit, value, descr), which is not the
    # order of this method's own keyword arguments.
    new_curve = CurveItem(mnemonic, unit, value, descr)
    new_curve.data = data
    self.curves[mnemonic] = new_curve
    # Rebuild derived state so the new curve is picked up.
    self.refresh()
||
| 793 | |||
@property
def header(self):
    '''The ``sections`` dictionary itself (not a copy).'''
    all_sections = self.sections
    return all_sections
||
| 797 | |||
| 1270 |