Total Complexity | 99 |
Total Lines | 449 |
Duplicated Lines | 0 % |
Complex classes like lasio.LASFile often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | '''las.py - read Log ASCII Standard files |
||
class LASFile(object):

    '''LAS file object.

    Keeps the header sections of a LAS file in ``self.sections`` (keys
    "Version", "Well", "Curves", "Parameter" and "Other") and exposes the
    curve data through mapping-style access and the ``data``/``index``
    properties.

    Keyword Arguments:
        file_ref: either a filename, an open file object, or a string of
            a LAS file contents.
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" - how to
            handle errors with encodings (see standard library codecs module or
            Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    '''

    def __init__(self, file_ref=None, **kwargs):
        '''Create the object and, if file_ref is given, read it.

        All keyword arguments are forwarded unchanged to read().
        '''
        self._text = ''
        self._use_pandas = "auto"
        self.index_unit = None
        # NOTE(review): the default section objects are used as-is, without
        # copying; if DEFAULT_ITEMS holds mutable objects they are presumably
        # shared between instances that never call read() -- confirm.
        self.sections = {
            "Version": DEFAULT_ITEMS["Version"],
            "Well": DEFAULT_ITEMS["Well"],
            "Curves": DEFAULT_ITEMS["Curves"],
            "Parameter": DEFAULT_ITEMS["Parameter"],
            "Other": str(DEFAULT_ITEMS["Other"]),
        }

        if file_ref is not None:
            self.read(file_ref, **kwargs)

    def read(self, file_ref, use_pandas="auto", null_subs=True, **kwargs):
        '''Read a LAS file.

        Arguments:
            file_ref: either a filename, an open file object, or a string of
                a LAS file contents.

        Keyword Arguments:
            use_pandas (str): bool or "auto" -- use pandas if available -- provide
                False option for faster loading where pandas functionality is not
                needed. "auto" becomes True if pandas is installed, and False if not.
            null_subs (bool): passed through to the reader's read_data().
            encoding (str): character encoding to open file_ref with
            encoding_errors (str): "strict", "replace" (default), "ignore" - how to
                handle errors with encodings (see standard library codecs module or
                Python Unicode HOWTO for more information)
            autodetect_encoding (bool): use chardet/cchardet to detect encoding
            autodetect_encoding_chars (int/None): number of chars to read from LAS
                file for auto-detection of encoding.

        Raises:
            KeyError: if the ~V section has no VERS item.

        '''
        if use_pandas is not None:
            self._use_pandas = use_pandas

        # NOTE(review): the object returned by open_file() is not closed here;
        # it may be a caller-owned file object, so ownership needs confirming
        # before adding a close().
        f = open_file(file_ref, **kwargs)

        self._text = f.read()
        logger.debug("LASFile.read LAS content is type %s" % type(self._text))

        # The version section is parsed first, assuming 1.2, because it tells
        # us which version and wrap mode to use for everything else.
        reader = Reader(self._text, version=1.2)
        self.sections["Version"] = reader.read_section('~V')

        # Set version
        try:
            reader.version = self.version['VERS'].value
        except KeyError:
            raise KeyError("No key VERS in ~V section")

        # Validate version. A plain conditional is used (not an assert) so the
        # fallback still happens when Python runs with -O.
        if reader.version not in (1.2, 2):
            logger.warning("LAS spec version is %s -- neither 1.2 nor 2" %
                           reader.version)
            if reader.version < 2:
                reader.version = 1.2
            else:
                reader.version = 2
        reader.wrap = self.version['WRAP'].value == 'YES'

        self.sections["Well"] = reader.read_section('~W')
        self.sections["Curves"] = reader.read_section('~C')
        try:
            self.sections["Parameter"] = reader.read_section('~P')
        except LASHeaderError:
            # Only the final line of the traceback (the error message) is
            # logged; the previous Parameter section is kept.
            logger.warning(traceback.format_exc().splitlines()[-1])
        self.sections["Other"] = reader.read_raw_text('~O')

        # Set null value
        reader.null = self.well['NULL'].value

        data = reader.read_data(len(self.curves), null_subs=null_subs)

        # Column i of the data array belongs to curve i.
        for i, c in enumerate(self.curves):
            c.data = data[:, i]

        # The index unit is only set when STRT, STOP, STEP and the first
        # curve all agree on metres or on feet; otherwise it is left alone.
        if (self.well["STRT"].unit.upper() == "M" and
                self.well["STOP"].unit.upper() == "M" and
                self.well["STEP"].unit.upper() == "M" and
                self.curves[0].unit.upper() == "M"):
            self.index_unit = "M"
        elif (self.well["STRT"].unit.upper() in ("F", "FT") and
                self.well["STOP"].unit.upper() in ("F", "FT") and
                self.well["STEP"].unit.upper() in ("F", "FT") and
                self.curves[0].unit.upper() in ("F", "FT")):
            self.index_unit = "FT"

        self.refresh()

    def refresh(self, use_pandas=None):
        '''Refresh curve names and indices.

        Rebuilds ``self.df`` from the current curves when pandas use is
        enabled and pandas can be imported.

        Arguments:
            use_pandas: if not None, replaces the stored use_pandas setting
                before refreshing.
        '''
        if use_pandas is not None:
            self._use_pandas = use_pandas

        if self._use_pandas is not False:
            try:
                import pandas
            except ImportError:
                logger.info(
                    "pandas not installed - skipping LASFile.df creation")
                self._use_pandas = False

        if self._use_pandas:
            self.df = pandas.DataFrame(self.data, columns=self.keys())
            # The first curve is used as the DataFrame index.
            self.df.set_index(self.curves[0].mnemonic, inplace=True)

    @property
    def data(self):
        '''2D array of data from LAS file (one column per curve).'''
        return numpy.vstack([c.data for c in self.curves]).T

    def _header_section_lines(self, heading, section_name, section, items,
                              version, log_items=False):
        '''Return the heading line plus one formatted line per header item.

        Arguments:
            heading (str): section title, padded with "-" to 60 characters.
            section_name (str): name passed to the order/width helpers.
            section: the section object passed to get_section_widths().
            items: iterable of the header items to format.
            version: LAS version passed to the order/width helpers.
            log_items (bool): emit a debug log line for every item.
        '''
        order_func = get_section_order_function(section_name, version)
        section_widths = get_section_widths(
            section_name, section, version, order_func)
        lines = [heading.ljust(60, "-")]
        for header_item in items:
            order = order_func(header_item.original_mnemonic)
            if log_items:
                logger.debug(
                    'LASFile.write %s\norder=%s section_widths=%s'
                    % (header_item, order, section_widths))
            formatter_func = get_formatter_function(order, **section_widths)
            lines.append(formatter_func(header_item))
        return lines

    def write(self, file_object, version=None, wrap=None,
              STRT=None, STOP=None, STEP=None, fmt="%10.5g"):
        '''Write to a file.

        The VERS/WRAP items of the version section and the STRT/STOP/STEP
        values of the well section are updated on this object as part of
        writing.

        Arguments:
            file_object: a file_like object opening for writing.
            version (float): either 1.2 or 2
            wrap (bool): True, False, or None (last uses WRAP item in version)
            STRT (float): optional override to automatic calculation using
                the first index curve value.
            STOP (float): optional override to automatic calculation using
                the last index curve value.
            STEP (float): optional override to automatic calculation using
                the first step size in the index curve. If the index has
                fewer than two values the existing STEP value is kept.
            fmt (str): format string for numerical data being written to data
                section.

        Examples:

            >>> with open("test_output.las", mode="w") as f:
            ...     lasfile_obj.write(f, 2.0)    # <-- this method

        '''
        if wrap is None:
            # Compare the item's value (as read() does), not the item itself.
            wrap = self.version["WRAP"].value == "YES"
        elif wrap is True:
            self.version["WRAP"] = HeaderItem(
                "WRAP", "", "YES", "Multiple lines per depth step")
        elif wrap is False:
            self.version["WRAP"] = HeaderItem(
                "WRAP", "", "NO", "One line per depth step")

        assert version in (1.2, 2, None)
        if version is None:
            version = self.version["VERS"].value
        if version == 1.2:
            self.version["VERS"] = HeaderItem(
                "VERS", "", 1.2, "CWLS LOG ASCII STANDARD - VERSION 1.2")
        elif version == 2:
            self.version["VERS"] = HeaderItem(
                "VERS", "", 2.0, "CWLS log ASCII Standard -VERSION 2.0")

        # self.index rebuilds the whole data array on every access, so it is
        # fetched at most once, and only when a value must be derived from it.
        if STRT is None or STOP is None or STEP is None:
            index = self.index
            if STRT is None:
                STRT = index[0]
            if STOP is None:
                STOP = index[-1]
            if STEP is None:
                if len(index) > 1:
                    STEP = index[1] - index[0]  # Faster than numpy.gradient
                else:
                    # A single sample has no step; keep the header value.
                    STEP = self.well["STEP"].value
        self.well["STRT"].value = STRT
        self.well["STOP"].value = STOP
        self.well["STEP"].value = STEP

        # Check for any changes in the pandas dataframe and if there are,
        # create new curves so they are reflected in the output LAS file.
        if self._use_pandas:
            # add_curve() calls refresh(), which replaces self.df with a frame
            # built from the curves only. Hold on to the caller's frame so
            # every new column can still be read from it.
            frame = self.df
            known = set(ci.mnemonic for ci in self.curves)
            for df_curve_name in list(frame.columns.values):
                if df_curve_name not in known:
                    self.add_curve(df_curve_name, frame[df_curve_name])
                    known.add(df_curve_name)

        # Write each section.
        lines = []

        logger.debug('LASFile.write Version section')
        lines += self._header_section_lines(
            "~Version ", "Version", self.version, self.version.values(),
            version, log_items=True)

        logger.debug('LASFile.write Well section')
        lines += self._header_section_lines(
            "~Well ", "Well", self.well, self.well.values(),
            version, log_items=True)

        logger.debug('LASFile.write Curves section')
        lines += self._header_section_lines(
            "~Curves ", "Curves", self.curves, self.curves, version)

        lines += self._header_section_lines(
            "~Params ", "Parameter", self.params, self.params.values(),
            version)

        # ~Other is raw text and is written back line by line.
        lines.append("~Other ".ljust(60, "-"))
        lines += self.other.splitlines()

        lines.append("~ASCII ".ljust(60, "-"))

        file_object.write("\n".join(lines))
        file_object.write("\n")

        data_arr = numpy.column_stack([c.data for c in self.curves])
        nrows, ncols = data_arr.shape

        null_text = str(self.well["NULL"].value)

        def format_data_section_line(n, fmt, l=10, spacer=" "):
            # NaN values are written as the well's NULL value.
            if numpy.isnan(n):
                return spacer + null_text.rjust(l)
            else:
                return spacer + (fmt % n).rjust(l)

        twrapper = textwrap.TextWrapper(width=79)
        check_line_length = self.version["VERS"].value == 1.2
        for i in range(nrows):
            depth_slice = ''.join(
                format_data_section_line(data_arr[i, j], fmt)
                for j in range(ncols))

            if wrap:
                row_lines = twrapper.wrap(depth_slice)
                logger.debug("LASFile.write Wrapped %d lines out of %s" %
                             (len(row_lines), depth_slice))
            else:
                row_lines = [depth_slice]

            if check_line_length:
                for line in row_lines:
                    if len(line) > 255:
                        logger.warning(
                            "LASFile.write Data line > 256 chars: %s" % line)

            for line in row_lines:
                file_object.write(line + "\n")

    def get_curve(self, mnemonic):
        '''Return Curve object.

        Arguments:
            mnemonic (str): the name of the curve

        Returns:
            A Curve object, not just the data array, or None when no curve
            has that mnemonic.

        '''
        for curve in self.curves:
            if curve.mnemonic == mnemonic:
                return curve
        return None

    def __getitem__(self, key):
        '''Return the data of the curve selected by position or mnemonic.

        An int key selects by position. A str key selects by mnemonic and
        yields None when no curve has that mnemonic.
        '''
        if isinstance(key, int):
            return self.curves[key].data
        elif isinstance(key, str):
            if key in self.keys():
                return self.curves[key].data
            return None
        else:
            # NOTE(review): object defines no __getitem__, so this lookup
            # presumably always raises AttributeError -- confirm intent.
            super(LASFile, self).__getitem__(key)

    def __setitem__(self, key, value):
        '''Item assignment is not supported yet.

        Raises:
            NotImplementedError: always.
        '''
        raise NotImplementedError('not yet')

    def keys(self):
        '''Return the list of curve mnemonics.'''
        return [c.mnemonic for c in self.curves]

    def values(self):
        '''Return the list of curve data arrays.'''
        return [c.data for c in self.curves]

    def items(self):
        '''Return a list of (mnemonic, data) pairs, one per curve.'''
        return [(c.mnemonic, c.data) for c in self.curves]

    def iterkeys(self):
        '''Return an iterator over the curve mnemonics.'''
        return iter(self.keys())

    def itervalues(self):
        '''Return an iterator over the curve data arrays.'''
        return iter(self.values())

    def iteritems(self):
        '''Return an iterator over (mnemonic, data) pairs.'''
        return iter(self.items())

    @property
    def version(self):
        '''The "Version" header section.'''
        return self.sections["Version"]

    @version.setter
    def version(self, section):
        self.sections["Version"] = section

    @property
    def well(self):
        '''The "Well" header section.'''
        return self.sections["Well"]

    @well.setter
    def well(self, section):
        self.sections["Well"] = section

    @property
    def curves(self):
        '''The "Curves" header section.'''
        return self.sections["Curves"]

    @curves.setter
    def curves(self, section):
        self.sections["Curves"] = section

    @property
    def params(self):
        '''The "Parameter" header section.'''
        return self.sections["Parameter"]

    @params.setter
    def params(self, section):
        self.sections["Parameter"] = section

    @property
    def other(self):
        '''The "Other" section.'''
        return self.sections["Other"]

    @other.setter
    def other(self, section):
        self.sections["Other"] = section

    @property
    def metadata(self):
        '''All header items from every item-based section, in one container.

        Iterates the section objects themselves (not the section names), and
        skips any section held as plain text, such as "Other".
        '''
        s = SectionItems()
        for section in self.sections.values():
            if isinstance(section, str):
                continue
            for item in section:
                s.append(item)
        return s

    @metadata.setter
    def metadata(self, value):
        raise Warning('Set values in the version/well/params attrs directly')

    @property
    def df(self):
        '''The stored DataFrame, or None (with a warning) if pandas is off.'''
        if self._use_pandas:
            return self._df
        logger.warning(
            "pandas is not installed or use_pandas was set to False")
        return None

    @df.setter
    def df(self, value):
        self._df = value

    @property
    def index(self):
        '''The first column of ``data``.'''
        return self.data[:, 0]

    @property
    def depth_m(self):
        '''The index converted to metres.

        Raises:
            LASUnknownUnitError: if index_unit is neither "M" nor "FT".
        '''
        if self.index_unit == "M":
            return self.index
        elif self.index_unit == "FT":
            return self.index * 0.3048
        else:
            raise LASUnknownUnitError("Unit of depth index not known")

    @property
    def depth_ft(self):
        '''The index converted to feet.

        Raises:
            LASUnknownUnitError: if index_unit is neither "M" nor "FT".
        '''
        if self.index_unit == "M":
            return self.index / 0.3048
        elif self.index_unit == "FT":
            return self.index
        else:
            raise LASUnknownUnitError("Unit of depth index not known")

    def add_curve(self, mnemonic, data, unit="", descr="", value=""):
        '''Create a curve item holding data, store it under mnemonic in the
        curves section, then call refresh().'''
        curve = CurveItem(mnemonic, unit, value, descr)
        curve.data = data
        self.curves[mnemonic] = curve
        self.refresh()

    @property
    def header(self):
        '''The dictionary of all sections.'''
        return self.sections
797 | |||
1270 |