Passed
Push — master ( cdc1b3...61816f )
by Kent
01:23 queued 41s
created

LASFile.data()   A

Complexity

Conditions 1

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
c 0
b 0
f 0
dl 0
loc 3
rs 10
1
from __future__ import print_function
2
3
# Standard library packages
4
import codecs
5
import csv
6
import json
7
import logging
8
import os
9
import re
10
import textwrap
11
import traceback
12
13
# get basestring in py3
14
15
# Python 2/3 text-type shim. Referencing ``unicode`` raises NameError on
# Python 3, which selects the branch that builds the aliases from ``str``.
try:
    unicode = unicode
    # Only reached when 'unicode' exists, i.e. on Python 2.
    bytes = str
    basestring = basestring
except NameError:
    # 'unicode' is undefined, must be Python 3.
    unicode = str
    basestring = (str, bytes)
25
26
# Required third-party packages available on PyPi:
27
28
import numpy as np
29
30
# internal lasio imports
31
32
from . import exceptions
33
from .las_items import (
34
    HeaderItem, CurveItem, SectionItems, OrderedDict)
35
from . import defaults
36
from . import reader
37
from . import writer
38
39
logger = logging.getLogger(__name__)
40
41
42
class LASFile(object):
43
44
    '''LAS file object.
45
46
    Keyword Arguments:
47
        file_ref (file-like object, str): either a filename, an open file 
48
            object, or a string containing the contents of a file.
49
50
    See these routines for additional keyword arguments you can use when
51
    reading in a LAS file:
52
53
    * :func:`lasio.reader.open_with_codecs` - manage issues related to character
54
      encodings
55
    * :meth:`lasio.las.LASFile.read` - control how NULL values and errors are
56
      handled during parsing
57
58
    Attributes:
59
        encoding (str or None): the character encoding used when reading the
60
            file in from disk
61
62
    '''
63
64
    def __init__(self, file_ref=None, **read_kwargs):
        '''Set up the default sections and optionally read a file.

        Keyword Arguments:
            file_ref (file-like object, str): when not None, it is handed to
                :meth:`read` together with ``**read_kwargs``.

        '''
        super(LASFile, self).__init__()
        self._text = ''
        self.index_unit = None

        items = defaults.get_default_items()
        sections = dict(
            (name, items[name])
            for name in ('Version', 'Well', 'Curves', 'Parameter'))
        # The "Other" section is stored as text, not as a list of items.
        sections['Other'] = str(items['Other'])
        self.sections = sections

        if file_ref is not None:
            self.read(file_ref, **read_kwargs)
79
80
    def read(self, file_ref,
             ignore_data=False, read_policy='default', null_policy='common',
             ignore_header_errors=False, mnemonic_case='upper',
             **kwargs):
        '''Read a LAS file.

        Arguments:
            file_ref (file-like object, str): either a filename, an open file
                object, or a string containing the contents of a file.

        Keyword Arguments:
            ignore_data (bool): if True, do not read in any of the actual data,
                just the header metadata. False by default.
            read_policy (str): passed to
                :func:`lasio.reader.get_substitutions`.
            null_policy (str): passed to
                :func:`lasio.reader.get_substitutions`.
            ignore_header_errors (bool): ignore LASHeaderErrors (False by
                default)
            mnemonic_case (str): 'preserve': keep the case of HeaderItem mnemonics
                                 'upper': convert all HeaderItem mnemonics to uppercase
                                 'lower': convert all HeaderItem mnemonics to lowercase

        Any other keyword arguments are passed to
        :func:`lasio.reader.open_file`. See
        :func:`lasio.reader.open_with_codecs` for additional keyword
        arguments which help to manage issues related to character encodings.

        Raises:
            KeyError: if no ``~`` sections are found in the file.

        '''

        file_obj, self.encoding = reader.open_file(file_ref, **kwargs)

        # Release the file even when parsing raises.
        try:
            regexp_subs, value_null_subs, version_NULL = (
                reader.get_substitutions(read_policy, null_policy))

            self.raw_sections = reader.read_file_contents(
                file_obj, regexp_subs, value_null_subs,
                ignore_data=ignore_data)
        finally:
            if hasattr(file_obj, "close"):
                file_obj.close()

        if len(self.raw_sections) == 0:
            raise KeyError('No ~ sections found. Is this a LAS file?')

        header_kws = dict(ignore_header_errors=ignore_header_errors,
                          mnemonic_case=mnemonic_case)

        def add_section(pattern, name, **sect_kws):
            # Parse the raw section matching `pattern` into self.sections[name]
            # and remove it from the pool of still-unprocessed raw sections.
            raw_section = self.match_raw_section(pattern)
            if raw_section:
                self.sections[name] = reader.parse_header_section(
                    raw_section, **sect_kws)
                self.raw_sections.pop(raw_section["title"])
            else:
                logger.warning("Header section %s regexp=%s was not found."
                               % (name, pattern))

        add_section("~V", "Version", version=1.2, **header_kws)

        # Establish version and wrap values if possible.

        try:
            version = self.version['VERS'].value
        except KeyError:
            logger.warning('VERS item not found in the ~V section.')
            version = None

        try:
            wrap = self.version['WRAP'].value
        except KeyError:
            logger.warning('WRAP item not found in the ~V section')
            wrap = None

        # Validate version.
        #
        # If VERS was missing and version = None, then the file will be read in
        # as if version were 2.0. But there will be no VERS HeaderItem, meaning
        # that las.write(..., version=None) will fail with a KeyError. But
        # las.write(..., version=1.2) will work because a new VERS HeaderItem
        # will be created.
        #
        # Explicit comparisons are used (not ``assert``) so that the
        # normalisation still happens when Python runs with -O.

        if version is None:
            logger.info('Assuming that LAS VERS is 2.0')
            version = 2
        elif version not in (1.2, 2):
            # Snap any other value to the nearest supported version.
            version = 1.2 if version < 2 else 2

        add_section("~W", "Well", version=version, **header_kws)

        # Establish NULL value if possible.

        try:
            null = self.well['NULL'].value
        except KeyError:
            logger.warning('NULL item not found in the ~W section')
            null = None

        add_section("~C", "Curves", version=version, **header_kws)
        add_section("~P", "Parameter", version=version, **header_kws)

        # The ~O section is kept as free text.
        s = self.match_raw_section("~O")
        if s:
            self.sections["Other"] = "\n".join(s["lines"])
            self.raw_sections.pop(s["title"])

        # Deal with nonstandard sections that some operators and/or
        # service companies (eg IHS) insist on adding.
        drop = []
        for s in self.raw_sections.values():
            if s["section_type"] == "header":
                logger.warning('Found nonstandard LAS section: ' + s["title"])
                self.sections[s["title"][1:]] = "\n".join(s["lines"])
                drop.append(s["title"])
        # Pop after the loop; the dict cannot change size while iterated.
        for key in drop:
            self.raw_sections.pop(key)

        if not ignore_data:
            s = self.match_raw_section("~A")
            s_valid = True
            if s is None:
                logger.warning("No data section (regexp='~A') found")
                s_valid = False
            elif 'ncols' in s and s['ncols'] is None:
                logger.warning('No numerical data found inside ~A section')
                s_valid = False

            if s_valid:
                arr = s["array"]
                logger.debug('~A data.shape {}'.format(arr.shape))
                if version_NULL:
                    arr[arr == null] = np.nan
                logger.debug('~A after NULL replacement data.shape {}'.format(arr.shape))

                n_curves = len(self.curves)
                n_arr_cols = n_curves  # provisional pending below check
                logger.debug("n_curves=%d ncols=%d" % (n_curves, s["ncols"]))
                if wrap == "NO":
                    if s["ncols"] > n_curves:
                        n_arr_cols = s["ncols"]
                data = np.reshape(arr, (-1, n_arr_cols))

                self.set_data(data, truncate=False)
                self.raw_sections.pop(s["title"])

        # Work out the index unit from the STRT/STOP/STEP items and the first
        # curve: a unit is accepted when every one of them agrees with it.
        check_units_on = []
        for mnemonic in ('STRT', 'STOP', 'STEP'):
            if mnemonic in self.well:
                check_units_on.append(self.well[mnemonic])
        if len(self.curves) > 0:
            check_units_on.append(self.curves[0])
        for index_unit, possibilities in defaults.DEPTH_UNITS.items():
            if all(i.unit.upper() in possibilities for i in check_units_on):
                self.index_unit = index_unit
251
252
    def write(self, file_ref, **kwargs):
        '''Write LAS file to disk.

        Arguments:
            file_ref (open file-like object or str): a file-like object opened
                for writing, or a filename.

        All ``**kwargs`` are passed to :func:`lasio.writer.write` -- please
        check the docstring of that function for more keyword arguments you can
        use here!

        A file opened by this method (i.e. when a filename was given) is
        always closed again, even if writing fails. File objects supplied by
        the caller are left open.

        Examples:

            >>> with open('test_output.las', mode='w') as f:
            ...     lasfile_obj.write(f, version=2.0)   # <-- this method

        '''
        opened_file = False
        if isinstance(file_ref, basestring) and not hasattr(file_ref, "write"):
            opened_file = True
            file_ref = open(file_ref, "w")
        try:
            writer.write(self, file_ref, **kwargs)
        finally:
            # Only close what this method opened itself.
            if opened_file:
                file_ref.close()
276
277
    def to_excel(self, filename):
        '''Export LAS file to a Microsoft Excel workbook.

        This function will raise an :exc:`ImportError` if ``openpyxl`` is not
        installed.

        Arguments:
            filename (str)

        '''
        # Imported here so the Excel dependency is only needed on demand.
        from . import excel
        excel.ExcelConverter(self).write(filename)
290
291
    def to_csv(self, file_ref, mnemonics=True, units=True, units_loc='line', **kwargs):
292
        '''Export to a CSV file.
293
294
        Arguments:
295
            file_ref (open file-like object or str): a file-like object opening
296
                for writing, or a filename.
297
298
        Keyword Arguments:
299
            mnemonics (list, True, False): write mnemonics as a header line at the
300
                start. If list, use the supplied items as mnemonics. If True,
301
                use the curve mnemonics.
302
            units (list, True, False): as for mnemonics.
303
            units_loc (str or None): either 'line', '[]' or '()'. 'line' will put
304
                units on the line following the mnemonics (good for WellCAD). 
305
                '[]' and '()' will put the units in either brackets or 
306
                parentheses following the mnemonics, on the single header line
307
                (better for Excel)
308
            **kwargs: passed to :class:`csv.writer`. Note that if
309
                ``lineterminator`` is **not** specified here, then it will be
310
                sent to :class:`csv.writer` as ``lineterminator='\\n'``.
311
312
        '''
313
        opened_file = False
314
        if isinstance(file_ref, basestring) and not hasattr(file_ref, "write"):
315
            opened_file = True
316
            file_ref = open(file_ref, "w")
317
318
        if not 'lineterminator' in kwargs:
319
            kwargs['lineterminator'] = '\n'
320
        writer = csv.writer(file_ref, **kwargs)
321
        
322
        if mnemonics is True:
323
            mnemonics = [c.original_mnemonic for c in self.curves]
324
        if units is True:
325
            units = [c.unit for c in self.curves]
326
327
        if mnemonics:
328
            if units_loc in ('()', '[]') and units:
329
                mnemonics = [
330
                    m + ' ' + units_loc[0] + u + units_loc[1] 
331
                    for m, u in zip(mnemonics, units)]
332
            writer.writerow(mnemonics)
333
        if units:
334
            if units_loc == 'line':
335
                writer.writerow(units)
336
337
        for i in range(self.data.shape[0]):
338
            writer.writerow(self.data[i, :])
339
        
340
        if opened_file:
341
            file_ref.close()
342
343
    def match_raw_section(self, pattern, re_func="match", flags=re.IGNORECASE):
344
        '''Find raw section with a regular expression.
345
346
        Arguments:
347
            pattern (str): regular expression (you need to include the tilde)
348
349
        Keyword Arguments:
350
            re_func (str): either "match" or "search", see python ``re`` module.
351
            flags (int): flags for :func:`re.compile`
352
353
        Returns:
354
            dict
355
356
        Intended for internal use only.
357
358
        '''
359
        for title in self.raw_sections.keys():
360
            title = title.strip()
361
            p = re.compile(pattern, flags=flags)
362
            if re_func == "match":
363
                re_func = re.match
364
            elif re_func == "search":
365
                re_func == re.search
366
            m = re_func(p, title)
367
            if m:
368
                return self.raw_sections[title]
369
370
    def get_curve(self, mnemonic):
371
        '''Return CurveItem object.
372
373
        Arguments:
374
            mnemonic (str): the name of the curve
375
376
        Returns:
377
            :class:`lasio.las_items.CurveItem` (not just the data array)
378
379
        '''
380
        for curve in self.curves:
381
            if curve.mnemonic == mnemonic:
382
                return curve
383
384
    def __getitem__(self, key):
385
        '''Provide access to curve data.
386
387
        Arguments:
388
            key (str, int): either a curve mnemonic or the column index.
389
390
        Returns:
391
            1D :class:`numpy.ndarray` (the data for the curve)
392
393
        '''
394
        #TODO: If I implement 2D arrays, need to check here for :1 :2 :3 etc.
395
        curve_mnemonics = [c.mnemonic for c in self.curves]
396
        if isinstance(key, int):
397
            return self.curves[key].data
398
        elif key in curve_mnemonics:
399
            return self.curves[key].data
400
        else:
401
            raise KeyError('{} not found in curves ({})'.format(key, curve_mnemonics))
402
403
    def __setitem__(self, key, value):
404
        '''Not implemented.
405
406
        It is not possible yet to set curve data via the LASFile's item
407
        access shortcut.
408
409
        '''
410
        assert NotImplementedError('not yet')
411
412
    def keys(self):
413
        '''Return curve mnemonics.'''
414
        return [c.mnemonic for c in self.curves]
415
416
    def values(self):
417
        '''Return data for each curve.'''
418
        return [c.data for c in self.curves]
419
420
    def items(self):
421
        '''Return mnemonics and data for all curves.'''
422
        return [(c.mnemonic, c.data) for c in self.curves]
423
424
    def iterkeys(self):
425
        return iter(list(self.keys()))
426
427
    def itervalues(self):
428
        return iter(list(self.values()))
429
430
    def iteritems(self):
431
        return iter(list(self.items()))
432
433
    @property
434
    def version(self):
435
        '''Header information from the Version (~V) section.
436
437
        Returns:
438
            :class:`lasio.las_items.SectionItems` object.
439
440
        '''
441
        return self.sections['Version']
442
443
    @version.setter
444
    def version(self, section):
445
        self.sections['Version'] = section
446
447
    @property
448
    def well(self):
449
        '''Header information from the Well (~W) section.
450
451
        Returns:
452
            :class:`lasio.las_items.SectionItems` object.
453
454
        '''
455
        return self.sections['Well']
456
457
    @well.setter
458
    def well(self, section):
459
        self.sections['Well'] = section
460
461
    @property
462
    def curves(self):
463
        '''Curve information and data from the Curves (~C) and data section..
464
465
        Returns:
466
            :class:`lasio.las_items.SectionItems` object.
467
468
        '''
469
        return self.sections['Curves']
470
471
    @curves.setter
472
    def curves(self, section):
473
        self.sections['Curves'] = section
474
475
    @property
476
    def curvesdict(self):
477
        '''Curve information and data from the Curves (~C) and data section..
478
479
        Returns:
480
            dict
481
482
        '''
483
        d = {}
484
        for curve in self.curves:
485
            d[curve['mnemonic']] = curve
486
        return d
487
488
    @property
489
    def params(self):
490
        '''Header information from the Parameter (~P) section.
491
492
        Returns:
493
            :class:`lasio.las_items.SectionItems` object.
494
495
        '''
496
        return self.sections['Parameter']
497
498
    @params.setter
499
    def params(self, section):
500
        self.sections['Parameter'] = section
501
502
    @property
503
    def other(self):
504
        '''Header information from the Other (~O) section.
505
506
        Returns:
507
            str
508
509
        '''
510
        return self.sections['Other']
511
512
    @other.setter
513
    def other(self, section):
514
        self.sections['Other'] = section
515
516
    @property
517
    def metadata(self):
518
        '''All header information joined together.
519
520
        Returns:
521
            :class:`lasio.las_items.SectionItems` object.
522
523
        '''
524
        s = SectionItems()
525
        for section in self.sections:
526
            for item in section:
527
                s.append(item)
528
        return s
529
530
    @metadata.setter
531
    def metadata(self, value):
532
        raise NotImplementedError('Set values in the section directly')
533
534
    @property
535
    def header(self):
536
        '''All header information
537
538
        Returns:
539
            dict
540
541
        '''
542
        return self.sections
543
544
    def df(self):
545
        '''Return data as a :class:`pandas.DataFrame` structure.'''
546
        import pandas as pd
547
        df = pd.DataFrame(self.data, columns=[c.mnemonic for c in self.curves])
548
        if len(self.curves) > 0:
549
            df = df.set_index(self.curves[0].mnemonic)
550
        return df
551
552
    @property
553
    def data(self):
554
        return np.vstack([c.data for c in self.curves]).T
555
556
    @data.setter
557
    def data(self, value):
558
        return self.set_data(value)
559
560
    def set_data(self, array_like, names=None, truncate=False):
561
        '''Set the data for the LAS; actually sets data on individual curves.
562
563
        Arguments:
564
            array_like (array_like or :class:`pandas.DataFrame`): 2-D data array
565
566
        Keyword Arguments:
567
            names (list, optional): used to replace the names of the existing
568
                :class:`lasio.las_items.CurveItem` objects.
569
            truncate (bool): remove any columns which are not included in the
570
                Curves (~C) section.
571
572
        Note: you can pass a :class:`pandas.DataFrame` to this method.
573
574
        '''
575
        try:
576
            import pandas as pd
577
        except ImportError:
578
            pass
579
        else:
580
            if isinstance(array_like, pd.DataFrame):
581
                return self.set_data_from_df(
582
                    array_like, **dict(names=names, truncate=False))
583
        data = np.asarray(array_like)
584
585
        # Truncate data array if necessary.
586
        if truncate:
587
            data = data[:, len(self.curves)]
588
589
        # Extend curves list if necessary.
590
        while data.shape[1] > len(self.curves):
591
            self.curves.append(CurveItem(''))
592
593
        if not names:
594
            names = [c.original_mnemonic for c in self.curves]
595
        else:
596
            # Extend names list if necessary.
597
            while len(self.curves) > len(names):
598
                names.append('')
599
        logger.debug('set_data. names to use: {}'.format(names))
600
601
        for i, curve in enumerate(self.curves):
602
            curve.mnemonic = names[i]
603
            curve.data = data[:, i]
604
            
605
        self.curves.assign_duplicate_suffixes()
606
607
    def set_data_from_df(self, df, **kwargs):
608
        '''Set the LAS file data from a :class:`pandas.DataFrame`.
609
610
        Arguments:
611
            df (pandas.DataFrame): curve mnemonics are the column names.
612
613
        Keyword arguments are passed to :meth:`lasio.las.LASFile.set_data`.
614
615
        '''
616
        df_values = np.vstack([df.index.values, df.values.T]).T
617
        if (not 'names' in kwargs) or (not kwargs['names']):
618
            kwargs['names'] = [df.index.name] + [str(name) for name in df.columns.values]
619
        self.set_data(df_values, **kwargs)
620
621
    @property
622
    def index(self):
623
        '''Return data from the first column of the LAS file data (depth/time).
624
625
        '''
626
        return self.curves[0].data
627
628
    @property
629
    def depth_m(self):
630
        '''Return the index as metres.'''
631
        if self.index_unit == 'M':
632
            return self.index
633
        elif self.index_unit == 'FT':
634
            return self.index * 0.3048
635
        else:
636
            raise exceptions.LASUnknownUnitError(
637
                'Unit of depth index not known')
638
639
    @property
640
    def depth_ft(self):
641
        '''Return the index as feet.'''
642
        if self.index_unit == 'M':
643
            return self.index / 0.3048
644
        elif self.index_unit == 'FT':
645
            return self.index
646
        else:
647
            raise exceptions.LASUnknownUnitError(
648
                'Unit of depth index not known')
649
650
    def add_curve_raw(self, mnemonic, data, unit='', descr='', value=''):
651
        '''Deprecated. Use append_curve_item() or insert_curve_item() instead.'''
652
        return self.append_curve_item(self, mnemonic, data, unit, descr, value)
653
654
    def append_curve_item(self, curve_item):
655
        '''Add a CurveItem.
656
657
        Args:
658
            curve_item (lasio.CurveItem)
659
660
        '''
661
        self.insert_curve_item(len(self.curves), curve_item)
662
663
    def insert_curve_item(self, ix, curve_item):
        '''Insert a CurveItem at a given position in the curves.

        Args:
            ix (int): position to insert CurveItem i.e. 0 for start
            curve_item (lasio.CurveItem)

        '''
        # Only CurveItem objects are accepted.
        assert isinstance(curve_item, CurveItem)
        curves = self.curves
        curves.insert(ix, curve_item)
673
674
    def add_curve(self, *args, **kwargs):
675
        '''Deprecated. Use append_curve() or insert_curve() instead.'''
676
        return self.append_curve(*args, **kwargs)
677
678
    def append_curve(self, mnemonic, data, unit='', descr='', value=''):
679
        '''Add a curve.
680
681
        Arguments:
682
            mnemonic (str): the curve mnemonic
683
            data (1D ndarray): the curve data
684
685
        Keyword Arguments:
686
            unit (str): curve unit
687
            descr (str): curve description
688
            value (int/float/str): value e.g. API code.
689
690
        '''
691
        return self.insert_curve(len(self.curves), mnemonic, data, unit, descr, value)
692
693
    def insert_curve(self, ix, mnemonic, data, unit='', descr='', value=''):
        '''Build a curve from raw values and insert it at a given position.

        Arguments:
            ix (int): position to insert curve at i.e. 0 for start.
            mnemonic (str): the curve mnemonic
            data (1D ndarray): the curve data

        Keyword Arguments:
            unit (str): curve unit
            descr (str): curve description
            value (int/float/str): value e.g. API code.

        '''
        # Note the CurveItem argument order: unit, value, descr, data.
        self.insert_curve_item(
            ix, CurveItem(mnemonic, unit, value, descr, data))
709
710
    def delete_curve(self, mnemonic=None, ix=None):
711
        '''Delete a curve.
712
713
        Keyword Arguments:
714
            ix (int): index of curve in LASFile.curves.
715
            mnemonic (str): mnemonic of curve.
716
717
        The index takes precedence over the mnemonic.
718
719
        '''
720
        if ix is None:
721
            ix = self.curves.keys().index(mnemonic)
722
        self.curves.pop(ix)
723
724
    @property
725
    def json(self):
726
        '''Return object contents as a JSON string.'''
727
        obj = OrderedDict()
728
        for name, section in self.sections.items():
729
            try:
730
                obj[name] = section.json
731
            except AttributeError:
732
                obj[name] = json.dumps(section)
733
        return json.dumps(obj)
734
735
    @json.setter
736
    def json(self, value):
737
        raise Exception('Cannot set objects from JSON')
738
739
740
741
class Las(LASFile):

    '''Alias of :class:`LASFile` under its older name.

    Retained for backwards compatibility.

    '''
749
750
751
class JSONEncoder(json.JSONEncoder):

    '''JSON encoder which can serialise :class:`LASFile` objects.

    A LASFile becomes a dict with two keys: ``'metadata'`` (each section as
    either its text or a list of item dicts) and ``'data'`` (each curve's
    data as a list, keyed by mnemonic).

    '''

    def default(self, obj):
        '''Return a JSON-serialisable version of ``obj``.

        Raises:
            TypeError: (from the base class) for anything that is not a
                :class:`LASFile`.

        '''
        if not isinstance(obj, LASFile):
            # Defer to the base class so unsupported objects raise TypeError
            # instead of being silently encoded as null.
            return super(JSONEncoder, self).default(obj)

        d = {'metadata': {},
             'data': {}}
        for name, section in obj.sections.items():
            if isinstance(section, basestring):
                d['metadata'][name] = section
            else:
                d['metadata'][name] = [dict(item) for item in section]
        for curve in obj.curves:
            # tolist() yields native Python scalars, so elements that are not
            # float subclasses (e.g. numpy integers) encode without needing
            # a further default() call.
            d['data'][curve.mnemonic] = np.asarray(curve.data).tolist()
        return d
767