Completed
Pull Request — master (#118)
by Kent
04:21
created

lasio.Reader.read_section()   A

Complexity

Conditions 4

Size

Total Lines 13

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 4
dl 0
loc 13
rs 9.2
1
'''las.py - read Log ASCII Standard files

See README.rst and LICENSE for more information.

'''
6
from __future__ import print_function
7
8
# Standard library packages
9
import codecs
10
import json
11
import logging
12
import os
13
import re
14
import textwrap
15
import traceback
16
17
# The standard library OrderedDict was introduced in Python 2.7 so
18
# we have a third-party option to support Python 2.6
19
20
try:
21
    from collections import OrderedDict
22
except ImportError:
23
    from ordereddict import OrderedDict
24
25
# Convoluted import for StringIO in order to support:
26
#
27
# - Python 3 - io.StringIO
28
# - Python 2 (optimized) - cStringIO.StringIO
29
# - Python 2 (all) - StringIO.StringIO
30
31
try:
32
    import cStringIO as StringIO
33
except ImportError:
34
    try:  # cStringIO not available on this system
35
        import StringIO
36
    except ImportError:  # Python 3
37
        from io import StringIO
38
    else:
39
        from StringIO import StringIO
40
else:
41
    from StringIO import StringIO
42
43
# get basestring in py3
44
45
# Provide the names ``str``, ``unicode``, ``bytes`` and ``basestring`` at
# module level with the same meaning on Python 2 and Python 3.
try:
    unicode = unicode
except NameError:
    # 'unicode' is undefined, must be Python 3
    str = str
    unicode = str
    bytes = bytes
    # There is no basestring on Python 3; a tuple works with isinstance().
    basestring = (str, bytes)
else:
    # 'unicode' exists, must be Python 2
    str = str
    unicode = unicode
    bytes = str
    basestring = basestring
59
60
# Required third-party packages available on PyPi:
61
62
from namedlist import namedlist
63
import numpy
64
65
# Optional third-party packages available on PyPI are mostly
66
# imported inline below.
67
68
69
# Module-wide logger; handlers and levels are left to the application.
logger = logging.getLogger(__name__)
__version__ = '0.10'
71
72
73
class LASDataError(Exception):

    '''Error during reading of numerical data from LAS file.'''
77
78
79
class LASHeaderError(Exception):

    '''Error during reading of header data from LAS file.'''
83
84
85
class LASUnknownUnitError(Exception):

    '''Error of unknown unit in LAS file.'''
89
90
91
class HeaderItem(OrderedDict):

    '''A single header entry: mnemonic, unit, value and description.

    The fields are stored as plain attributes; ``item[key]`` is a read-only
    alias for the attribute of the same name and is restricted to the names
    listed in ``_ITEM_KEYS``.

    Arguments:
        mnemonic (str): mnemonic exactly as it should be written to a file.

    Keyword Arguments:
        unit (str): unit of the value
        value: the value itself
        descr (str): free-text description

    '''

    # The only names accepted by __getitem__.
    _ITEM_KEYS = ('mnemonic', 'original_mnemonic', 'useful_mnemonic',
                  'unit', 'value', 'descr')

    def __init__(self, mnemonic, unit="", value="", descr=""):
        super(HeaderItem, self).__init__()

        # The original mnemonic needs to be stored for rewriting a new file.
        # It might be nothing - '' - or a duplicate e.g. two 'RHO' curves,
        # or unique - 'X11124' - or perhaps invalid??
        self.original_mnemonic = mnemonic

        # We also need to store a more useful mnemonic, which will be used
        # (technically not, but read on) for people to access the curve while
        # the LASFile object exists. For example, a curve which is unnamed
        # and has the mnemonic '' will be accessed via 'UNKNOWN'.
        if mnemonic.strip() == '':
            self.useful_mnemonic = 'UNKNOWN'
        else:
            self.useful_mnemonic = mnemonic

        # But note that we need to (later) check (repeatedly) for duplicate
        # mnemonics. Any duplicates will have ':1', ':2', ':3', etc., appended
        # to them. The result of this will be stored in the below variable,
        # which is what the user should actually see and use 99.5% of the time.
        self.mnemonic = self.useful_mnemonic

        self.unit = unit
        self.value = value
        self.descr = descr

    def __getitem__(self, key):
        '''Return the attribute named *key*.

        Raises:
            KeyError: if *key* is not one of ``_ITEM_KEYS``.

        '''
        if key in self._ITEM_KEYS:
            return getattr(self, key)
        # Report the real class name (the message used to say "CurveItem"
        # even for plain HeaderItem objects).
        raise KeyError('%s only has restricted items (not %s)' % (
            self.__class__.__name__, key))

    def __repr__(self):
        return (
            "%s(mnemonic=%s, unit=%s, value=%s, "
            "descr=%s, original_mnemonic=%s)" % (
                self.__class__.__name__, self.mnemonic, self.unit, self.value,
                self.descr, self.original_mnemonic))

    def _repr_pretty_(self, p, cycle):
        '''Pretty-printer hook: emit the normal repr through *p*.'''
        return p.text(self.__repr__())
147
148
149
class CurveItem(HeaderItem):

    '''A HeaderItem describing a curve, carrying the curve's data array.

    Accepts the same arguments as HeaderItem. The ``data`` attribute is
    expected to be replaced by the caller once the curve data is known.

    '''

    def __init__(self, *args, **kwargs):
        # NOTE(review): numpy.ndarray([]) builds an *uninitialised* 0-d array
        # (shape ``()``), not an empty 1-d array; kept as-is because other
        # code may depend on that placeholder shape -- confirm intent.
        self.data = numpy.ndarray([])
        super(CurveItem, self).__init__(*args, **kwargs)

    @property
    def API_code(self):
        '''Alias for ``value``.'''
        return self.value

    def __repr__(self):
        return (
            "%s(mnemonic=%s, unit=%s, value=%s, "
            "descr=%s, original_mnemonic=%s, data.shape=%s)" % (
                self.__class__.__name__, self.mnemonic, self.unit, self.value,
                self.descr, self.original_mnemonic, self.data.shape))
164
165
166
class SectionItems(list):

    '''List of header items that can also be addressed by mnemonic.

    Items are expected to expose ``mnemonic``, ``useful_mnemonic`` and
    ``value`` attributes. Besides normal list access by position, items can
    be looked up with ``section['MNEM']`` or ``section.MNEM``.

    '''

    def __contains__(self, testitem):
        '''Allows testing of a mnemonic or an actual item.'''
        for item in self:
            if testitem == item.mnemonic:
                return True
            elif hasattr(testitem, 'mnemonic'):
                if testitem.mnemonic == item.mnemonic:
                    return True
            elif testitem is item:
                return True
        return False

    def keys(self):
        '''Return the mnemonics of all items, in order.'''
        return [item.mnemonic for item in self]

    def values(self):
        '''Return the items themselves (this object).'''
        return self

    def items(self):
        '''Return a list of ``(mnemonic, item)`` pairs.'''
        return [(item.mnemonic, item) for item in self]

    def iterkeys(self):
        return iter(self.keys())

    def itervalues(self):
        return iter(self)

    def iteritems(self):
        return iter(self.items())

    def __getitem__(self, key):
        '''Return the item whose mnemonic equals *key*, else index the list.

        Mnemonic lookup takes precedence; integer and slice keys fall back
        to normal list indexing.

        Raises:
            KeyError: if *key* is neither a known mnemonic, an int nor a
                slice.

        '''
        for item in self:
            if item.mnemonic == key:
                return item
        # Slices must be accepted explicitly: Python 3 routes ``s[a:b]``
        # through __getitem__ (Python 2 used __getslice__).
        if isinstance(key, (int, slice)):
            return super(SectionItems, self).__getitem__(key)
        raise KeyError("%s not in %s" % (key, self.keys()))

    def __setitem__(self, key, newitem):
        '''Replace/add a whole item, or set only the value of item *key*.'''
        if isinstance(newitem, HeaderItem):
            self.set_item(key, newitem)
        else:
            self.set_item_value(key, newitem)

    def __getattr__(self, key):
        '''Attribute-style access to items by mnemonic.

        Only called when normal attribute lookup fails.

        Raises:
            AttributeError: if *key* is not the mnemonic of any item.

        '''
        if key in self:
            return self[key]
        # list defines no __getattr__ to delegate to, so raise directly.
        raise AttributeError("%r object has no attribute %r" % (
            self.__class__.__name__, key))

    def __setattr__(self, key, value):
        '''Assigning to an existing mnemonic updates that item.'''
        if key in self:
            self[key] = value
        else:
            super(SectionItems, self).__setattr__(key, value)

    def set_item(self, key, newitem):
        '''Replace the item whose mnemonic is *key*, or append *newitem*.'''
        for i, item in enumerate(self):
            if key == item.mnemonic:

                # This is very important. We replace items where
                # 'mnemonic' is equal - i.e. we do not check useful_mnemonic
                # or original_mnemonic. Is this correct? Needs to be thought
                # about and tested more carefully.

                logger.debug('SectionItems.__setitem__ Replaced %s item', key)
                return super(SectionItems, self).__setitem__(i, newitem)
        self.append(newitem)

    def set_item_value(self, key, value):
        '''Set the ``value`` attribute of the item found by ``self[key]``.'''
        self[key].value = value

    def append(self, newitem):
        '''Append *newitem* and renumber the ':n' suffixes of its duplicates.

        All items sharing the new item's ``useful_mnemonic`` get the
        mnemonics ``NAME:1``, ``NAME:2``, ... in list order. The comparison
        uses ``useful_mnemonic`` (not ``mnemonic``) so that an item which
        already carries a suffix is still matched with its duplicates.

        '''
        logger.debug("SectionItems.append type=%s str=%s",
                     type(newitem), newitem)
        super(SectionItems, self).append(newitem)

        # Check to fix the :n suffixes
        duplicates = [item for item in self
                      if item.useful_mnemonic == newitem.useful_mnemonic]
        if len(duplicates) > 1:
            for n, item in enumerate(duplicates, 1):
                item.mnemonic = item.useful_mnemonic + ":%d" % n

    def dictview(self):
        '''Return a plain dict mapping each mnemonic to its item's value.'''
        return dict(zip(self.keys(), [i.value for i in self.values()]))
263
264
    # def __repr__(self):
265
    #     return (
266
    #         "{cls}({contents})".format(
267
    #             cls=self.__class__.__name__,
268
    #             contents=', '.join([str(item) for item in self])))
269
270
271
class JSONEncoder(json.JSONEncoder):

    '''JSON encoder that knows how to serialise LASFile objects.

    A LASFile becomes ``{'metadata': {...}, 'data': {...}}``: text sections
    are stored as strings, item sections as lists of dicts, and each curve's
    data as a list keyed by the curve mnemonic.

    '''

    # Fields copied from each header item into the JSON output.
    _ITEM_FIELDS = ('mnemonic', 'unit', 'value', 'descr')

    def default(self, obj):
        if not isinstance(obj, LASFile):
            # Let the base class raise TypeError instead of silently
            # returning None (which would be serialised as ``null``).
            return super(JSONEncoder, self).default(obj)

        d = {'metadata': {},
             'data': {}}
        for name, section in obj.sections.items():
            if isinstance(section, basestring):
                d['metadata'][name] = section
            else:
                # dict(item) would always be empty because header items keep
                # their fields as attributes, so copy the fields explicitly.
                d['metadata'][name] = [
                    dict((field, item[field]) for field in self._ITEM_FIELDS)
                    for item in section]
        for curve in obj.curves:
            d['data'][curve.mnemonic] = list(curve.data)
        return d
287
288
289
290
# Default contents of each section of a new, empty LAS file.
DEFAULT_ITEMS = {
    "Version": SectionItems([
        HeaderItem("VERS", "", 2.0, "CWLS log ASCII Standard -VERSION 2.0"),
        HeaderItem("WRAP", "", "NO", "One line per depth step"),
        HeaderItem("DLM", "", "SPACE", "Column Data Section Delimiter"),
        ]),
    "Well": SectionItems([
        HeaderItem("STRT", "m", numpy.nan, "START DEPTH"),
        HeaderItem("STOP", "m", numpy.nan, "STOP DEPTH"),
        HeaderItem("STEP", "m", numpy.nan, "STEP"),
        HeaderItem("NULL", "", -9999.25, "NULL VALUE"),
        HeaderItem("COMP", "", "", "COMPANY"),
        HeaderItem("WELL", "", "", "WELL"),
        HeaderItem("FLD", "", "", "FIELD"),
        HeaderItem("LOC", "", "", "LOCATION"),
        HeaderItem("PROV", "", "", "PROVINCE"),
        HeaderItem("CNTY", "", "", "COUNTY"),
        HeaderItem("STAT", "", "", "STATE"),
        HeaderItem("CTRY", "", "", "COUNTRY"),
        HeaderItem("SRVC", "", "", "SERVICE COMPANY"),
        HeaderItem("DATE", "", "", "DATE"),
        HeaderItem("UWI", "", "", "UNIQUE WELL ID"),
        HeaderItem("API", "", "", "API NUMBER")
        ]),
    "Curves": SectionItems([]),
    "Parameter": SectionItems([]),
    "Other": "",
    "Data": numpy.zeros(shape=(0, 1)),
    }
319
320
321
# Field order for header lines, keyed by LAS version and then by section.
# Each section lists a default order string, optionally followed by
# (order, [mnemonics]) tuples naming mnemonics that use a different order.
ORDER_DEFINITIONS = {
    1.2: OrderedDict([
        ("Version", ["value:descr"]),
        ("Well", [
            "descr:value",
            ("value:descr", ["STRT", "STOP", "STEP", "NULL"])]),
        ("Curves", ["value:descr"]),
        ("Parameter", ["value:descr"]),
        ]),
    2.0: OrderedDict([
        ("Version", ["value:descr"]),
        ("Well", ["value:descr"]),
        ("Curves", ["value:descr"]),
        ("Parameter", ["value:descr"])
        ])}
336
337
338
# Case-insensitive pattern matching a complete http(s)/ftp(s) URL.
URL_REGEXP = re.compile(
    r'^(?:http|ftp)s?://'  # http://, https://, ftp:// or ftps://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}'
    r'\.?|[A-Z0-9-]{2,}\.?)|'  # (cont.) domain...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)$', re.IGNORECASE)
346
347
348
# Candidate null-value markers, grouped by null policy name.
COMMON_NULLS = [
    999.25, -999.25, 9999.25, -9999.25, 0, -999, 999, 9999, -9999,
    2147483647, -2147483647,
    32767, -32767
    ]

AGGRESSIVE_NULLS = [
    0,
    ]
357
358
359
360
class LASFile(object):

    '''LAS file object.

    Keyword Arguments:
        file_ref: either a filename, an open file object, or a string of
            a LAS file contents.
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" - how to
            handle errors with encodings (see standard library codecs module or
            Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    '''
    def __init__(self, file_ref=None, **kwargs):

        self._text = ''
        self._use_pandas = "auto"
        self.index_unit = None
        # Every instance gets its own section objects. Handing out the
        # DEFAULT_ITEMS objects themselves would let one LASFile (e.g. via
        # write() or add_curve()) modify the defaults of all the others.
        self.sections = {
            "Version": self._copy_default_section("Version"),
            "Well": self._copy_default_section("Well"),
            "Curves": self._copy_default_section("Curves"),
            "Parameter": self._copy_default_section("Parameter"),
            "Other": str(DEFAULT_ITEMS["Other"]),
            }

        if file_ref is not None:
            self.read(file_ref, **kwargs)

    @staticmethod
    def _copy_default_section(name):
        '''Return a new SectionItems with copies of DEFAULT_ITEMS[name].'''
        return SectionItems([
            item.__class__(item.original_mnemonic, item.unit,
                           item.value, item.descr)
            for item in DEFAULT_ITEMS[name]])

    def read(self, file_ref, use_pandas="auto", null_policy='common', **kwargs):
        '''Read a LAS file.

        Arguments:
            file_ref: either a filename, an open file object, or a string of
                a LAS file contents.

        Keyword Arguments:
            use_pandas (str): bool or "auto" -- use pandas if available -- provide
                False option for faster loading where pandas functionality is not
                needed. "auto" becomes True if pandas is installed, and False if not.
            null_policy (str): either None, 'NULL', 'common' or 'aggressive' --
                see https://github.com/kinverarity1/lasio/issues/49#issuecomment-127980359
            encoding (str): character encoding to open file_ref with
            encoding_errors (str): "strict", "replace" (default), "ignore" - how to
                handle errors with encodings (see standard library codecs module or
                Python Unicode HOWTO for more information)
            autodetect_encoding (bool): use chardet/cchardet to detect encoding
            autodetect_encoding_chars (int/None): number of chars to read from LAS
                file for auto-detection of encoding.

        Raises:
            KeyError: if the ~V section has no VERS item.

        '''
        if use_pandas is not None:
            self._use_pandas = use_pandas

        f = open_file(file_ref, **kwargs)

        self._text = f.read()
        logger.debug("LASFile.read LAS content is type %s" % type(self._text))

        # The version section is always parsed with the 1.2 rules; the
        # reader is switched to the declared version afterwards.
        reader = Reader(self._text, version=1.2)
        self.sections["Version"] = reader.read_section('~V')

        # Set version
        try:
            reader.version = self.version['VERS'].value
        except KeyError:
            raise KeyError("No key VERS in ~V section")

        # Validate version (a plain test, so it still runs under ``-O``).
        if reader.version not in (1.2, 2):
            logger.warning("LAS spec version is %s -- neither 1.2 nor 2" %
                           reader.version)
            if reader.version < 2:
                reader.version = 1.2
            else:
                reader.version = 2
        reader.wrap = self.version['WRAP'].value == 'YES'

        self.sections["Well"] = reader.read_section('~W')
        self.sections["Curves"] = reader.read_section('~C')
        try:
            self.sections["Parameter"] = reader.read_section('~P')
        except LASHeaderError:
            # A broken parameter section is only reported, not fatal.
            logger.warning(traceback.format_exc().splitlines()[-1])
        self.sections["Other"] = reader.read_raw_text('~O')

        # Set null value
        reader.null = self.well['NULL'].value

        data = reader.read_data(len(self.curves), null_policy=null_policy)

        # Column i of the data array belongs to curve i.
        for i, c in enumerate(self.curves):
            c.data = data[:, i]

        # The index unit is only set when STRT, STOP, STEP and the first
        # curve all agree on metres or on feet.
        if (self.well["STRT"].unit.upper() == "M" and
                self.well["STOP"].unit.upper() == "M" and
                self.well["STEP"].unit.upper() == "M" and
                self.curves[0].unit.upper() == "M"):
            self.index_unit = "M"
        elif (self.well["STRT"].unit.upper() in ("F", "FT") and
              self.well["STOP"].unit.upper() in ("F", "FT") and
              self.well["STEP"].unit.upper() in ("F", "FT") and
              self.curves[0].unit.upper() in ("F", "FT")):
            self.index_unit = "FT"

        self.refresh()

    def refresh(self, use_pandas=None):
        '''Refresh curve names and indices.

        Rebuilds the ``df`` DataFrame when pandas is in use.

        '''
        if use_pandas is not None:
            self._use_pandas = use_pandas

        if self._use_pandas is not False:
            try:
                import pandas
            except ImportError:
                logger.info(
                    "pandas not installed - skipping LASFile.df creation")
                self._use_pandas = False

        if self._use_pandas:
            self.df = pandas.DataFrame(self.data, columns=self.keys())
            self.df.set_index(self.curves[0].mnemonic, inplace=True)

    @property
    def data(self):
        '''2D array of data from LAS file.'''
        return numpy.vstack([c.data for c in self.curves]).T

    def write(self, file_object, version=None, wrap=None,
              STRT=None, STOP=None, STEP=None, fmt="%10.5g"):
        '''Write to a file.

        Arguments:
            file_object: a file_like object opening for writing.
            version (float): either 1.2 or 2
            wrap (bool): True, False, or None (last uses WRAP item in version)
            STRT (float): optional override to automatic calculation using
                the first index curve value.
            STOP (float): optional override to automatic calculation using
                the last index curve value.
            STEP (float): optional override to automatic calculation using
                the first step size in the index curve.
            fmt (str): format string for numerical data being written to data
                section.

        Examples:

            >>> with open("test_output.las", mode="w") as f:
            ...     lasfile_obj.write(f, 2.0)   # <-- this method

        '''
        if wrap is None:
            # Compare the item's value; comparing the HeaderItem object
            # itself to "YES" is never true.
            wrap = self.version["WRAP"].value == "YES"
        elif wrap is True:
            self.version["WRAP"] = HeaderItem(
                "WRAP", "", "YES", "Multiple lines per depth step")
        elif wrap is False:
            self.version["WRAP"] = HeaderItem(
                "WRAP", "", "NO", "One line per depth step")

        # Kept as an assert so callers still see AssertionError.
        assert version in (1.2, 2, None)
        if version is None:
            version = self.version["VERS"].value
        if version == 1.2:
            self.version["VERS"] = HeaderItem(
                "VERS", "", 1.2, "CWLS LOG ASCII STANDARD - VERSION 1.2")
        elif version == 2:
            self.version["VERS"] = HeaderItem(
                "VERS", "", 2.0, "CWLS log ASCII Standard -VERSION 2.0")

        if STRT is None:
            STRT = self.index[0]
        if STOP is None:
            STOP = self.index[-1]
        if STEP is None:
            STEP = self.index[1] - self.index[0]  # Faster than numpy.gradient
        self.well["STRT"].value = STRT
        self.well["STOP"].value = STOP
        self.well["STEP"].value = STEP

        # Check for any changes in the pandas dataframe and if there are,
        # create new curves so they are reflected in the output LAS file.
        if self._use_pandas:
            for df_curve_name in list(self.df.columns.values):
                # Re-read the names every time: add_curve() changes them.
                if df_curve_name not in [ci.mnemonic for ci in self.curves]:
                    self.add_curve(df_curve_name, self.df[df_curve_name])

        # Write each header section.
        header_lines = []
        for title, name, section in (
                ("~Version ", "Version", self.version),
                ("~Well ", "Well", self.well),
                ("~Curves ", "Curves", self.curves),
                ("~Params ", "Parameter", self.params)):
            header_lines += self._format_section(title, name, section, version)

        # ~Other
        header_lines.append("~Other ".ljust(60, "-"))
        header_lines += self.other.splitlines()

        header_lines.append("~ASCII ".ljust(60, "-"))

        file_object.write("\n".join(header_lines))
        file_object.write("\n")

        data_arr = numpy.column_stack([c.data for c in self.curves])
        nrows, ncols = data_arr.shape

        def format_data_section_line(n, fmt, l=10, spacer=" "):
            # NaN is written as the file's NULL value.
            if numpy.isnan(n):
                return spacer + str(self.well["NULL"].value).rjust(l)
            else:
                return spacer + (fmt % n).rjust(l)

        twrapper = textwrap.TextWrapper(width=79)
        check_length = self.version["VERS"].value == 1.2
        for i in range(nrows):
            depth_slice = ''.join(
                format_data_section_line(data_arr[i, j], fmt)
                for j in range(ncols))

            if wrap:
                row_lines = twrapper.wrap(depth_slice)
                logger.debug("LASFile.write Wrapped %d lines out of %s" %
                             (len(row_lines), depth_slice))
            else:
                row_lines = [depth_slice]

            if check_length:
                for line in row_lines:
                    if len(line) > 255:
                        logger.warning(
                            "LASFile.write Data line > 256 chars: %s" % line)

            for line in row_lines:
                file_object.write(line + "\n")

    @staticmethod
    def _format_section(title, name, section, version):
        '''Return the title line and one formatted line per item of *section*.

        Arguments:
            title (str): section title, padded with '-' to 60 characters.
            name (str): section name passed to the order/width helpers.
            section: iterable of header items.
            version: LAS version passed to the order/width helpers.

        '''
        logger.debug('LASFile.write %s section' % name)
        lines = [title.ljust(60, "-")]
        order_func = get_section_order_function(name, version)
        section_widths = get_section_widths(name, section, version, order_func)
        for header_item in section:
            order = order_func(header_item.original_mnemonic)
            logger.debug('LASFile.write %s\norder=%s section_widths=%s' % (
                header_item, order, section_widths))
            formatter_func = get_formatter_function(order, **section_widths)
            lines.append(formatter_func(header_item))
        return lines

    def get_curve(self, mnemonic):
        '''Return Curve object.

        Arguments:
            mnemonic (str): the name of the curve

        Returns:
            A Curve object, not just the data array, or None if no curve
            has that mnemonic.

        '''
        for curve in self.curves:
            if curve.mnemonic == mnemonic:
                return curve

    def __getitem__(self, key):
        '''Return the data of the curve at position or with mnemonic *key*.

        Raises:
            KeyError: if *key* is not an int and not a curve mnemonic.

        '''
        if isinstance(key, int):
            return self.curves[key].data
        mnemonics = self.keys()
        if key in mnemonics:
            return self.curves[key].data
        raise KeyError("%s not found in curves (%s)" % (key, mnemonics))

    def __setitem__(self, key, value):
        '''Item assignment is not supported yet.'''
        # ``assert NotImplementedError(...)`` always passed silently.
        raise NotImplementedError('not yet')

    def keys(self):
        '''Return the curve mnemonics, in order.'''
        return [c.mnemonic for c in self.curves]

    def values(self):
        '''Return the curve data arrays, in order.'''
        return [c.data for c in self.curves]

    def items(self):
        '''Return a list of ``(mnemonic, data)`` pairs.'''
        return [(c.mnemonic, c.data) for c in self.curves]

    def iterkeys(self):
        return iter(list(self.keys()))

    def itervalues(self):
        return iter(list(self.values()))

    def iteritems(self):
        return iter(list(self.items()))

    @property
    def version(self):
        '''The "Version" section.'''
        return self.sections["Version"]

    @version.setter
    def version(self, section):
        self.sections["Version"] = section

    @property
    def well(self):
        '''The "Well" section.'''
        return self.sections["Well"]

    @well.setter
    def well(self, section):
        self.sections["Well"] = section

    @property
    def curves(self):
        '''The "Curves" section.'''
        return self.sections["Curves"]

    @curves.setter
    def curves(self, section):
        self.sections["Curves"] = section

    @property
    def params(self):
        '''The "Parameter" section.'''
        return self.sections["Parameter"]

    @params.setter
    def params(self, section):
        self.sections["Parameter"] = section

    @property
    def other(self):
        '''The "Other" section (raw text).'''
        return self.sections["Other"]

    @other.setter
    def other(self, section):
        self.sections["Other"] = section

    @property
    def metadata(self):
        '''All header items from every item section, in one SectionItems.

        Text sections (such as "Other") are skipped.

        '''
        s = SectionItems()
        # Iterate the section objects, not the dict keys. extend() is used
        # rather than append() so that building this view never renames
        # (re-suffixes) the mnemonics of the items themselves.
        for section in self.sections.values():
            if isinstance(section, SectionItems):
                s.extend(section)
        return s

    @metadata.setter
    def metadata(self, value):
        raise Warning('Set values in the version/well/params attrs directly')

    @property
    def df(self):
        '''The pandas DataFrame, or None (with a warning) without pandas.'''
        if self._use_pandas:
            return self._df
        else:
            logger.warning(
                "pandas is not installed or use_pandas was set to False")

    @df.setter
    def df(self, value):
        self._df = value

    @property
    def index(self):
        '''Data of the first curve (first column of ``data``).'''
        return self.data[:, 0]

    @property
    def depth_m(self):
        '''The index converted to metres.

        Raises:
            LASUnknownUnitError: if index_unit is neither "M" nor "FT".

        '''
        if self.index_unit == "M":
            return self.index
        elif self.index_unit == "FT":
            return self.index * 0.3048
        else:
            raise LASUnknownUnitError("Unit of depth index not known")

    @property
    def depth_ft(self):
        '''The index converted to feet.

        Raises:
            LASUnknownUnitError: if index_unit is neither "M" nor "FT".

        '''
        if self.index_unit == "M":
            return self.index / 0.3048
        elif self.index_unit == "FT":
            return self.index
        else:
            raise LASUnknownUnitError("Unit of depth index not known")

    def add_curve(self, mnemonic, data, unit="", descr="", value=""):
        '''Add (or replace) the curve *mnemonic* and refresh the object.'''
        curve = CurveItem(mnemonic, unit, value, descr)
        curve.data = data
        self.curves[mnemonic] = curve
        self.refresh()

    @property
    def header(self):
        '''The dict of all sections.'''
        return self.sections
811
812
813
class Las(LASFile):

    '''LAS file object.

    Retained for backwards compatibility.

    '''
    pass
821
822
823
class Reader(object):

    '''Line-oriented reader for the text of a LAS file.

    Arguments:
        text (str): full contents of the LAS file
        version: LAS version; handed to SectionParser by read_section()

    Attributes:
        lines (list): *text* split into lines
        version: as given
        null: value that read_data() replaces with NaN (initially NaN)
        wrap (bool): whether read_data() treats the data section as
            wrapped (initially True)

    '''

    def __init__(self, text, version):
        self.lines = text.splitlines()
        self.version = version
        self.null = numpy.nan
        self.wrap = True

    @property
    def section_names(self):
        '''Stripped lines that begin with "~", in file order.'''
        # Blank lines and "#" comments can never start with "~", so a single
        # prefix test is enough.
        stripped = (raw.strip() for raw in self.lines)
        return [line for line in stripped if line.startswith('~')]

    def iter_section_lines(self, section_name, ignore_comments=True):
        '''Yield the stripped, non-blank lines that belong to one section.

        Arguments:
            section_name (str): prefix identifying the section's title
                line, e.g. "~C"

        Keyword Arguments:
            ignore_comments (bool): skip lines starting with "#"

        Yields:
            Each line after the first line starting with *section_name*,
            up to (not including) the next line that starts with
            *section_name* or "~", or the end of the file.

        '''
        in_section = False
        for line in self.lines:
            line = line.strip()
            if not line:
                continue
            if ignore_comments and line.startswith('#'):
                continue
            if line.startswith(section_name):
                if in_section:
                    # A second title line with the same prefix ends it.
                    return
                in_section = True
                continue
            if in_section and line.startswith('~'):
                # Start of the next section; we're done here.
                break
            if in_section:
                yield line

    def read_raw_text(self, section_name):
        '''Return a section's lines, comments included, joined by "\\n".'''
        return '\n'.join(self.iter_section_lines(section_name,
                                                 ignore_comments=False))

    def read_section(self, section_name):
        '''Parse every line of a header section into an item.

        Arguments:
            section_name (str): section prefix, e.g. "~W"

        Returns:
            A SectionItems containing one parsed item per section line.

        Raises:
            LASHeaderError: read_line() failed on one of the lines.

        '''
        parser = SectionParser(section_name, version=self.version)
        section = SectionItems()
        for line in self.iter_section_lines(section_name):
            try:
                values = read_line(line)
            except Exception:
                # Report only the final line of the traceback, i.e. the
                # exception type and message.
                raise LASHeaderError("Failed in %s section on line:\n%s%s" % (
                    section_name, line,
                    traceback.format_exc().splitlines()[-1]))
            section.append(parser(**values))
        return section

    def read_data(self, number_of_curves=None, null_policy='common'):
        '''Parse the data section into a numpy array.

        Keyword Arguments:
            number_of_curves (int): number of columns; used to reshape the
                values when ``self.wrap`` is true
            null_policy (str): "NULL" replaces values equal to
                ``self.null`` with NaN; "common" additionally replaces each
                value in COMMON_NULLS; "aggressive" additionally replaces
                each value in AGGRESSIVE_NULLS. Any other value leaves the
                data untouched.

        Returns:
            The data array, or the tuple ``(None, None)`` when no data is
            present.

        Raises:
            LASDataError: numpy.loadtxt could not parse the text.

        '''
        s = self.read_data_string()
        wrapped = self.wrap
        if wrapped:
            # Wrapped data: flatten everything onto one line and recover
            # the rows afterwards from the number of curves.
            s = re.sub(r"[\n\t\r]", " ", s)
            failure = "Failed to read wrapped data: %s"
        else:
            failure = "Failed to read data:\n%s"
        try:
            arr = numpy.loadtxt(StringIO(s))
        except Exception:
            raise LASDataError(
                failure % (traceback.format_exc().splitlines()[-1]))
        if wrapped:
            # The shape is passed as a logging argument: interpolating a
            # tuple with "%" raises TypeError unless it has one element.
            logger.debug('Reader.read_data arr shape = %s', arr.shape)
            logger.debug('Reader.read_data number of curves = %s',
                         number_of_curves)
            arr = numpy.reshape(arr, (-1, number_of_curves))
        if not arr.shape or (arr.ndim == 1 and arr.shape[0] == 0):
            logger.warning('Reader.read_data No data present.')
            # NOTE(review): a 2-tuple here but a bare array below --
            # callers have to cope with both shapes of return value.
            return None, None
        logger.info('LAS file shape = %s', str(arr.shape))
        logger.debug('checking for nulls (NULL = %s)', self.null)
        if null_policy in ['NULL', 'common', 'aggressive']:
            arr[arr == self.null] = numpy.nan
        if null_policy in ['common', 'aggressive']:
            for value in COMMON_NULLS:
                arr[arr == value] = numpy.nan
        if null_policy in ['aggressive']:
            for value in AGGRESSIVE_NULLS:
                arr[arr == value] = numpy.nan
        return arr

    def read_data_string(self):
        '''Return the text of the data section, patched for run-on numbers.

        The data section is every line after the first line starting with
        "~A". If there is no such line, all lines of the file are used.

        Returns:
            The lines joined by "\\n" after three substitutions: a "-"
            between two digits gets a space in front of it; a number
            containing two decimal points becomes " NaN NaN "; and
            "NaN" + one character + optional digits becomes " NaN NaN ".

        '''
        start_data = None
        for i, line in enumerate(self.lines):
            if line.strip().startswith('~A'):
                start_data = i + 1
                break
        s = '\n'.join(self.lines[start_data:])
        s = re.sub(r'(\d)-(\d)', r'\1 -\2', s)
        s = re.sub(r'-?\d*\.\d*\.\d*', ' NaN NaN ', s)
        # NOTE(review): the "." below is unescaped, so it also matches the
        # space after a "NaN" -- including those inserted by the previous
        # substitution. Presumably a literal "." was intended; confirm
        # before changing, since it alters the number of parsed values.
        s = re.sub(r'NaN.\d*', ' NaN NaN ', s)
        return s
927
928
929
class SectionParser(object):

    '''Turn the fields of a parsed header line into a header item.

    Instances are callable: ``parser(name=..., unit=..., value=...,
    descr=...)`` returns the item built by the method chosen for the
    section.

    Arguments:
        section_name (str): section identifier beginning with "~C", "~W",
            "~V" or "~P"

    Keyword Arguments:
        version: key into ORDER_DEFINITIONS (default 1.2)

    Raises:
        KeyError: the first two characters of *section_name* are not one
            of the four prefixes above, or ORDER_DEFINITIONS has no entry
            for the version/section.

    '''

    def __init__(self, section_name, version=1.2):
        if section_name.startswith('~C'):
            self.func = self.curves
        elif section_name.startswith('~P'):
            self.func = self.params
        else:
            self.func = self.metadata

        self.version = version
        self.section_name = section_name
        # Look the section up by its two-character prefix so that this
        # agrees with the startswith() dispatch above (a longer title such
        # as "~Curve" used to raise KeyError here).
        self.section_name2 = {"~C": "Curves",
                              "~W": "Well",
                              "~V": "Version",
                              "~P": "Parameter"}[section_name[:2]]

        # First entry is the default order; the remaining entries are
        # (order, mnemonics) pairs listing the exceptions.
        section_orders = ORDER_DEFINITIONS[self.version][self.section_name2]
        self.default_order = section_orders[0]
        self.orders = {}
        for order, mnemonics in section_orders[1:]:
            for mnemonic in mnemonics:
                self.orders[mnemonic] = order

    def __call__(self, **keys):
        '''Build an item from *keys* using the section's parsing method.'''
        return self.func(**keys)

    def num(self, x, default=None):
        '''Convert *x* to an int, failing that a float, else give up.

        Arguments:
            x: value to convert, normally a string

        Keyword Arguments:
            default: returned when neither conversion works; if None,
                *x* itself is returned unchanged.

        '''
        if default is None:
            default = x
        # The builtins are used on purpose: the numpy.int / numpy.float
        # aliases no longer exist in current NumPy, and the AttributeError
        # was being swallowed, leaving every value as an unconverted string.
        for convert in (int, float):
            try:
                return convert(x)
            except (TypeError, ValueError, OverflowError):
                continue
        return default

    def metadata(self, **keys):
        '''Build a HeaderItem, honouring the mnemonic's field order.

        The keys are named after their position on the line: "value" is
        the text before the colon and "descr" the text after it. For
        mnemonics whose order is "descr:value" the two are therefore
        passed to HeaderItem the other way round.

        Returns:
            A HeaderItem, or None if the order is neither "value:descr"
            nor "descr:value".

        '''
        key_order = self.orders.get(keys["name"], self.default_order)
        if key_order == "value:descr":
            return HeaderItem(
                keys["name"],                 # mnemonic
                keys["unit"],                 # unit
                self.num(keys["value"]),      # value (text before colon)
                keys["descr"],                # descr (text after colon)
                )
        elif key_order == "descr:value":
            # NOTE(review): the number conversion is applied to the text
            # before the colon, which in this order is the description --
            # confirm that is intended.
            return HeaderItem(
                keys["name"],                   # mnemonic
                keys["unit"],                   # unit
                keys["descr"],                  # text after colon
                self.num(keys["value"]),        # text before colon
                )

    def curves(self, **keys):
        '''Build a CurveItem; the value is kept as a string.'''
        return CurveItem(
            keys['name'],               # mnemonic
            keys['unit'],               # unit
            keys['value'],              # value
            keys['descr'],              # descr
            )

    def params(self, **keys):
        '''Build a HeaderItem, converting the value to a number if it is one.'''
        return HeaderItem(
            keys['name'],               # mnemonic
            keys['unit'],               # unit
            self.num(keys['value']),    # value
            keys['descr'],              # descr
            )
1004
1005
1006
def read_line(line, pattern=None):
    '''Read a line from a LAS header section.

    The line is parsed with a regular expression -- see LAS file specs for
    more details, but it should basically be in the format::

        name.unit       value : descr

    Arguments:
        line (str): line from a LAS header section

    Keyword Arguments:
        pattern (str): regular expression with named groups, used instead
            of the built-in one. It is applied with re.match, i.e. anchored
            at the start of the line.

    Returns:
        A dictionary with keys "name", "unit", "value", and "descr", each
        containing a string as value, stripped of surrounding whitespace.

    Raises:
        ValueError: *line* does not match the pattern.

    '''
    if pattern is None:
        pattern = (r"\.?(?P<name>[^.]*)\." +
                   r"(?P<unit>[^\s:]*)" +
                   r"(?P<value>[^:]*):" +
                   r"(?P<descr>.*)")
    m = re.match(pattern, line)
    if m is None:
        # Fail with a readable message rather than an AttributeError on
        # None further down.
        raise ValueError(
            "Line does not match the LAS header format: %r" % (line,))
    d = dict((key, value.strip()) for key, value in m.groupdict().items())
    unit = d.get("unit")
    if unit is not None and unit.endswith("."):
        d["unit"] = unit.strip(".")  # see issue #36
    return d
1038
1039
1040
def open_file(file_ref, encoding=None, encoding_errors="replace",
              autodetect_encoding=False, autodetect_encoding_chars=40e3):
    '''Open a file if necessary.

    A string with exactly one line is taken to be a URL (if it matches
    URL_REGEXP) or else a filename; a string with any other number of
    lines is taken to be the contents of a LAS file. Anything that is not
    a ``str`` is returned unchanged.

    If autodetect_encoding is True then either cchardet or chardet (see
    PyPI) needs to be installed, or else an ImportError will be raised.

    Arguments:
        file_ref: either a filename, an open file object, a URL, or a
            string of a LAS file contents.

    Keyword Arguments:
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" -
            how to handle errors with encodings (see standard library
            codecs module or Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect the
            encoding
        autodetect_encoding_chars (int/None): number of chars to read from
            the LAS file for auto-detection of encoding.

    Returns:
        An open file-like object ready for reading from.

    '''
    if not isinstance(file_ref, str):
        # Not a string: hand it back untouched.
        return file_ref

    lines = file_ref.splitlines()
    if len(lines) != 1:
        # Zero or several lines: the string is the file contents itself.
        return StringIO("\n".join(lines))

    if not URL_REGEXP.match(file_ref):
        # A single line that is not a URL is a path on disk.
        contents = get_unicode_from_filename(
            file_ref, encoding, encoding_errors, autodetect_encoding,
            autodetect_encoding_chars)
        return StringIO(contents)

    try:
        # Python 2: the response object is returned as it is.
        import urllib2
        file_ref = urllib2.urlopen(file_ref)
    except ImportError:
        # Python 3: decode the body using the charset given in the
        # response headers, falling back to UTF-8.
        import urllib.request
        response = urllib.request.urlopen(file_ref)
        enc = response.headers.get_content_charset("utf-8")
        file_ref = StringIO(response.read().decode(enc))
    return file_ref
1084
1085
1086
def get_unicode_from_filename(fn, enc, errors, auto, nbytes):
    '''
    Read Unicode data from file.

    A file that starts with the UTF-8 byte order mark is always read as
    "utf-8-sig", overriding both *enc* and *auto*.

    Arguments:
        fn (str): path to file
        enc (str): encoding - can be None
        errors (str): unicode error handling - can be "strict", "ignore",
            "replace"
        auto: auto-detection of character encoding - a false value
            disables it, otherwise "chardet", "cchardet", or True
        nbytes (int): number of bytes to read for auto-detection; None
            reads the whole file

    Returns:
        a unicode or string object

    '''
    if nbytes:
        nbytes = int(nbytes)

    # Detect BOM in UTF-8 files

    head_size = min(32, os.path.getsize(fn))
    with open(fn, mode="rb") as handle:
        head = handle.read(head_size)
    if head.startswith(codecs.BOM_UTF8):
        enc, auto = "utf-8-sig", False

    if auto:
        with open(fn, mode="rb") as handle:
            sample = handle.read() if nbytes is None else handle.read(nbytes)
        enc = get_encoding(auto, sample)

    # codecs.open is smarter than cchardet or chardet IME.

    with codecs.open(fn, mode="r", encoding=enc, errors=errors) as f:
        return f.read()
1128
1129
1130
def get_encoding(auto, raw):
    '''
    Automatically detect character encoding.

    Arguments:
        auto: which detector to use - "chardet" or "cchardet" (compared
            case-insensitively), or True to try cchardet first and fall
            back to chardet
        raw (bytes): array of bytes to detect from

    Returns:
        A string specifying the character encoding (the "encoding" entry
        of the detector's result).

    Raises:
        ImportError: the requested detector is not installed, or *auto* is
            True and neither one is.
        ValueError: *auto* is not one of the supported values.

    '''
    if auto is True:
        try:
            import cchardet as chardet
        except ImportError:
            try:
                import chardet
            except ImportError:
                raise ImportError(
                    "chardet or cchardet is required for automatic"
                    " detection of character encodings.")
            else:
                method = "chardet"
        else:
            method = "cchardet"
    else:
        # Anything without .lower() (e.g. a number) is rejected below
        # instead of failing with AttributeError.
        requested = auto.lower() if hasattr(auto, "lower") else None
        if requested == "chardet":
            import chardet
            method = "chardet"
        elif requested == "cchardet":
            import cchardet as chardet
            method = "cchardet"
        else:
            # Previously this fell through to an unbound local below.
            raise ValueError(
                "auto must be True, 'chardet' or 'cchardet', not %r"
                % (auto,))
    logger.debug("get_encoding Using %s" % method)

    result = chardet.detect(raw)
    logger.debug("get_encoding %s results=%s", method, result)
    return result["encoding"]
1171
1172
1173
def get_formatter_function(order, left_width=None, middle_width=None):
    '''Create function to format a LAS header item.

    Arguments:
        order: format of item, either "descr:value" or "value:descr" -- see
            LAS 1.2 and 2.0 specifications for more information.

    Keyword Arguments:
        left_width (int): number of characters to the left hand side of the
            first period (default 10)
        middle_width (int): number of characters between the first period
            and the " : " separator (default 40)

    Returns:
        A function which takes a header item as its single argument and
        returns the formatted LAS header line. The item must provide
        ``original_mnemonic``, ``unit``, ``value`` and ``descr``. None is
        returned when *order* is not one of the two recognised values.

    '''
    if left_width is None:
        left_width = 10
    if middle_width is None:
        middle_width = 40

    def pad_mnemonic(mnemonic):
        # Left-justify the mnemonic in its column.
        return mnemonic.ljust(left_width)

    def pad_middle(unit, right_hand_item):
        # Unit on the left, the other field pushed to the right edge.
        gap = middle_width - len(str(unit)) - len(right_hand_item)
        return unit + " " * gap + right_hand_item

    def format_descr_value(item):
        return "%s.%s : %s" % (
            pad_mnemonic(item.original_mnemonic),
            pad_middle(str(item.unit), str(item.descr)),
            item.value
        )

    def format_value_descr(item):
        return "%s.%s : %s" % (
            pad_mnemonic(item.original_mnemonic),
            pad_middle(str(item.unit), str(item.value)),
            item.descr
        )

    if order == "descr:value":
        return format_descr_value
    if order == "value:descr":
        return format_value_descr
1214
1215
1216
def get_section_order_function(section, version,
                               order_definitions=ORDER_DEFINITIONS):
    '''Get a function that returns the order per mnemonic and section.

    Arguments:
        section (str): key of the section within
            ``order_definitions[version]``
        version (float): key of the LAS version within *order_definitions*

    Keyword Arguments:
        order_definitions (dict): maps version -> section -> sequence whose
            first element is the default order and whose remaining
            elements are (order, mnemonics) pairs listing the exceptions.

    Returns:
        A function which takes a mnemonic (str) as its only argument, and
        in turn returns the order "value:descr" or "descr:value".

    '''
    definition = order_definitions[version][section]
    fallback = definition[0]
    exceptions = {}
    for order, mnemonics in definition[1:]:
        # Later pairs win if a mnemonic is listed more than once.
        exceptions.update(dict.fromkeys(mnemonics, order))

    def order_of(mnemonic):
        return exceptions.get(mnemonic, fallback)

    return order_of
1239
1240
1241
def get_section_widths(section_name, items, version, order_func, middle_padding=5):
    '''Find minimum section widths fitting the content in *items*.

    The left width is the length of the longest ``original_mnemonic``.
    The middle width is the largest value of: length of the unit, plus
    one, plus the length of the field named first in the item's order
    string.

    Arguments:
        section_name (str): not used by this function
        items (SectionItems): section items
        version (float): not used by this function
        order_func: function mapping a mnemonic to its order string, e.g.
            "value:descr"

    Keyword Arguments:
        middle_padding (int): not used by this function

    Returns:
        A dictionary with keys "left_width" and "middle_width"; both are
        None when *items* is empty.

    '''
    widths = {
        "left_width": None,
        "middle_width": None
    }
    if len(items) == 0:
        return widths

    widths["left_width"] = max(len(item.original_mnemonic) for item in items)
    candidates = []
    for item in items:
        order = order_func(item.mnemonic)
        # The field written immediately after the unit.
        rhs_element = order.split(':')[0]
        logger.debug('get_section_widths %s\n\torder=%s rhs_element=%s' % (item, order, rhs_element))
        unit_length = len(str(item.unit))
        field_length = len(str(item[rhs_element]))
        candidates.append(unit_length + 1 + field_length)
    widths['middle_width'] = max(candidates)
    return widths
1264
1265
1266
def read(file_ref, **kwargs):
    '''Read a LAS file.

    Convenience wrapper that builds a LASFile from *file_ref*, passing
    every keyword argument straight through.

    Note that only versions 1.2 and 2.0 of the LAS file specification
    are currently supported.

    Arguments:
        file_ref: either a filename, an open file object, or a string of
            a LAS file contents.

    Keyword Arguments:
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" -
            how to handle errors with encodings (see standard library
            codecs module or Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect the
            encoding
        autodetect_encoding_chars (int/None): number of chars to read from
            the LAS file for auto-detection of encoding.

    Returns:
        A LASFile object representing the file -- see above

    '''
    las_file = LASFile(file_ref, **kwargs)
    return las_file
1290