Completed
Push — master ( 2d79b0...39c744 )
by Kent
03:41
created

lasio.SectionItems.append()   B

Complexity

Conditions 6

Size

Total Lines 17

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 6
dl 0
loc 17
rs 8
1
'''las.py - read Log ASCII Standard files

See README.rst and LICENSE for more information.

'''
6
from __future__ import print_function
7
8
# Standard library packages
9
import codecs
10
import json
11
import logging
12
import os
13
import re
14
import textwrap
15
import traceback
16
17
# The standard library OrderedDict was introduced in Python 2.7 so
18
# we have a third-party option to support Python 2.6
19
20
try:
21
    from collections import OrderedDict
22
except ImportError:
23
    from ordereddict import OrderedDict
24
25
# Convoluted import for StringIO in order to support:
26
#
27
# - Python 3 - io.StringIO
28
# - Python 2 (optimized) - cStringIO.StringIO
29
# - Python 2 (all) - StringIO.StringIO
30
31
try:
32
    import cStringIO as StringIO
33
except ImportError:
34
    try:  # cStringIO not available on this system
35
        import StringIO
36
    except ImportError:  # Python 3
37
        from io import StringIO
38
    else:
39
        from StringIO import StringIO
40
else:
41
    from StringIO import StringIO
42
43
# Provide the same text-type names (str, unicode, bytes, basestring) on
# both Python 2 and Python 3 so the rest of the module can use them freely.
try:
    unicode = unicode
except NameError:
    # 'unicode' is undefined, must be Python 3
    str = str
    unicode = str
    bytes = bytes
    basestring = (str, bytes)
else:
    # 'unicode' exists, must be Python 2
    str = str
    unicode = unicode
    bytes = str
    basestring = basestring
59
60
# Required third-party packages available on PyPi:
61
62
from namedlist import namedlist
63
import numpy
64
65
# Optional third-party packages available on PyPI are mostly
66
# imported inline below.
67
68
69
# Module-level logger shared by every class and function in this file.
logger = logging.getLogger(__name__)
__version__ = "0.9.1"
71
72
73
class LASDataError(Exception):

    '''Error during reading of numerical data from LAS file.'''
77
78
79
class LASHeaderError(Exception):

    '''Error during reading of header data from LAS file.'''
83
84
85
class LASUnknownUnitError(Exception):

    '''Error of unknown unit in LAS file.'''
89
90
91
class HeaderItem(OrderedDict):

    '''A single header entry: mnemonic, unit, value and description.

    The fields are stored as plain attributes. Item access (``item['unit']``)
    is a read-only view restricted to the names in ``_ITEM_KEYS``.

    Arguments:
        mnemonic (str): mnemonic exactly as it should be written to a file.

    Keyword Arguments:
        unit: unit of the entry (default '').
        value: value of the entry (default '').
        descr: description of the entry (default '').

    '''

    # Names accepted by __getitem__; each maps to the attribute of that name.
    _ITEM_KEYS = ('mnemonic', 'original_mnemonic', 'useful_mnemonic',
                  'unit', 'value', 'descr')

    def __init__(self, mnemonic, unit="", value="", descr=""):
        super(HeaderItem, self).__init__()

        # The original mnemonic needs to be stored for rewriting a new file.
        # It might be nothing - '' - or a duplicate e.g. two 'RHO' curves,
        # or unique - 'X11124' - or perhaps invalid??
        self.original_mnemonic = mnemonic

        # We also need to store a more useful mnemonic, which will be used
        # (technically not, but read on) for people to access the curve while
        # the LASFile object exists. For example, a curve which is unnamed
        # and has the mnemonic '' will be accessed via 'UNKNOWN'.
        if mnemonic.strip() == '':
            self.useful_mnemonic = 'UNKNOWN'
        else:
            self.useful_mnemonic = mnemonic

        # But note that we need to (later) check (repeatedly) for duplicate
        # mnemonics. Any duplicates will have ':1', ':2', ':3', etc., appended
        # to them. The result of this will be stored in the below variable,
        # which is what the user should actually see and use 99.5% of the time.
        self.mnemonic = self.useful_mnemonic

        self.unit = unit
        self.value = value
        self.descr = descr

    def __getitem__(self, key):
        '''Return the attribute called *key*.

        Raises:
            KeyError: if *key* is not one of ``_ITEM_KEYS``.

        '''
        if key in self._ITEM_KEYS:
            return getattr(self, key)
        # Report the actual class name (the message used to say "CurveItem"
        # even for plain HeaderItem objects).
        raise KeyError('%s only has restricted items (not %s)' % (
            self.__class__.__name__, key))

    def __repr__(self):
        return (
            "%s(mnemonic=%s, unit=%s, value=%s, "
            "descr=%s, original_mnemonic=%s)" % (
                self.__class__.__name__, self.mnemonic, self.unit, self.value,
                self.descr, self.original_mnemonic))

    def _repr_pretty_(self, p, cycle):
        # Pretty-printer hook: emit the same text as __repr__.
        return p.text(self.__repr__())
147
148
149
class CurveItem(HeaderItem):

    '''Header item for a curve, carrying the curve's data in ``data``.

    Takes the same arguments as HeaderItem.

    '''

    def __init__(self, *args, **kwargs):
        # Start with an empty 1-D array. numpy.ndarray([]) would allocate an
        # uninitialised 0-d array holding one arbitrary value.
        self.data = numpy.array([])
        super(CurveItem, self).__init__(*args, **kwargs)

    @property
    def API_code(self):
        '''The item's ``value`` attribute under another name.'''
        return self.value

    def __repr__(self):
        return (
            "%s(mnemonic=%s, unit=%s, value=%s, "
            "descr=%s, original_mnemonic=%s, data.shape=%s)" % (
                self.__class__.__name__, self.mnemonic, self.unit, self.value,
                self.descr, self.original_mnemonic, self.data.shape))
164
165
166
class SectionItems(list):

    '''List of header items that can also be addressed by mnemonic.

    Items are expected to provide ``mnemonic``, ``useful_mnemonic`` and
    ``value`` attributes. Lookup by mnemonic is offered through item access
    (``section['STRT']``), attribute access (``section.STRT``) and
    dict-style helpers (``keys()``, ``items()``, ...).

    '''

    def __contains__(self, testitem):
        '''Allows testing of a mnemonic or an actual item.'''
        for item in self:
            if testitem == item.mnemonic:
                return True
            elif hasattr(testitem, 'mnemonic'):
                if testitem.mnemonic == item.mnemonic:
                    return True
            elif testitem is item:
                return True
        return False

    def keys(self):
        '''Return the list of item mnemonics, in order.'''
        return [item.mnemonic for item in self]

    def values(self):
        '''Return the items; this is the section itself, not a copy.'''
        return self

    def items(self):
        '''Return a list of ``(mnemonic, item)`` pairs.'''
        return [(item.mnemonic, item) for item in self]

    def iterkeys(self):
        return iter(self.keys())

    def itervalues(self):
        return iter(self)

    def iteritems(self):
        return iter(self.items())

    def __getitem__(self, key):
        '''Return the first item whose mnemonic equals *key*.

        If no mnemonic matches, an integer or slice *key* is handled as a
        normal list index.

        Raises:
            KeyError: if *key* matches no mnemonic and is not an int/slice.

        '''
        for item in self:
            if item.mnemonic == key:
                return item
        if isinstance(key, (int, slice)):
            return super(SectionItems, self).__getitem__(key)
        raise KeyError("%s not in %s" % (key, self.keys()))

    def __setitem__(self, key, newitem):
        '''Replace/add an item when given a HeaderItem, else set a value.'''
        if isinstance(newitem, HeaderItem):
            self.set_item(key, newitem)
        else:
            self.set_item_value(key, newitem)

    def __getattr__(self, key):
        '''Attribute-style access to items by mnemonic.'''
        # Only called when normal attribute lookup has already failed.
        if key in self:
            return self[key]
        # list defines no __getattr__ to delegate to, so raise directly.
        raise AttributeError("%r object has no attribute %r" % (
            self.__class__.__name__, key))

    def __setattr__(self, key, value):
        '''Attribute-style assignment to an existing item by mnemonic.'''
        if key in self:
            self[key] = value
        else:
            super(SectionItems, self).__setattr__(key, value)

    def set_item(self, key, newitem):
        '''Replace the item whose mnemonic is *key*, or append *newitem*.'''
        for i, item in enumerate(self):
            if key == item.mnemonic:
                # This is very important. We replace items where
                # 'mnemonic' is equal - i.e. we do not check useful_mnemonic
                # or original_mnemonic. Is this correct? Needs to be thought
                # about and tested more carefully.
                logger.debug('SectionItems.__setitem__ Replaced %s item' % key)
                return super(SectionItems, self).__setitem__(i, newitem)
        self.append(newitem)

    def set_item_value(self, key, value):
        '''Set the ``value`` attribute of the item found under *key*.'''
        self[key].value = value

    def append(self, newitem):
        '''Append *newitem*, then number any duplicated mnemonics.'''
        logger.debug("SectionItems.append type=%s str=%s" % (type(newitem), newitem))
        super(SectionItems, self).append(newitem)
        # Match on useful_mnemonic: an item that already carries a ':n'
        # suffix in .mnemonic must still be grouped with its duplicates.
        self._assign_duplicate_suffixes(newitem.useful_mnemonic)

    def _assign_duplicate_suffixes(self, useful_mnemonic):
        '''Give ':1', ':2', ... suffixes to items sharing *useful_mnemonic*.

        Items are left untouched when fewer than two of them match.

        '''
        duplicates = [item for item in self
                      if item.useful_mnemonic == useful_mnemonic]
        if len(duplicates) > 1:
            for number, item in enumerate(duplicates, 1):
                item.mnemonic = item.useful_mnemonic + ":%d" % number

    def dictview(self):
        '''Return a plain dict mapping each mnemonic to its item's value.'''
        return dict(zip(self.keys(), [i.value for i in self.values()]))
269
270
271
class JSONEncoder(json.JSONEncoder):

    '''JSON encoder that knows how to serialise LASFile objects.'''

    def default(self, obj):
        '''Return a JSON-serialisable version of *obj*.

        A LASFile becomes ``{'metadata': {...}, 'data': {...}}``: each
        section is stored under its name (text sections as a string, item
        sections as a list of dicts) and each curve's data is stored as a
        list under the curve mnemonic. NumPy scalars and arrays are converted
        to their Python equivalents. Anything else is passed to the base
        class, which raises TypeError.

        '''
        if isinstance(obj, LASFile):
            d = {'metadata': {},
                 'data': {}}
            for name, section in obj.sections.items():
                if isinstance(section, basestring):
                    d['metadata'][name] = section
                else:
                    # Header items keep their fields as attributes, so
                    # dict(item) would always be empty; copy them explicitly.
                    d['metadata'][name] = [
                        {'mnemonic': item.mnemonic,
                         'unit': item.unit,
                         'value': item.value,
                         'descr': item.descr}
                        for item in section]
            for curve in obj.curves:
                d['data'][curve.mnemonic] = list(curve.data)
            return d
        if isinstance(obj, numpy.generic):
            return obj.item()
        if isinstance(obj, numpy.ndarray):
            return obj.tolist()
        # Do not silently turn unknown objects into null.
        return super(JSONEncoder, self).default(obj)
287
288
289
290
# Default content of every section of a LAS file, keyed by section name.
# "Other" is free text and "Data" is an empty array; the remaining entries
# are SectionItems of header items.
DEFAULT_ITEMS = {
    "Version": SectionItems([
        HeaderItem("VERS", "", 2.0, "CWLS log ASCII Standard -VERSION 2.0"),
        HeaderItem("WRAP", "", "NO", "One line per depth step"),
        HeaderItem("DLM", "", "SPACE", "Column Data Section Delimiter"),
        ]),
    "Well": SectionItems([
        HeaderItem("STRT", "m", numpy.nan, "START DEPTH"),
        HeaderItem("STOP", "m", numpy.nan, "STOP DEPTH"),
        HeaderItem("STEP", "m", numpy.nan, "STEP"),
        HeaderItem("NULL", "", -9999.25, "NULL VALUE"),
        HeaderItem("COMP", "", "", "COMPANY"),
        HeaderItem("WELL", "", "", "WELL"),
        HeaderItem("FLD", "", "", "FIELD"),
        HeaderItem("LOC", "", "", "LOCATION"),
        HeaderItem("PROV", "", "", "PROVINCE"),
        HeaderItem("CNTY", "", "", "COUNTY"),
        HeaderItem("STAT", "", "", "STATE"),
        HeaderItem("CTRY", "", "", "COUNTRY"),
        HeaderItem("SRVC", "", "", "SERVICE COMPANY"),
        HeaderItem("DATE", "", "", "DATE"),
        HeaderItem("UWI", "", "", "UNIQUE WELL ID"),
        HeaderItem("API", "", "", "API NUMBER"),
        ]),
    "Curves": SectionItems([]),
    "Parameter": SectionItems([]),
    "Other": "",
    "Data": numpy.zeros(shape=(0, 1)),
    }
319
320
321
# Field order of header lines, keyed by LAS version and then section name.
# The first entry of each list is the section's default order; any further
# entries are (order, [mnemonics]) pairs overriding it for those mnemonics.
ORDER_DEFINITIONS = {
    1.2: OrderedDict([
        ("Version", ["value:descr"]),
        ("Well", [
            "descr:value",
            ("value:descr", ["STRT", "STOP", "STEP", "NULL"])]),
        ("Curves", ["value:descr"]),
        ("Parameter", ["value:descr"]),
        ]),
    2.0: OrderedDict([
        ("Version", ["value:descr"]),
        ("Well", ["value:descr"]),
        ("Curves", ["value:descr"]),
        ("Parameter", ["value:descr"]),
        ])}
336
337
338
# Matches a complete http(s)/ftp(s) URL, case-insensitively.
URL_REGEXP = re.compile(
    r'^(?:http|ftp)s?://'  # http://, https://, ftp:// or ftps://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}'
    r'\.?|[A-Z0-9-]{2,}\.?)|'  # (cont.) domain...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)$', re.IGNORECASE)
346
347
348
class LASFile(object):

    '''LAS file object.

    Keyword Arguments:
        file_ref: either a filename, an open file object, or a string of
            a LAS file contents.
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" - how to
            handle errors with encodings (see standard library codecs module or
            Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    '''

    def __init__(self, file_ref=None, **kwargs):
        self._text = ''
        self._use_pandas = "auto"
        self._df = None
        self.index_unit = None
        # Every instance gets its own copies of the default header items.
        # Re-using the DEFAULT_ITEMS objects themselves would let one LASFile
        # (e.g. through write()) change the defaults seen by all the others.
        self.sections = {
            "Version": self._copy_default_section("Version"),
            "Well": self._copy_default_section("Well"),
            "Curves": self._copy_default_section("Curves"),
            "Parameter": self._copy_default_section("Parameter"),
            "Other": str(DEFAULT_ITEMS["Other"]),
            }

        if file_ref is not None:
            self.read(file_ref, **kwargs)

    @staticmethod
    def _copy_default_section(name):
        '''Return a new SectionItems with copies of DEFAULT_ITEMS[name].'''
        return SectionItems([
            HeaderItem(item.original_mnemonic, item.unit, item.value,
                       item.descr)
            for item in DEFAULT_ITEMS[name]])

    def read(self, file_ref, use_pandas="auto", null_subs=True, **kwargs):
        '''Read a LAS file.

        Arguments:
            file_ref: either a filename, an open file object, or a string of
                a LAS file contents.

        Keyword Arguments:
            use_pandas (str): bool or "auto" -- use pandas if available -- provide
                False option for faster loading where pandas functionality is not
                needed. "auto" becomes True if pandas is installed, and False if not.
            null_subs (bool): passed to Reader.read_data; if True, data values
                equal to the NULL header value are replaced by NaN.
            encoding (str): character encoding to open file_ref with
            encoding_errors (str): "strict", "replace" (default), "ignore" - how to
                handle errors with encodings (see standard library codecs module or
                Python Unicode HOWTO for more information)
            autodetect_encoding (bool): use chardet/cchardet to detect encoding
            autodetect_encoding_chars (int/None): number of chars to read from LAS
                file for auto-detection of encoding.

        Raises:
            KeyError: if the ~V section has no VERS item.

        '''
        if use_pandas is not None:
            self._use_pandas = use_pandas

        f = open_file(file_ref, **kwargs)

        self._text = f.read()
        logger.debug("LASFile.read LAS content is type %s" % type(self._text))

        reader = Reader(self._text, version=1.2)
        self.sections["Version"] = reader.read_section('~V')

        # Set version
        try:
            reader.version = self.version['VERS'].value
        except KeyError:
            raise KeyError("No key VERS in ~V section")

        # Validate version. A plain comparison is used (not assert) so the
        # check also runs when Python is started with -O.
        if reader.version not in (1.2, 2):
            logger.warning("LAS spec version is %s -- neither 1.2 nor 2" %
                           reader.version)
            if reader.version < 2:
                reader.version = 1.2
            else:
                reader.version = 2
        reader.wrap = self.version['WRAP'].value == 'YES'

        self.sections["Well"] = reader.read_section('~W')
        self.sections["Curves"] = reader.read_section('~C')
        try:
            self.sections["Parameter"] = reader.read_section('~P')
        except LASHeaderError:
            # The parameter section is optional: log the reason and carry on.
            logger.warning(traceback.format_exc().splitlines()[-1])
        self.sections["Other"] = reader.read_raw_text('~O')

        # Set null value
        reader.null = self.well['NULL'].value

        data = reader.read_data(len(self.curves), null_subs=null_subs)

        if isinstance(data, numpy.ndarray):
            if data.ndim < 2 and data.size:
                # A single row or a single curve may come back 0-d or 1-D;
                # give it one column per curve before slicing.
                data = data.reshape(-1, len(self.curves))
            if data.ndim == 2:
                for i, c in enumerate(self.curves):
                    c.data = data[:, i]
        else:
            # Reader.read_data signals "no data" with a (None, None) tuple.
            logger.warning("LASFile.read no data found; curves left empty")

        if (self.well["STRT"].unit.upper() == "M" and
                self.well["STOP"].unit.upper() == "M" and
                self.well["STEP"].unit.upper() == "M" and
                self.curves[0].unit.upper() == "M"):
            self.index_unit = "M"
        elif (self.well["STRT"].unit.upper() in ("F", "FT") and
              self.well["STOP"].unit.upper() in ("F", "FT") and
              self.well["STEP"].unit.upper() in ("F", "FT") and
              self.curves[0].unit.upper() in ("F", "FT")):
            self.index_unit = "FT"

        self.refresh()

    def refresh(self, use_pandas=None):
        '''Refresh curve names and indices.

        Rebuilds ``df`` from the current curves when pandas is in use. If
        pandas cannot be imported, pandas support is switched off.

        '''
        if use_pandas is not None:
            self._use_pandas = use_pandas

        if self._use_pandas is not False:
            try:
                import pandas
            except ImportError:
                logger.info(
                    "pandas not installed - skipping LASFile.df creation")
                self._use_pandas = False

        if self._use_pandas:
            df = pandas.DataFrame(self.data, columns=self.keys())
            df.set_index(self.curves[0].mnemonic, inplace=True)
            self.df = df

    @property
    def data(self):
        '''2D array of data from LAS file.'''
        return numpy.vstack([c.data for c in self.curves]).T

    def write(self, file_object, version=None, wrap=None,
              STRT=None, STOP=None, STEP=None, fmt="%10.5g"):
        '''Write to a file.

        Arguments:
            file_object: a file_like object opening for writing.
            version (float): either 1.2 or 2
            wrap (bool): True, False, or None (last uses WRAP item in version)
            STRT (float): optional override to automatic calculation using
                the first index curve value.
            STOP (float): optional override to automatic calculation using
                the last index curve value.
            STEP (float): optional override to automatic calculation using
                the first step size in the index curve.
            fmt (str): format string for numerical data being written to data
                section.

        Note that the VERS, WRAP, STRT, STOP and STEP header items of this
        object are updated as part of writing.

        Examples:

            >>> with open("test_output.las", mode="w") as f:
            ...     lasfile_obj.write(f, 2.0)   # <-- this method

        '''
        if wrap is None:
            # Compare the item's value: the HeaderItem object itself never
            # equals "YES", which used to switch wrapping off unconditionally.
            wrap = self.version["WRAP"].value == "YES"
        elif wrap is True:
            self.version["WRAP"] = HeaderItem(
                "WRAP", "", "YES", "Multiple lines per depth step")
        elif wrap is False:
            self.version["WRAP"] = HeaderItem(
                "WRAP", "", "NO", "One line per depth step")

        assert version in (1.2, 2, None)
        if version is None:
            version = self.version["VERS"].value
        if version == 1.2:
            self.version["VERS"] = HeaderItem(
                "VERS", "", 1.2, "CWLS LOG ASCII STANDARD - VERSION 1.2")
        elif version == 2:
            self.version["VERS"] = HeaderItem(
                "VERS", "", 2.0, "CWLS log ASCII Standard -VERSION 2.0")

        if STRT is None or STOP is None or STEP is None:
            # self.index rebuilds the whole data array, so fetch it once.
            index = self.index
            if STRT is None:
                STRT = index[0]
            if STOP is None:
                STOP = index[-1]
            if STEP is None:
                if len(index) > 1:
                    STEP = index[1] - index[0]  # Faster than numpy.gradient
                else:
                    # One sample gives no step size; keep the header value.
                    STEP = self.well["STEP"].value
        self.well["STRT"].value = STRT
        self.well["STOP"].value = STOP
        self.well["STEP"].value = STEP

        # Check for any changes in the pandas dataframe and if there are,
        # create new curves so they are reflected in the output LAS file.
        if self._use_pandas:
            # Hold on to the current frame: add_curve() calls refresh(),
            # which replaces self.df with one rebuilt from the curves and
            # would drop any further new columns.
            df = self.df
            if df is not None:
                for df_curve_name in list(df.columns.values):
                    if df_curve_name not in self.keys():
                        self.add_curve(df_curve_name, df[df_curve_name])

        # Write each header section.
        header_lines = []
        for title, section_name, section in (
                ("~Version ", "Version", self.version),
                ("~Well ", "Well", self.well),
                ("~Curves ", "Curves", self.curves),
                ("~Params ", "Parameter", self.params)):
            logger.debug('LASFile.write %s section' % section_name)
            header_lines += self._format_section(
                title, section_name, section, version)

        # ~Other
        header_lines.append("~Other ".ljust(60, "-"))
        header_lines += self.other.splitlines()

        header_lines.append("~ASCII ".ljust(60, "-"))

        file_object.write("\n".join(header_lines))
        file_object.write("\n")

        data_arr = numpy.column_stack([c.data for c in self.curves])
        nrows, ncols = data_arr.shape

        def format_data_section_line(n, fmt, l=10, spacer=" "):
            # NaN is written as the NULL value from the well section.
            if numpy.isnan(n):
                return spacer + str(self.well["NULL"].value).rjust(l)
            else:
                return spacer + (fmt % n).rjust(l)

        twrapper = textwrap.TextWrapper(width=79)
        is_version_12 = self.version["VERS"].value == 1.2
        for i in range(nrows):
            depth_slice = ''.join(
                format_data_section_line(data_arr[i, j], fmt)
                for j in range(ncols))

            if wrap:
                row_lines = twrapper.wrap(depth_slice)
                logger.debug("LASFile.write Wrapped %d lines out of %s" %
                             (len(row_lines), depth_slice))
            else:
                row_lines = [depth_slice]

            if is_version_12:
                for line in row_lines:
                    if len(line) > 255:
                        logger.warning("LASFile.write Data line > 256 chars: %s" % line)

            for line in row_lines:
                file_object.write(line + "\n")

    def _format_section(self, title, section_name, section, version):
        '''Return the title line and formatted item lines of one section.'''
        lines = [title.ljust(60, "-")]
        order_func = get_section_order_function(section_name, version)
        section_widths = get_section_widths(
            section_name, section, version, order_func)
        for header_item in section.values():
            order = order_func(header_item.original_mnemonic)
            logger.debug('LASFile.write %s\norder=%s section_widths=%s' % (
                header_item, order, section_widths))
            formatter_func = get_formatter_function(order, **section_widths)
            lines.append(formatter_func(header_item))
        return lines

    def get_curve(self, mnemonic):
        '''Return Curve object.

        Arguments:
            mnemonic (str): the name of the curve

        Returns:
            A Curve object, not just the data array, or None if no curve
            has that mnemonic.

        '''
        for curve in self.curves:
            if curve.mnemonic == mnemonic:
                return curve

    def __getitem__(self, key):
        '''Return the data of the curve at position or mnemonic *key*.

        Raises:
            KeyError: if *key* is neither an int nor a curve mnemonic.

        '''
        if isinstance(key, int):
            return self.curves[key].data
        if key in self.keys():
            return self.curves[key].data
        raise KeyError("%s not found in curves (%s)" % (key, self.keys()))

    def __setitem__(self, key, value):
        # This must be raised: "assert NotImplementedError(...)" only tests
        # that the exception object is truthy and so never fails.
        raise NotImplementedError('not yet')

    def keys(self):
        '''Return the list of curve mnemonics.'''
        return [c.mnemonic for c in self.curves]

    def values(self):
        '''Return the list of curve data arrays.'''
        return [c.data for c in self.curves]

    def items(self):
        '''Return a list of ``(mnemonic, data)`` pairs, one per curve.'''
        return [(c.mnemonic, c.data) for c in self.curves]

    def iterkeys(self):
        return iter(list(self.keys()))

    def itervalues(self):
        return iter(list(self.values()))

    def iteritems(self):
        return iter(list(self.items()))

    @property
    def version(self):
        return self.sections["Version"]

    @version.setter
    def version(self, section):
        self.sections["Version"] = section

    @property
    def well(self):
        return self.sections["Well"]

    @well.setter
    def well(self, section):
        self.sections["Well"] = section

    @property
    def curves(self):
        return self.sections["Curves"]

    @curves.setter
    def curves(self, section):
        self.sections["Curves"] = section

    @property
    def params(self):
        return self.sections["Parameter"]

    @params.setter
    def params(self, section):
        self.sections["Parameter"] = section

    @property
    def other(self):
        return self.sections["Other"]

    @other.setter
    def other(self, section):
        self.sections["Other"] = section

    @property
    def metadata(self):
        '''All header items of the item-based sections in one SectionItems.

        Free-text sections (such as "Other") are skipped.

        '''
        # Iterate over the section objects, not the dict's keys. The result
        # is built in one go instead of with append(), so duplicate
        # mnemonics in different sections do not get their items renamed.
        return SectionItems(
            item
            for section in self.sections.values()
            if not isinstance(section, basestring)
            for item in section)

    @metadata.setter
    def metadata(self, value):
        raise Warning('Set values in the version/well/params attrs directly')

    @property
    def df(self):
        '''The DataFrame stored by refresh(), or None if there is none.'''
        if self._use_pandas:
            # _df stays None until refresh() has built a frame.
            return getattr(self, "_df", None)
        else:
            logger.warning(
                "pandas is not installed or use_pandas was set to False")

    @df.setter
    def df(self, value):
        self._df = value

    @property
    def index(self):
        '''First column of ``data``.'''
        return self.data[:, 0]

    @property
    def depth_m(self):
        '''The index converted to metres.

        Raises:
            LASUnknownUnitError: if index_unit is neither "M" nor "FT".

        '''
        if self.index_unit == "M":
            return self.index
        elif self.index_unit == "FT":
            return self.index * 0.3048
        else:
            raise LASUnknownUnitError("Unit of depth index not known")

    @property
    def depth_ft(self):
        '''The index converted to feet.

        Raises:
            LASUnknownUnitError: if index_unit is neither "M" nor "FT".

        '''
        if self.index_unit == "M":
            return self.index / 0.3048
        elif self.index_unit == "FT":
            return self.index
        else:
            raise LASUnknownUnitError("Unit of depth index not known")

    def add_curve(self, mnemonic, data, unit="", descr="", value=""):
        '''Create a curve from *data*, store it under *mnemonic*, refresh.

        An existing curve with the same mnemonic is replaced.

        '''
        curve = CurveItem(mnemonic, unit, value, descr)
        curve.data = data
        self.curves[mnemonic] = curve
        self.refresh()

    @property
    def header(self):
        '''The dict of all sections, keyed by section name.'''
        return self.sections
797
798
799
class Las(LASFile):

    '''LAS file object.

    Retained for backwards compatibility.

    '''
807
808
809
class Reader(object):
810
811
    def __init__(self, text, version):
812
        self.lines = text.splitlines()
813
        self.version = version
814
        self.null = numpy.nan
815
        self.wrap = True
816
817
    @property
818
    def section_names(self):
819
        names = []
820
        for line in self.lines:
821
            line = line.strip().strip('\t').strip()
822
            if not line or line.startswith('#'):
823
                continue
824
            if line.startswith('~'):
825
                names.append(line)
826
        return names
827
828
    def iter_section_lines(self, section_name, ignore_comments=True):
829
        in_section = False
830
        for i, line in enumerate(self.lines):
831
            line = line.strip().strip('\t').strip()
832
            if not line:
833
                continue
834
            if ignore_comments and line.startswith('#'):
835
                continue
836
            if line.startswith(section_name):
837
                if in_section:
838
                    return
839
                else:
840
                    in_section = True
841
                    continue
842
            if line.lower().startswith('~') and in_section:
843
                # Start of the next section; we're done here.
844
                break
845
            if in_section:
846
                yield line
847
848
    def read_raw_text(self, section_name):
849
        return '\n'.join(self.iter_section_lines(section_name,
850
                                                 ignore_comments=False))
851
852
    def read_section(self, section_name):
853
        parser = SectionParser(section_name, version=self.version)
854
        section = SectionItems()
855
        for line in self.iter_section_lines(section_name):
856
            try:
857
                values = read_line(line)
858
            except:
859
                raise LASHeaderError("Failed in %s section on line:\n%s%s" % (
860
                    section_name, line,
861
                    traceback.format_exc().splitlines()[-1]))
862
            else:
863
                section.append(parser(**values))
864
        return section
865
866
    def read_data(self, number_of_curves=None, null_subs=True):
867
        s = self.read_data_string()
868
        if not self.wrap:
869
            try:
870
                arr = numpy.loadtxt(StringIO(s))
871
            except:
872
                raise LASDataError("Failed to read data:\n%s" % (
873
                                   traceback.format_exc().splitlines()[-1]))
874
        else:
875
            eol_chars = r"[\n\t\r]"
876
            s = re.sub(eol_chars, " ", s)
877
            try:
878
                arr = numpy.loadtxt(StringIO(s))
879
            except:
880
                raise LASDataError("Failed to read wrapped data: %s" % (
881
                                   traceback.format_exc().splitlines()[-1]))
882
            logger.debug('Reader.read_data arr shape = %s' % (arr.shape))
883
            logger.debug('Reader.read_data number of curves = %s' % number_of_curves)
884
            arr = numpy.reshape(arr, (-1, number_of_curves))
885
        if not arr.shape or (arr.ndim == 1 and arr.shape[0] == 0):
886
            logger.warning('Reader.read_dataN o data present.')
887
            return None, None
888
        else:
889
            logger.info('Reader.read_data LAS file shape = %s' % str(arr.shape))
890
        logger.debug('Reader.read_data checking for nulls (NULL = %s)' % self.null)
891
        if null_subs:
892
            arr[arr == self.null] = numpy.nan
893
        return arr
894
895
    def read_data_string(self):
896
        start_data = None
897
        for i, line in enumerate(self.lines):
898
            line = line.strip().strip('\t').strip()
899
            if line.startswith('~A'):
900
                start_data = i + 1
901
                break
902
        s = '\n'.join(self.lines[start_data:])
903
        s = re.sub(r'(\d)-(\d)', r'\1 -\2', s)
904
        s = re.sub('-?\d*\.\d*\.\d*', ' NaN NaN ', s)
905
        s = re.sub('NaN.\d*', ' NaN NaN ', s)
906
        return s
907
908
909
class SectionParser(object):

    '''Build a header item from the fields of one parsed header line.

    The section name chosen at construction decides which builder is used:
    names starting with ``~C`` use ``curves``, names starting with ``~P``
    use ``params``, and all others use ``metadata``. Calling the instance
    with the fields as keyword arguments ("name", "unit", "value", "descr")
    runs that builder.

    Arguments:
        section_name (str): section title; its first two characters must be
            one of "~C", "~W", "~V" or "~P", otherwise KeyError is raised.

    Keyword Arguments:
        version: key into ORDER_DEFINITIONS selecting the LAS version.

    '''

    def __init__(self, section_name, version=1.2):
        if section_name.startswith('~C'):
            self.func = self.curves
        elif section_name.startswith('~P'):
            self.func = self.params
        else:
            self.func = self.metadata

        self.version = version
        self.section_name = section_name
        # Look the title up by its two-character prefix so that a full
        # title (e.g. "~Curve Information") resolves like the bare "~C",
        # in line with the startswith() tests above.
        self.section_name2 = {"~C": "Curves",
                              "~W": "Well",
                              "~V": "Version",
                              "~P": "Parameter"}[section_name[:2]]

        # The first entry is the default order; each remaining entry pairs
        # an order with the mnemonics that use it.
        section_orders = ORDER_DEFINITIONS[self.version][self.section_name2]
        self.default_order = section_orders[0]
        self.orders = {}
        for order, mnemonics in section_orders[1:]:
            for mnemonic in mnemonics:
                self.orders[mnemonic] = order

    def __call__(self, **keys):
        '''Run the builder selected in ``__init__`` and return its result.'''
        item = self.func(**keys)
        return item

    def num(self, x, default=None):
        '''Convert *x* to an int, failing that to a float.

        Keyword Arguments:
            default: returned when neither conversion works; when it is
                None, *x* itself is returned unchanged.

        '''
        if default is None:
            default = x
        # The builtins replace numpy.int / numpy.float, which were plain
        # aliases of them and no longer exist in current NumPy releases.
        for convert in (int, float):
            try:
                return convert(x)
            except (TypeError, ValueError, OverflowError):
                continue
        return default

    def metadata(self, **keys):
        '''Build a HeaderItem, honouring the order defined for the mnemonic.

        Returns None when the order is neither "value:descr" nor
        "descr:value".

        '''
        key_order = self.orders.get(keys["name"], self.default_order)
        if key_order == "value:descr":
            return HeaderItem(
                keys["name"],                 # mnemonic
                keys["unit"],                 # unit
                self.num(keys["value"]),      # value
                keys["descr"],                # descr
                )
        elif key_order == "descr:value":
            # NOTE(review): the last two positional arguments are swapped
            # relative to the branch above, so the text captured as "descr"
            # lands in the third slot and the converted "value" text in the
            # fourth -- presumably because the two fields trade places on
            # the line in this order; confirm against HeaderItem.
            return HeaderItem(
                keys["name"],
                keys["unit"],
                keys["descr"],
                self.num(keys["value"]),
                )

    def curves(self, **keys):
        '''Build a CurveItem; the value is kept as text, not converted.'''
        item = CurveItem(
            keys['name'],               # mnemonic
            keys['unit'],               # unit
            keys['value'],              # value
            keys['descr'],              # descr
            )
        return item

    def params(self, **keys):
        '''Build a HeaderItem whose value is passed through ``num``.'''
        return HeaderItem(
            keys['name'],               # mnemonic
            keys['unit'],               # unit
            self.num(keys['value']),    # value
            keys['descr'],              # descr
            )
984
985
986
def read_line(line, pattern=None):
    '''Split one LAS header line into its named fields.

    With the default regular expression the line is expected to look like::

        name.unit       value : descr

    Arguments:
        line (str): line from a LAS header section

    Keyword Arguments:
        pattern (str): regular expression with named groups to use instead
            of the default one.

    Returns:
        A dictionary mapping every named group of the pattern ("name",
        "unit", "value" and "descr" for the default pattern) to its text
        with surrounding whitespace removed. A "unit" that ends with a
        period has its leading and trailing periods removed.

    Raises:
        AttributeError: if the line does not match the pattern.

    '''
    if pattern is None:
        pattern = "".join([
            r"\.?(?P<name>[^.]*)\.",
            r"(?P<unit>[^\s:]*)",
            r"(?P<value>[^:]*):",
            r"(?P<descr>.*)",
        ])

    fields = re.match(pattern, line).groupdict()
    result = dict((key, text.strip()) for key, text in fields.items())

    unit = result.get("unit")
    if unit is not None and unit.endswith("."):
        result["unit"] = unit.strip(".")  # see issue #36
    return result
1018
1019
1020
def open_file(file_ref, encoding=None, encoding_errors="replace",
              autodetect_encoding=False, autodetect_encoding_chars=40e3):
    '''Open a file if necessary.

    If autodetect_encoding is True then either cchardet or chardet (see PyPi)
    needs to be installed, or else an ImportError will be raised.

    Arguments:
        file_ref: either a filename, an open file object, a URL, or a string of
            a LAS file contents. A string holding exactly one line is taken
            to be a URL (if it matches URL_REGEXP) or else a filename; any
            other string is taken to be the file contents. Anything that is
            not a ``str`` is returned unchanged.

    Keyword Arguments:
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" - how to
            handle errors with encodings (see standard library codecs module or
            Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    The keyword arguments are only used when file_ref is a filename.

    Returns:
        An open file-like object ready for reading from.

    '''
    if not isinstance(file_ref, str):
        return file_ref

    lines = file_ref.splitlines()
    if len(lines) != 1:
        # File contents passed directly; line endings become "\n".
        return StringIO("\n".join(lines))

    if URL_REGEXP.match(file_ref):
        # Guard only the import, so that an error raised while fetching the
        # URL is never mistaken for "urllib2 is missing".
        try:
            import urllib2
        except ImportError:  # Python 3
            import urllib.request
            response = urllib.request.urlopen(file_ref)
            # The body is copied into memory, so release the connection
            # once it has been read.
            try:
                enc = response.headers.get_content_charset("utf-8")
                return StringIO(response.read().decode(enc))
            finally:
                response.close()
        else:
            return urllib2.urlopen(file_ref)

    data = get_unicode_from_filename(
        file_ref, encoding, encoding_errors, autodetect_encoding,
        autodetect_encoding_chars)
    return StringIO(data)
1064
1065
1066
def get_unicode_from_filename(fn, enc, errors, auto, nbytes):
    '''
    Read the whole of a file as text.

    The first bytes of the file are checked for a UTF-8 byte order mark. If
    one is present the file is decoded as "utf-8-sig" and auto-detection
    is skipped, whatever *enc* and *auto* say.

    Arguments:
        fn (str): path to file
        enc (str): encoding - can be None
        errors (str): unicode error handling - can be "strict", "ignore",
            "replace"
        auto (str): auto-detection of character encoding - can be either
            "chardet", "cchardet", or True; a false value disables it
        nbytes (int): amount of the file to read for auto-detection; None
            reads the entire file

    Returns:
        a unicode or string object

    '''
    sample_size = int(nbytes) if nbytes else nbytes

    # Look for a UTF-8 byte order mark at the start of the file.
    head_length = min(32, os.path.getsize(fn))
    with open(fn, mode="rb") as handle:
        head = handle.read(head_length)

    if head.startswith(codecs.BOM_UTF8):
        enc = "utf-8-sig"
    elif auto:
        with open(fn, mode="rb") as handle:
            if sample_size is None:
                sample = handle.read()
            else:
                sample = handle.read(sample_size)
        enc = get_encoding(auto, sample)

    # codecs.open is smarter than cchardet or chardet IME.
    with codecs.open(fn, mode="r", encoding=enc, errors=errors) as stream:
        return stream.read()
1108
1109
1110
def get_encoding(auto, raw):
    '''
    Automatically detect character encoding.

    Arguments:
        auto (str): auto-detection of character encoding - can be either
            "chardet", "cchardet" (case-insensitive), or True. True tries
            cchardet first and falls back to chardet.
        raw (bytes): array of bytes to detect from

    Returns:
        A string specifying the character encoding (the "encoding" entry of
        the detector's result).

    Raises:
        ImportError: if the requested detection package is not installed.
        ValueError: if *auto* is not one of the accepted values.

    '''
    if auto is True:
        try:
            import cchardet as chardet
        except ImportError:
            try:
                import chardet
            except ImportError:
                raise ImportError(
                    "chardet or cchardet is required for automatic"
                    " detection of character encodings.")
            else:
                method = "chardet"
        else:
            method = "cchardet"
    else:
        requested = str(auto).lower()
        if requested == "chardet":
            import chardet
            method = "chardet"
        elif requested == "cchardet":
            import cchardet as chardet
            method = "cchardet"
        else:
            # Fail here with a clear message instead of reaching the
            # detect() call below with no detector bound.
            raise ValueError(
                "auto must be True, 'chardet' or 'cchardet', not %r"
                % (auto,))

    logger.debug("get_encoding Using %s" % method)
    result = chardet.detect(raw)
    logger.debug("get_encoding %s results=%s" % (method, result))
    return result["encoding"]
1151
1152
1153
def get_formatter_function(order, left_width=None, middle_width=None):
    '''Create a function that renders a header item as a LAS header line.

    The rendered line has the form ``<mnemonic>.<middle> : <tail>``. The
    mnemonic is left-justified to *left_width* characters. The middle part
    is the unit and the right-hand item with spaces in between, padding the
    pair out to *middle_width* characters.

    Arguments:
        order: format of item, either "descr:value" or "value:descr" -- see
            LAS 1.2 and 2.0 specifications for more information. The field
            named first goes into the middle part, the other one after the
            colon.

    Keyword Arguments:
        left_width (int): number of characters to the left hand side of the
            first period (defaults to 10)
        middle_width (int): total number of characters minus 1 between the
            first period from the left and the first colon from the left
            (defaults to 40)

    Returns:
        A function which takes a header item (an object with
        ``original_mnemonic``, ``unit``, ``value`` and ``descr`` attributes)
        as its single argument and returns the formatted line as a string.
        None is returned for any other *order*.

    '''
    if left_width is None:
        left_width = 10
    if middle_width is None:
        middle_width = 40

    def pad_mnemonic(mnemonic):
        return mnemonic.ljust(left_width)

    def build_middle(unit, right_hand_item):
        gap = middle_width - len(str(unit)) - len(right_hand_item)
        # A negative gap yields no padding at all.
        return unit + " " * gap + right_hand_item

    def format_descr_value(item):
        left = pad_mnemonic(item.original_mnemonic)
        middle = build_middle(str(item.unit), str(item.descr))
        return "%s.%s : %s" % (left, middle, item.value)

    def format_value_descr(item):
        left = pad_mnemonic(item.original_mnemonic)
        middle = build_middle(str(item.unit), str(item.value))
        return "%s.%s : %s" % (left, middle, item.descr)

    if order == "descr:value":
        return format_descr_value
    elif order == "value:descr":
        return format_value_descr
1194
1195
1196
def get_section_order_function(section, version,
                               order_definitions=ORDER_DEFINITIONS):
    '''Get a function that returns the order per mnemonic and section.

    Arguments:
        section (str): key of the section within
            ``order_definitions[version]``
        version (float): key of the LAS version within *order_definitions*

    Keyword Arguments:
        order_definitions (dict): nested mapping, version -> section ->
            sequence. The first element of the sequence is the default
            order; every further element is an ``(order, mnemonics)`` pair
            naming the mnemonics that use that order.

    Returns:
        A function which takes a mnemonic (str) as its only argument, and
        in turn returns the order listed for it, or the section's default
        order if the mnemonic is not listed.

    '''
    definition = order_definitions[version][section]
    fallback = definition[0]
    lookup = dict(
        (mnemonic, order)
        for order, mnemonics in definition[1:]
        for mnemonic in mnemonics)

    def order_for(mnemonic):
        return lookup.get(mnemonic, fallback)

    return order_for
1219
1220
1221
def get_section_widths(section_name, items, version, order_func, middle_padding=5):
    '''Find minimum section widths fitting the content in *items*.

    Arguments:
        section_name (str): not used by this function
        items (SectionItems): section items
        version (float): not used by this function
        order_func: function taking a mnemonic and returning its order
            ("value:descr" or "descr:value"); the part before the colon
            names the item field that is measured.

    Keyword Arguments:
        middle_padding (int): not used by this function

    Returns:
        A dictionary with the keys "left_width" (length of the longest
        original mnemonic) and "middle_width" (largest value over all items
        of unit length + 1 + length of the measured field). Both are None
        when *items* is empty.

    '''
    widths = {"left_width": None, "middle_width": None}
    if not len(items) > 0:
        return widths

    widths["left_width"] = max(len(item.original_mnemonic) for item in items)

    candidates = []
    for item in items:
        order = order_func(item.mnemonic)
        rhs_element = order.partition(':')[0]
        logger.debug('get_section_widths %s\n\torder=%s rhs_element=%s' % (item, order, rhs_element))
        unit_length = len(str(item.unit))
        field_length = len(str(item[rhs_element]))
        candidates.append(unit_length + 1 + field_length)
    widths['middle_width'] = max(candidates)
    return widths
1244
1245
1246
def read(file_ref, **kwargs):
    '''Read a LAS file.

    This is a convenience wrapper that builds a LASFile from *file_ref*,
    passing every keyword argument through unchanged.

    Note that only versions 1.2 and 2.0 of the LAS file specification
    are currently supported.

    Arguments:
        file_ref: either a filename, an open file object, or a string of
            a LAS file contents.

    Keyword Arguments:
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" - how to
            handle errors with encodings (see standard library codecs module or
            Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    Returns:
        A LASFile object representing the file.

    '''
    las_file = LASFile(file_ref, **kwargs)
    return las_file
1270