Completed
Pull Request — master (#118)
by Kent
04:16
created

lasio.Reader.read_data()   F

Complexity

Conditions 12

Size

Total Lines 34

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 12
dl 0
loc 34
rs 2.7855

How to fix   Complexity   

Complexity

Complex classes like lasio.Reader.read_data() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
'''las.py - read Log ASCII Standard files
2
3
See README.rst and LICENSE for more information.
4
5
'''
6
from __future__ import print_function
7
8
# Standard library packages
9
import codecs
10
import json
11
import logging
12
import os
13
import re
14
import textwrap
15
import traceback
16
17
# The standard library OrderedDict was introduced in Python 2.7 so
18
# we have a third-party option to support Python 2.6
19
20
try:
21
    from collections import OrderedDict
22
except ImportError:
23
    from ordereddict import OrderedDict
24
25
# Convoluted import for StringIO in order to support:
26
#
27
# - Python 3 - io.StringIO
28
# - Python 2 (optimized) - cStringIO.StringIO
29
# - Python 2 (all) - StringIO.StringIO
30
31
try:
32
    import cStringIO as StringIO
33
except ImportError:
34
    try:  # cStringIO not available on this system
35
        import StringIO
36
    except ImportError:  # Python 3
37
        from io import StringIO
38
    else:
39
        from StringIO import StringIO
40
else:
41
    from StringIO import StringIO
42
43
# get basestring in py3
44
45
# Text/bytes aliases that behave the same on Python 2 and Python 3.
try:
    unicode
except NameError:
    # No builtin 'unicode', so this must be Python 3.
    unicode = str
    basestring = (str, bytes)
    bytes = bytes
    str = str
else:
    # Builtin 'unicode' exists, so this must be Python 2.
    basestring = basestring
    bytes = str
    unicode = unicode
    str = str
59
60
# Required third-party packages available on PyPi:
61
62
from namedlist import namedlist
63
import numpy
64
65
# Optional third-party packages available on PyPI are mostly
66
# imported inline below.
67
68
69
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

__version__ = '0.10'

# Header-line layout rules, keyed first by LAS specification version and then
# by section name. Each rule is either a layout string or a
# (layout, [mnemonics]) pair.
# NOTE(review): presumably the bare string is the section default and the
# pair lists the mnemonics that use the other layout -- confirm against
# get_section_order_function.
ORDER_DEFINITIONS = {
    1.2: OrderedDict((
        ("Version", ["value:descr"]),
        ("Well", [
            "descr:value",
            ("value:descr", ["STRT", "STOP", "STEP", "NULL"]),
        ]),
        ("Curves", ["value:descr"]),
        ("Parameter", ["value:descr"]),
    )),
    2.0: OrderedDict((
        ("Version", ["value:descr"]),
        ("Well", ["value:descr"]),
        ("Curves", ["value:descr"]),
        ("Parameter", ["value:descr"]),
    )),
}
89
90
# Case-insensitive pattern that matches a complete http(s)/ftp(s) URL.
URL_REGEXP = re.compile(
    r'^(?:http|ftp)s?://'  # scheme: http://, https://, ftp:// or ftps://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}'
    r'\.?|[A-Z0-9-]{2,}\.?)|'  # (cont.) domain...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)$',
    re.IGNORECASE,
)
98
99
# Numeric values commonly used as null markers.
NULLS_COMMON_NUMERIC = [
    999.25, -999.25, 9999.25, -9999.25,
    0,
    -999, 999, 9999, -9999,
    2147483647, -2147483647,
    32767, -32767,
]

NULLS_AGGRESSIVE_NUMERIC = [0]

# Expressions for use in re.sub

NULLS_COMMON_ALPHA = [
    # matches   #N/A
    r'(#N/A)[ ]',
    r'[ ](#N/A)',
    # matches   1.#INF -1.#INF
    r'(-?1\.#INF)[ ]',
    r'[ ](-?1\.#INF)',
    # matches   1.#IO  -1.#IO
    r'(-?1\.#IO)[ ]',
    r'[ ](-?1\.#IO)',
    # matches   1.#IND -1.#IND
    r'(-?1\.#IND)[ ]',
    r'[ ](-?1\.#IND)',
]

# Generally the patterns below would be a bad idea because these files
# ought to raise an exception and be manually fixed.
# But - that's why this mode is called "aggressive".
NULLS_AGGRESSIVE_ALPHA = [
    # matches - not a float (trailing space/newline)
    r'([^0-9.\-+]+)[ ]',
    # matches - not a float (leading space/newline)
    r'[ ]([^0-9.\-+]+)',
]
118
119
120
121
class LASDataError(Exception):
    '''Raised for an error while reading numerical data from a LAS file.'''
125
126
127
class LASHeaderError(Exception):
    '''Raised for an error while reading header data from a LAS file.'''
131
132
133
class LASUnknownUnitError(Exception):
    '''Raised when a unit in a LAS file is not known.'''
137
138
139
class HeaderItem(OrderedDict):

    '''One header line item: a mnemonic with a unit, value and description.

    The fields are stored as plain attributes; ``item[key]`` is a read-only
    alias for the attribute of the same name and only accepts the names
    listed in ``_FIELDS``.

    Arguments:
        mnemonic (str): mnemonic exactly as it should be written to a file.

    Keyword Arguments:
        unit, value, descr: stored unchanged on the item.

    '''

    # The only keys accepted by item[key].
    _FIELDS = ('mnemonic', 'original_mnemonic', 'useful_mnemonic',
               'unit', 'value', 'descr')

    def __init__(self, mnemonic, unit="", value="", descr=""):
        super(HeaderItem, self).__init__()

        # The original mnemonic needs to be stored for rewriting a new file.
        # It might be nothing - '' - or a duplicate e.g. two 'RHO' curves,
        # or unique - 'X11124' - or perhaps invalid??
        self.original_mnemonic = mnemonic

        # A more useful mnemonic for people to access the item with while
        # the LASFile object exists: a blank mnemonic becomes 'UNKNOWN'.
        if mnemonic.strip() == '':
            self.useful_mnemonic = 'UNKNOWN'
        else:
            self.useful_mnemonic = mnemonic

        # Duplicate mnemonics later get ':1', ':2', ':3', etc. appended (see
        # SectionItems.append). The result is stored here, and is what the
        # user should actually see and use 99.5% of the time.
        self.mnemonic = self.useful_mnemonic

        self.unit = unit
        self.value = value
        self.descr = descr

    def __getitem__(self, key):
        '''Return the attribute named *key*.

        Raises:
            KeyError: if *key* is not one of the names in ``_FIELDS``.

        '''
        if key in self._FIELDS:
            return getattr(self, key)
        # Report the actual class (the message used to say 'CurveItem' even
        # for plain HeaderItems); the args tuple keeps tuple keys from
        # breaking the %-formatting.
        raise KeyError('%s only has restricted items (not %s)'
                       % (self.__class__.__name__, key))

    def __repr__(self):
        return (
            "%s(mnemonic=%s, unit=%s, value=%s, "
            "descr=%s, original_mnemonic=%s)" % (
                self.__class__.__name__, self.mnemonic, self.unit, self.value,
                self.descr, self.original_mnemonic))

    def _repr_pretty_(self, p, cycle):
        # Pretty-printer hook: emit the normal repr as text.
        return p.text(self.__repr__())
195
196
197
class CurveItem(HeaderItem):

    '''Header item for a curve, which additionally carries the curve data.

    Takes the same arguments as HeaderItem. ``data`` starts out as an empty
    one-dimensional array and is replaced once data has been read or
    assigned.

    '''

    def __init__(self, *args, **kwargs):
        # numpy.ndarray([]) would build an *uninitialised* 0-d array (shape
        # ()), i.e. a single garbage value; an empty array is what is meant.
        self.data = numpy.array([])
        super(CurveItem, self).__init__(*args, **kwargs)

    @property
    def API_code(self):
        '''Alias for the ``value`` attribute of this item.'''
        return self.value

    def __repr__(self):
        return (
            "%s(mnemonic=%s, unit=%s, value=%s, "
            "descr=%s, original_mnemonic=%s, data.shape=%s)" % (
                self.__class__.__name__, self.mnemonic, self.unit, self.value,
                self.descr, self.original_mnemonic, self.data.shape))
212
213
214
class SectionItems(list):

    '''List of header items that can also be addressed by mnemonic.

    Items are looked up by their ``mnemonic`` attribute through ``in``,
    ``section[key]`` and attribute access (``section.KEY``); integer and
    slice keys fall back to normal list indexing.

    '''

    def __contains__(self, testitem):
        '''Allows testing of a mnemonic or an actual item.'''
        for item in self:
            if testitem == item.mnemonic:
                return True
            if (hasattr(testitem, 'mnemonic')
                    and testitem.mnemonic == item.mnemonic):
                return True
        return False

    def keys(self):
        '''Return the list of mnemonics, in item order.'''
        return [item.mnemonic for item in self]

    def values(self):
        '''Return the items themselves (this very list).'''
        return self

    def items(self):
        '''Return a list of (mnemonic, item) pairs.'''
        return [(item.mnemonic, item) for item in self]

    def iterkeys(self):
        return iter(self.keys())

    def itervalues(self):
        return iter(self)

    def iteritems(self):
        return iter(self.items())

    def __getitem__(self, key):
        '''Return the first item whose mnemonic equals *key*.

        If no mnemonic matches, integer and slice keys are treated as
        ordinary list indices.

        Raises:
            KeyError: if no mnemonic matches and *key* is not an index.

        '''
        for item in self:
            if item.mnemonic == key:
                return item
        # Slices are accepted too so that section[1:] works on Python 3,
        # where slicing is routed through __getitem__.
        if isinstance(key, (int, slice)):
            return super(SectionItems, self).__getitem__(key)
        raise KeyError("%s not in %s" % (key, self.keys()))

    def __setitem__(self, key, newitem):
        '''Replace/add a whole item, or just set the value of an item.'''
        if isinstance(newitem, HeaderItem):
            self.set_item(key, newitem)
        else:
            self.set_item_value(key, newitem)

    def __getattr__(self, key):
        '''Give attribute-style access to items by mnemonic.

        Only called when normal attribute lookup has already failed.

        '''
        if key in self:
            return self[key]
        # list defines no __getattr__ to delegate to, so raise the
        # AttributeError directly and name the missing attribute.
        raise AttributeError(
            "%r object has no attribute %r" % (self.__class__.__name__, key))

    def __setattr__(self, key, value):
        '''Set an item by mnemonic; otherwise set a normal attribute.'''
        if key in self:
            self[key] = value
        else:
            super(SectionItems, self).__setattr__(key, value)

    def set_item(self, key, newitem):
        '''Replace the item whose mnemonic is *key*, or append *newitem*.'''
        for i, item in enumerate(self):
            if key == item.mnemonic:

                # This is very important. We replace items where
                # 'mnemonic' is equal - i.e. we do not check useful_mnemonic
                # or original_mnemonic. Is this correct? Needs to be thought
                # about and tested more carefully.

                logger.debug('SectionItems.__setitem__ Replaced %s item', key)
                return super(SectionItems, self).__setitem__(i, newitem)
        self.append(newitem)

    def set_item_value(self, key, value):
        '''Set the ``value`` attribute of the existing item *key*.'''
        self[key].value = value

    def append(self, newitem):
        '''Append *newitem*, then check whether mnemonics need altering.

        When more than one item has a ``useful_mnemonic`` equal to the new
        item's mnemonic, all of them are renamed with ':1', ':2', ...
        suffixes in list order.

        '''
        logger.debug("SectionItems.append type=%s str=%s",
                     type(newitem), newitem)
        super(SectionItems, self).append(newitem)

        # Check to fix the :n suffixes
        duplicates = [item for item in self
                      if item.useful_mnemonic == newitem.mnemonic]
        if len(duplicates) > 1:
            for number, item in enumerate(duplicates, 1):
                item.mnemonic = item.useful_mnemonic + ":%d" % number

    def dictview(self):
        '''Return a plain dict mapping each mnemonic to its item's value.'''
        return dict(zip(self.keys(), [i.value for i in self.values()]))
311
312
    # def __repr__(self):
313
    #     return (
314
    #         "{cls}({contents})".format(
315
    #             cls=self.__class__.__name__,
316
    #             contents=', '.join([str(item) for item in self])))
317
318
319
class JSONEncoder(json.JSONEncoder):

    '''JSON encoder that knows how to serialise LASFile objects.

    A LASFile becomes ``{'metadata': {...}, 'data': {...}}``: 'metadata'
    maps each section name to either its raw text or a list of item dicts,
    and 'data' maps each curve mnemonic to a list of its values.

    '''

    @staticmethod
    def _item_as_dict(item):
        '''Return the fields of a header item as a plain dict.'''
        # dict(item) cannot be used here: header items keep their fields as
        # attributes, so their mapping storage is empty and dict(item) is {}.
        return {
            'mnemonic': item.mnemonic,
            'unit': item.unit,
            'value': item.value,
            'descr': item.descr,
        }

    def default(self, obj):
        '''Return a JSON-serialisable version of *obj*.

        Raises:
            TypeError: (from the base class) if *obj* is not a LASFile.

        '''
        if not isinstance(obj, LASFile):
            # Defer to the base class, which raises TypeError, rather than
            # implicitly returning None and silently encoding null.
            return super(JSONEncoder, self).default(obj)

        metadata = {}
        for name, section in obj.sections.items():
            if isinstance(section, basestring):
                metadata[name] = section
            else:
                metadata[name] = [
                    self._item_as_dict(item) for item in section]

        data = {}
        for curve in obj.curves:
            # tolist() yields built-in Python scalars, which json can encode
            # whatever the array's dtype.
            data[curve.mnemonic] = numpy.asarray(curve.data).tolist()

        return {'metadata': metadata, 'data': data}
335
336
337
338
# Default contents of each section, keyed by section name. Header sections
# are built from (mnemonic, unit, value, descr) rows.
DEFAULT_ITEMS = {
    "Version": SectionItems(
        HeaderItem(mnemonic, unit, value, descr)
        for mnemonic, unit, value, descr in (
            ("VERS", "", 2.0, "CWLS log ASCII Standard -VERSION 2.0"),
            ("WRAP", "", "NO", "One line per depth step"),
            ("DLM", "", "SPACE", "Column Data Section Delimiter"),
        )),
    "Well": SectionItems(
        HeaderItem(mnemonic, unit, value, descr)
        for mnemonic, unit, value, descr in (
            ("STRT", "m", numpy.nan, "START DEPTH"),
            ("STOP", "m", numpy.nan, "STOP DEPTH"),
            ("STEP", "m", numpy.nan, "STEP"),
            ("NULL", "", -9999.25, "NULL VALUE"),
            ("COMP", "", "", "COMPANY"),
            ("WELL", "", "", "WELL"),
            ("FLD", "", "", "FIELD"),
            ("LOC", "", "", "LOCATION"),
            ("PROV", "", "", "PROVINCE"),
            ("CNTY", "", "", "COUNTY"),
            ("STAT", "", "", "STATE"),
            ("CTRY", "", "", "COUNTRY"),
            ("SRVC", "", "", "SERVICE COMPANY"),
            ("DATE", "", "", "DATE"),
            ("UWI", "", "", "UNIQUE WELL ID"),
            ("API", "", "", "API NUMBER"),
        )),
    "Curves": SectionItems([]),
    "Parameter": SectionItems([]),
    "Other": "",
    "Data": numpy.zeros(shape=(0, 1)),
}
367
368
369
370
class LASFile(object):

    '''LAS file object.

    Holds the header sections of a LAS file (``version``, ``well``,
    ``curves``, ``params``, ``other``) plus the data of each curve, and can
    write them back out with ``write``.

    Keyword Arguments:
        file_ref: either a filename, an open file object, or a string of
            a LAS file contents.
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" - how to
            handle errors with encodings (see standard library codecs module or
            Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    '''

    def __init__(self, file_ref=None, **kwargs):

        self._text = ''
        self._use_pandas = "auto"
        self.index_unit = None

        # Every instance gets its own copy of the default sections; handing
        # out the DEFAULT_ITEMS objects themselves would make edits to one
        # LASFile (or to its curves) show up in every other one.
        self.sections = {
            "Version": self._copy_section(DEFAULT_ITEMS["Version"]),
            "Well": self._copy_section(DEFAULT_ITEMS["Well"]),
            "Curves": self._copy_section(DEFAULT_ITEMS["Curves"]),
            "Parameter": self._copy_section(DEFAULT_ITEMS["Parameter"]),
            "Other": str(DEFAULT_ITEMS["Other"]),
            }

        if file_ref is not None:
            self.read(file_ref, **kwargs)

    @staticmethod
    def _copy_section(section):
        '''Return a new section holding independent copies of the items.'''
        clones = []
        for item in section:
            clone = type(item)(
                item.original_mnemonic, item.unit, item.value, item.descr)
            # Keep any ':n' suffix that was already assigned.
            clone.mnemonic = item.mnemonic
            if hasattr(item, 'data'):
                clone.data = item.data
            clones.append(clone)
        return type(section)(clones)

    def read(self, file_ref, use_pandas="auto", null_policy='common', **kwargs):
        '''Read a LAS file.

        Arguments:
            file_ref: either a filename, an open file object, or a string of
                a LAS file contents.

        Keyword Arguments:
            use_pandas (str): bool or "auto" -- use pandas if available -- provide
                False option for faster loading where pandas functionality is not
                needed. "auto" becomes True if pandas is installed, and False if not.
            null_policy (str): either None, 'NULL', 'common' or 'aggressive' --
                see https://github.com/kinverarity1/lasio/issues/49#issuecomment-127980359
            encoding (str): character encoding to open file_ref with
            encoding_errors (str): "strict", "replace" (default), "ignore" - how to
                handle errors with encodings (see standard library codecs module or
                Python Unicode HOWTO for more information)
            autodetect_encoding (bool): use chardet/cchardet to detect encoding
            autodetect_encoding_chars (int/None): number of chars to read from LAS
                file for auto-detection of encoding.

        Raises:
            KeyError: if the ~V section has no VERS item.

        '''
        if use_pandas is not None:
            self._use_pandas = use_pandas

        # NOTE(review): the object returned by open_file is never closed
        # here; whether it should be depends on open_file -- confirm.
        f = open_file(file_ref, **kwargs)

        self._text = f.read()
        logger.debug("LASFile.read LAS content is type %s" % type(self._text))

        # The version section is parsed assuming 1.2 until VERS is known.
        reader = Reader(self._text, version=1.2)
        self.sections["Version"] = reader.read_section('~V')

        # Set version
        try:
            reader.version = self.version['VERS'].value
        except KeyError:
            raise KeyError("No key VERS in ~V section")

        # Validate version with a plain test rather than an assert, which
        # would be stripped (and the fallback skipped) under ``python -O``.
        if reader.version not in (1.2, 2):
            logger.warning("LAS spec version is %s -- neither 1.2 nor 2" %
                           reader.version)
            if reader.version < 2:
                reader.version = 1.2
            else:
                reader.version = 2
        reader.wrap = self.version['WRAP'].value == 'YES'

        self.sections["Well"] = reader.read_section('~W')
        self.sections["Curves"] = reader.read_section('~C')
        try:
            self.sections["Parameter"] = reader.read_section('~P')
        except LASHeaderError:
            # A bad ~P section is only logged, and the previous section kept.
            logger.warning(traceback.format_exc().splitlines()[-1])
        self.sections["Other"] = reader.read_raw_text('~O')

        # Set null value
        reader.null = self.well['NULL'].value

        data = reader.read_data(len(self.curves), null_policy=null_policy)

        for i, c in enumerate(self.curves):
            c.data = data[:, i]

        # index_unit is only set when STRT, STOP, STEP and the first curve
        # all agree; otherwise it keeps its previous value.
        if all(unit == "M" for unit in self._iter_index_units()):
            self.index_unit = "M"
        elif all(unit in ("F", "FT") for unit in self._iter_index_units()):
            self.index_unit = "FT"

        self.refresh()

    def _iter_index_units(self):
        '''Lazily yield the upper-cased STRT, STOP, STEP and index units.'''
        # A generator, so that all() stops at the first mismatch without
        # looking up the remaining items.
        for mnemonic in ("STRT", "STOP", "STEP"):
            yield self.well[mnemonic].unit.upper()
        yield self.curves[0].unit.upper()

    def refresh(self, use_pandas=None):
        '''Refresh curve names and indices.

        Rebuilds ``self.df`` from the current curves when pandas is in use.

        '''
        if use_pandas is not None:
            self._use_pandas = use_pandas

        if self._use_pandas is not False:
            try:
                import pandas
            except ImportError:
                logger.info(
                    "pandas not installed - skipping LASFile.df creation")
                self._use_pandas = False

        if self._use_pandas:
            self.df = pandas.DataFrame(self.data, columns=self.keys())
            self.df.set_index(self.curves[0].mnemonic, inplace=True)

    @property
    def data(self):
        '''2D array of data from LAS file.'''
        return numpy.vstack([c.data for c in self.curves]).T

    def _header_section_lines(self, title, section_name, section, version):
        '''Return the title line and formatted item lines of one section.'''
        logger.debug('LASFile.write %s section', section_name)
        lines = [title.ljust(60, "-")]
        order_func = get_section_order_function(section_name, version)
        section_widths = get_section_widths(
            section_name, section, version, order_func)
        for header_item in section.values():
            order = order_func(header_item.original_mnemonic)
            logger.debug('LASFile.write %s\norder=%s section_widths=%s',
                         header_item, order, section_widths)
            formatter_func = get_formatter_function(order, **section_widths)
            lines.append(formatter_func(header_item))
        return lines

    def write(self, file_object, version=None, wrap=None,
              STRT=None, STOP=None, STEP=None, fmt="%10.5g"):
        '''Write to a file.

        Note that this updates the VERS and WRAP items (when given) and the
        STRT, STOP and STEP items of this object as a side effect.

        Arguments:
            file_object: a file_like object opening for writing.
            version (float): either 1.2 or 2
            wrap (bool): True, False, or None (last uses WRAP item in version)
            STRT (float): optional override to automatic calculation using
                the first index curve value.
            STOP (float): optional override to automatic calculation using
                the last index curve value.
            STEP (float): optional override to automatic calculation using
                the first step size in the index curve.
            fmt (str): format string for numerical data being written to data
                section.

        Examples:

            >>> with open("test_output.las", mode="w") as f:
            ...     lasfile_obj.write(f, 2.0)   # <-- this method

        '''
        if wrap is None:
            # Compare the item's *value*; the item itself never equals "YES",
            # which used to turn wrapping off unconditionally.
            wrap = self.version["WRAP"].value == "YES"
        elif wrap is True:
            self.version["WRAP"] = HeaderItem(
                "WRAP", "", "YES", "Multiple lines per depth step")
        elif wrap is False:
            self.version["WRAP"] = HeaderItem(
                "WRAP", "", "NO", "One line per depth step")

        assert version in (1.2, 2, None)
        if version is None:
            version = self.version["VERS"].value
        if version == 1.2:
            self.version["VERS"] = HeaderItem(
                "VERS", "", 1.2, "CWLS LOG ASCII STANDARD - VERSION 1.2")
        elif version == 2:
            self.version["VERS"] = HeaderItem(
                "VERS", "", 2.0, "CWLS log ASCII Standard -VERSION 2.0")

        if STRT is None:
            STRT = self.index[0]
        if STOP is None:
            STOP = self.index[-1]
        if STEP is None:
            STEP = self.index[1] - self.index[0]  # Faster than numpy.gradient
        self.well["STRT"].value = STRT
        self.well["STOP"].value = STOP
        self.well["STEP"].value = STEP

        # Check for any changes in the pandas dataframe and if there are,
        # create new curves so they are reflected in the output LAS file.
        if self._use_pandas:
            # Hold on to the current frame: add_curve() calls refresh(),
            # which replaces self.df with a frame rebuilt from the curves,
            # so any further new columns would no longer be found in it.
            frame = self.df
            for df_curve_name in list(frame.columns.values):
                if df_curve_name not in self.keys():
                    self.add_curve(df_curve_name, frame[df_curve_name])

        # Write each header section.
        lines = []
        for title, section_name, section in (
                ("~Version ", "Version", self.version),
                ("~Well ", "Well", self.well),
                ("~Curves ", "Curves", self.curves),
                ("~Params ", "Parameter", self.params)):
            lines += self._header_section_lines(
                title, section_name, section, version)

        # ~Other
        lines.append("~Other ".ljust(60, "-"))
        lines += self.other.splitlines()

        lines.append("~ASCII ".ljust(60, "-"))

        file_object.write("\n".join(lines))
        file_object.write("\n")

        data_arr = numpy.column_stack([c.data for c in self.curves])
        nrows, ncols = data_arr.shape

        def format_data_section_line(n, fmt, l=10, spacer=" "):
            # NaN is written as the file's NULL value.
            if numpy.isnan(n):
                return spacer + str(self.well["NULL"].value).rjust(l)
            return spacer + (fmt % n).rjust(l)

        twrapper = textwrap.TextWrapper(width=79)
        # Over-long data lines are only warned about for version 1.2 files.
        warn_long_lines = self.version["VERS"].value == 1.2
        for i in range(nrows):
            depth_slice = ''.join(
                format_data_section_line(data_arr[i, j], fmt)
                for j in range(ncols))

            if wrap:
                row_lines = twrapper.wrap(depth_slice)
                logger.debug("LASFile.write Wrapped %d lines out of %s" %
                             (len(row_lines), depth_slice))
            else:
                row_lines = [depth_slice]

            for line in row_lines:
                if warn_long_lines and len(line) > 255:
                    logger.warning(
                        "LASFile.write Data line > 256 chars: %s" % line)
                file_object.write(line + "\n")

    def get_curve(self, mnemonic):
        '''Return Curve object.

        Arguments:
            mnemonic (str): the name of the curve

        Returns:
            The CurveItem object, not just the data array, or None if no
            curve has that mnemonic.

        '''
        for curve in self.curves:
            if curve.mnemonic == mnemonic:
                return curve

    def __getitem__(self, key):
        '''Return the data of the curve at index or with mnemonic *key*.

        A string that is not a curve mnemonic gives None.

        '''
        if isinstance(key, int):
            return self.curves[key].data
        elif isinstance(key, str):
            if key in self.keys():
                return self.curves[key].data
        else:
            # NOTE(review): object defines no __getitem__, so this line
            # raises AttributeError for any other key type -- left unchanged
            # to keep the exception type callers currently see.
            super(LASFile, self).__getitem__(key)

    def __setitem__(self, key, value):
        '''Item assignment is not supported yet.

        Raises:
            NotImplementedError: always.

        '''
        # This used to be ``assert NotImplementedError(...)``, which is
        # always true and so silently discarded the assignment.
        raise NotImplementedError('not yet')

    def keys(self):
        '''Return the list of curve mnemonics.'''
        return [c.mnemonic for c in self.curves]

    def values(self):
        '''Return the list of curve data arrays.'''
        return [c.data for c in self.curves]

    def items(self):
        '''Return a list of (mnemonic, data) pairs, one per curve.'''
        return [(c.mnemonic, c.data) for c in self.curves]

    def iterkeys(self):
        return iter(self.keys())

    def itervalues(self):
        return iter(self.values())

    def iteritems(self):
        return iter(self.items())

    @property
    def version(self):
        return self.sections["Version"]

    @version.setter
    def version(self, section):
        self.sections["Version"] = section

    @property
    def well(self):
        return self.sections["Well"]

    @well.setter
    def well(self, section):
        self.sections["Well"] = section

    @property
    def curves(self):
        return self.sections["Curves"]

    @curves.setter
    def curves(self, section):
        self.sections["Curves"] = section

    @property
    def params(self):
        return self.sections["Parameter"]

    @params.setter
    def params(self, section):
        self.sections["Parameter"] = section

    @property
    def other(self):
        return self.sections["Other"]

    @other.setter
    def other(self, section):
        self.sections["Other"] = section

    @property
    def metadata(self):
        '''All items of the version, well and params sections together.

        The returned SectionItems holds the very same item objects as the
        sections (not copies). Where a mnemonic occurs in more than one
        section, lookups find the first one.

        '''
        # Iterating self.sections itself would walk over the characters of
        # the section *names*. The combined list is built up front, rather
        # than with append(), so that no item's mnemonic gets rewritten.
        combined = []
        for name in ("Version", "Well", "Parameter"):
            combined.extend(self.sections[name])
        return SectionItems(combined)

    @metadata.setter
    def metadata(self, value):
        raise Warning('Set values in the version/well/params attrs directly')

    @property
    def df(self):
        '''The pandas DataFrame of the curves, or None if pandas is unused.'''
        if self._use_pandas:
            return self._df
        else:
            logger.warning(
                "pandas is not installed or use_pandas was set to False")

    @df.setter
    def df(self, value):
        self._df = value

    @property
    def index(self):
        '''Data of the first curve (first column of ``data``).'''
        return self.data[:, 0]

    @property
    def depth_m(self):
        '''The index converted to metres.

        Raises:
            LASUnknownUnitError: if ``index_unit`` is neither "M" nor "FT".

        '''
        if self.index_unit == "M":
            return self.index
        elif self.index_unit == "FT":
            return self.index * 0.3048
        else:
            raise LASUnknownUnitError("Unit of depth index not known")

    @property
    def depth_ft(self):
        '''The index converted to feet.

        Raises:
            LASUnknownUnitError: if ``index_unit`` is neither "M" nor "FT".

        '''
        if self.index_unit == "M":
            return self.index / 0.3048
        elif self.index_unit == "FT":
            return self.index
        else:
            raise LASUnknownUnitError("Unit of depth index not known")

    def add_curve(self, mnemonic, data, unit="", descr="", value=""):
        '''Add a curve (or replace the one with this mnemonic) and refresh.'''
        curve = CurveItem(mnemonic, unit, value, descr)
        curve.data = data
        self.curves[mnemonic] = curve
        self.refresh()

    @property
    def header(self):
        '''The dict of all sections, keyed by section name.'''
        return self.sections
821
822
823
class Las(LASFile):
    '''Alias of LASFile, retained for backwards compatibility.'''
831
832
833
class Reader(object):

    '''Read header sections and the data section from the text of a LAS file.

    Attributes set by the constructor:
        lines (list): the input text split with ``str.splitlines``
        version: passed on to SectionParser by read_section
        null: value replaced by NaN in read_data; starts out as NaN
        wrap (bool): when True (the initial value) read_data treats the data
            section as wrapped and reshapes it using the number of curves

    '''

    def __init__(self, text, version):
        self.lines = text.splitlines()
        self.version = version
        self.null = numpy.nan
        self.wrap = True

    @property
    def section_names(self):
        '''Return every stripped line that starts with "~", in file order.'''
        stripped = (line.strip() for line in self.lines)
        return [line for line in stripped if line.startswith('~')]

    def iter_section_lines(self, section_name, ignore_comments=True):
        '''Yield the stripped, non-blank lines of one section.

        Arguments:
            section_name (str): prefix identifying the section title line,
                e.g. "~W"

        Keyword Arguments:
            ignore_comments (bool): skip lines starting with "#"

        The section begins after the first line starting with
        *section_name* and ends at the next line starting with "~".

        '''
        in_section = False
        for line in self.lines:
            line = line.strip()
            if not line:
                continue
            if ignore_comments and line.startswith('#'):
                continue
            if line.startswith(section_name):
                if in_section:
                    # A second title line for the same section ends it.
                    return
                in_section = True
                continue
            if in_section:
                if line.startswith('~'):
                    # Start of the next section; we're done here.
                    break
                yield line

    def read_raw_text(self, section_name):
        '''Return the section's lines, comments included, joined by "\\n".'''
        return '\n'.join(self.iter_section_lines(section_name,
                                                 ignore_comments=False))

    def read_section(self, section_name):
        '''Parse every line of a header section into items.

        Arguments:
            section_name (str): section title prefix, e.g. "~W"

        Returns:
            A SectionItems object holding one item per section line.

        Raises:
            LASHeaderError: if a line cannot be parsed by read_line.

        '''
        parser = SectionParser(section_name, version=self.version)
        section = SectionItems()
        for line in self.iter_section_lines(section_name):
            try:
                values = read_line(line)
            except Exception:
                raise LASHeaderError("Failed in %s section on line:\n%s%s" % (
                    section_name, line,
                    traceback.format_exc().splitlines()[-1]))
            else:
                section.append(parser(**values))
        return section

    def read_data(self, number_of_curves=None, null_policy='common'):
        '''Read the data section into a numpy array.

        Keyword Arguments:
            number_of_curves (int): number of columns used to reshape the
                array when ``self.wrap`` is true
            null_policy (str): 'NULL' replaces only ``self.null``; 'common'
                additionally replaces NULLS_COMMON_NUMERIC values;
                'aggressive' additionally replaces NULLS_AGGRESSIVE_NUMERIC
                values. Any other value leaves the numbers untouched.

        Returns:
            The data array with null values replaced by NaN. When no data is
            present the tuple ``(None, None)`` is returned instead.

        Raises:
            LASDataError: if numpy cannot parse the data text.

        '''
        s = self.read_data_string(null_policy=null_policy)
        if not self.wrap:
            try:
                arr = numpy.loadtxt(StringIO(s))
            except Exception:
                raise LASDataError("Failed to read data:\n%s" % (
                                   traceback.format_exc().splitlines()[-1]))
        else:
            # Wrapped data: flatten all rows into one long line, then
            # rebuild the rows from the number of curves.
            s = re.sub(r"[\n\t\r]", " ", s)
            try:
                arr = numpy.loadtxt(StringIO(s))
            except Exception:
                raise LASDataError("Failed to read wrapped data: %s" % (
                                   traceback.format_exc().splitlines()[-1]))
            # Pass values as logging arguments: "%s" % arr.shape would use
            # the shape tuple itself as the argument tuple and fail for a
            # 0-d array.
            logger.debug('Reader.read_data arr shape = %s', arr.shape)
            logger.debug('Reader.read_data number of curves = %s',
                         number_of_curves)
            arr = numpy.reshape(arr, (-1, number_of_curves))
        if not arr.shape or (arr.ndim == 1 and arr.shape[0] == 0):
            logger.warning('Reader.read_data No data present.')
            return None, None
        logger.info('LAS file shape = %s', arr.shape)
        logger.debug('checking for nulls (NULL = %s)', self.null)
        self._replace_numeric_nulls(arr, null_policy)
        return arr

    def _replace_numeric_nulls(self, arr, null_policy):
        '''Overwrite, in place, the null values selected by *null_policy*.'''
        if null_policy in ['NULL', 'common', 'aggressive']:
            arr[arr == self.null] = numpy.nan
        if null_policy in ['common', 'aggressive']:
            for value in NULLS_COMMON_NUMERIC:
                arr[arr == value] = numpy.nan
        if null_policy in ['aggressive']:
            for value in NULLS_AGGRESSIVE_NUMERIC:
                arr[arr == value] = numpy.nan

    def read_data_string(self, null_policy):
        '''Return the text of the data section prepared for numeric parsing.

        Arguments:
            null_policy (str): 'common' applies the NULLS_COMMON_ALPHA
                patterns; 'aggressive' applies those and the
                NULLS_AGGRESSIVE_ALPHA patterns as well.

        The data section is everything after the first line starting with
        "~A". If there is no such line the whole text is used.

        '''
        start_data = None
        for i, line in enumerate(self.lines):
            if line.strip().startswith('~A'):
                start_data = i + 1
                break
        s = '\n'.join(self.lines[start_data:])
        # Separate values that run together at a minus sign: "1-2" -> "1 -2".
        s = re.sub(r'(\d)-(\d)', r'\1 -\2', s)
        # Text containing two decimal points becomes two NaN values.
        s = re.sub(r'-?\d*\.\d*\.\d*', ' NaN NaN ', s)
        # "NaN", one more character and any following digits become two
        # NaN values.
        s = re.sub(r'NaN.\d*', ' NaN NaN ', s)

        if null_policy in ['common', 'aggressive']:
            for pattern in NULLS_COMMON_ALPHA:
                s = re.sub(pattern, null_alpha_repl, s)
        if null_policy in ['aggressive']:
            for pattern in NULLS_AGGRESSIVE_ALPHA:
                s = re.sub(pattern, null_alpha_repl, s)
        return s
944
945
def null_alpha_repl(match):
    '''Replacement callback for ``re.sub`` that writes NaN for a match.

    Arguments:
        match: a regular expression match object

    Returns:
        The string ' NaN ' when the pattern that produced the match starts
        or ends with '[ ]'; otherwise None.

    '''
    pattern = match.re.pattern
    if pattern.startswith('[ ]') or pattern.endswith('[ ]'):
        return ' NaN '
    return None
952
953
954
class SectionParser(object):

    '''Turn the fields of a parsed header line into a header item.

    Calling an instance with the keyword arguments "name", "unit", "value"
    and "descr" (the keys of the dictionary returned by read_line) returns
    a CurveItem for a "~C" section and a HeaderItem for any other section.

    Arguments:
        section_name (str): one of "~C", "~W", "~V" or "~P"; any other
            value raises KeyError

    Keyword Arguments:
        version: key into ORDER_DEFINITIONS

    '''

    def __init__(self, section_name, version=1.2):
        if section_name.startswith('~C'):
            self.func = self.curves
        elif section_name.startswith('~P'):
            self.func = self.params
        else:
            self.func = self.metadata

        self.version = version
        self.section_name = section_name
        self.section_name2 = {"~C": "Curves",
                              "~W": "Well",
                              "~V": "Version",
                              "~P": "Parameter"}[section_name]

        # The first entry is the default order for the section; each
        # following entry is an (order, mnemonics) pair of exceptions.
        section_orders = ORDER_DEFINITIONS[self.version][self.section_name2]
        self.default_order = section_orders[0]
        self.orders = {}
        for order, mnemonics in section_orders[1:]:
            for mnemonic in mnemonics:
                self.orders[mnemonic] = order

    def __call__(self, **keys):
        '''Build the item for one header line using the section's builder.'''
        return self.func(**keys)

    def num(self, x, default=None):
        '''Convert *x* to an int, else to a float, else return *default*.

        The builtin ``int`` and ``float`` are used. ``numpy.int`` and
        ``numpy.float`` were plain aliases of them and no longer exist in
        current NumPy releases, so calling them would make every conversion
        fail and return the unconverted value.

        Arguments:
            x: value to convert, normally a string

        Keyword Arguments:
            default: returned when neither conversion works; when None,
                *x* itself is returned

        '''
        if default is None:
            default = x
        for convert in (int, float):
            try:
                return convert(x)
            except (TypeError, ValueError, OverflowError):
                continue
        return default

    def metadata(self, **keys):
        '''Build a HeaderItem, honouring the order defined for the mnemonic.

        Returns:
            A HeaderItem, or None when the order is neither "value:descr"
            nor "descr:value".

        '''
        key_order = self.orders.get(keys["name"], self.default_order)
        if key_order == "value:descr":
            return HeaderItem(
                keys["name"],                 # mnemonic
                keys["unit"],                 # unit
                self.num(keys["value"]),      # value
                keys["descr"],                # descr
                )
        elif key_order == "descr:value":
            # The text before the colon (parsed as "value") and the text
            # after it (parsed as "descr") are passed in the opposite
            # positions compared with the branch above.
            # NOTE(review): the numeric conversion is applied to the text
            # before the colon -- confirm that this is intended.
            return HeaderItem(
                keys["name"],                   # mnemonic
                keys["unit"],                   # unit
                keys["descr"],                  # text after the colon
                self.num(keys["value"]),        # text before the colon
                )

    def curves(self, **keys):
        '''Build a CurveItem; the value is kept as parsed (no conversion).'''
        item = CurveItem(
            keys['name'],               # mnemonic
            keys['unit'],               # unit
            keys['value'],              # value
            keys['descr'],              # descr
            )
        return item

    def params(self, **keys):
        '''Build a HeaderItem whose value is converted with num().'''
        return HeaderItem(
            keys['name'],               # mnemonic
            keys['unit'],               # unit
            self.num(keys['value']),    # value
            keys['descr'],              # descr
            )
1029
1030
1031
def read_line(line, pattern=None):
    '''Split one LAS header line into its four fields.

    The line is matched against a regular expression -- see the LAS file
    specs for details; the expected layout is basically::

        name.unit       value : descr

    Arguments:
        line (str): line from a LAS header section

    Keyword Arguments:
        pattern (str): regular expression with named groups, used instead
            of the built-in one

    Returns:
        A dictionary with keys "name", "unit", "value", and "descr", each
        containing a stripped string. Dots around a unit that ends with a
        dot are removed.

    Raises:
        AttributeError: if the line does not match the pattern.

    '''
    if pattern is None:
        pattern = (r"\.?(?P<name>[^.]*)\."
                   r"(?P<unit>[^\s:]*)"
                   r"(?P<value>[^:]*):"
                   r"(?P<descr>.*)")
    groups = re.match(pattern, line).groupdict()
    fields = {}
    for field in groups:
        text = groups[field].strip()
        if field == "unit" and text.endswith("."):
            text = text.strip(".")  # see issue #36
        fields[field] = text
    return fields
1063
1064
1065
def open_file(file_ref, encoding=None, encoding_errors="replace",
              autodetect_encoding=False, autodetect_encoding_chars=40e3):
    '''Return a file-like object for *file_ref*, opening it if necessary.

    If autodetect_encoding is True then either cchardet or chardet (see PyPi)
    needs to be installed, or else an ImportError will be raised.

    Arguments:
        file_ref: either a filename, an open file object, a URL, or a string
            holding the contents of a LAS file. A string that spans exactly
            one line is treated as a URL or a filename; any other string is
            treated as file contents. Anything that is not a string is
            returned unchanged.

    Keyword Arguments:
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" - how to
            handle errors with encodings (see standard library codecs module or
            Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    Returns:
        An open file-like object ready for reading from.

    '''
    if not isinstance(file_ref, str):
        return file_ref

    lines = file_ref.splitlines()
    if len(lines) != 1:
        # Contents of a LAS file passed in directly.
        return StringIO("\n".join(lines))

    if not URL_REGEXP.match(file_ref):
        # A single line that is not a URL is taken to be a filename.
        data = get_unicode_from_filename(
            file_ref, encoding, encoding_errors, autodetect_encoding,
            autodetect_encoding_chars)
        return StringIO(data)

    try:
        import urllib2
        file_ref = urllib2.urlopen(file_ref)
    except ImportError:
        # urllib2 is not importable; use urllib.request and decode the
        # response ourselves.
        import urllib.request
        response = urllib.request.urlopen(file_ref)
        enc = response.headers.get_content_charset("utf-8")
        file_ref = StringIO(response.read().decode(enc))
    return file_ref
1109
1110
1111
def get_unicode_from_filename(fn, enc, errors, auto, nbytes):
    '''
    Read the text of a file, working out its encoding where possible.

    Arguments:
        fn (str): path to file
        enc (str): encoding - can be None
        errors (str): unicode error handling - can be "strict", "ignore", "replace"
        auto (str): auto-detection of character encoding - can be either
            "chardet", "cchardet", or True; a false value disables it
        nbytes (int): number of bytes to read for auto-detection; None
            reads the whole file

    Returns:
        a unicode or string object

    '''
    if nbytes:
        nbytes = int(nbytes)

    # A UTF-8 byte order mark settles the encoding, so auto-detection is
    # switched off when one is found.

    bom_probe_size = min(32, os.path.getsize(fn))
    with open(fn, mode="rb") as handle:
        leading_bytes = handle.read(bom_probe_size)
    if leading_bytes.startswith(codecs.BOM_UTF8):
        enc, auto = "utf-8-sig", False

    if auto:
        with open(fn, mode="rb") as handle:
            sample = handle.read() if nbytes is None else handle.read(nbytes)
        enc = get_encoding(auto, sample)

    # codecs.open is smarter than cchardet or chardet IME.

    with codecs.open(fn, mode="r", encoding=enc, errors=errors) as f:
        return f.read()
1153
1154
1155
def get_encoding(auto, raw):
    '''
    Automatically detect character encoding.

    Arguments:
        auto (str): auto-detection of character encoding - can be either
            "chardet", "cchardet" (case-insensitive), or True. With True,
            cchardet is tried first and chardet second.
        raw (bytes): array of bytes to detect from

    Returns:
        A string specifying the character encoding.

    Raises:
        ImportError: if the requested detection package is not installed.
        ValueError: if *auto* is not one of the supported values.

    '''
    if auto is True:
        try:
            import cchardet as chardet
        except ImportError:
            try:
                import chardet
            except ImportError:
                raise ImportError(
                    "chardet or cchardet is required for automatic"
                    " detection of character encodings.")
            else:
                method = "chardet"
        else:
            method = "cchardet"
    else:
        method = str(auto).lower()
        if method == "chardet":
            import chardet
        elif method == "cchardet":
            import cchardet as chardet
        else:
            # Fail clearly instead of hitting an unbound "chardet" name
            # further down.
            raise ValueError(
                "auto must be True, 'chardet' or 'cchardet', not %r"
                % (auto,))
    logger.debug("get_encoding Using %s" % method)

    result = chardet.detect(raw)
    logger.debug("get_encoding %s results=%s" % (method, result))
    return result["encoding"]
1196
1197
1198
def get_formatter_function(order, left_width=None, middle_width=None):
    '''Create a function that formats a header item as a LAS header line.

    Arguments:
        order: format of item, either "descr:value" or "value:descr" -- see
            LAS 1.2 and 2.0 specifications for more information.

    Keyword Arguments:
        left_width (int): number of characters to the left hand side of the
            first period (default 10)
        middle_width (int): total number of characters minus 1 between the
            first period from the left and the first colon from the left
            (default 40)

    Returns:
        A function which takes a header item (e.g. LASHeaderItem or Curve)
        as its single argument and which in turn returns a string which is
        the correctly formatted LAS header line. None is returned when
        *order* is not one of the two supported values.

    '''
    if left_width is None:
        left_width = 10
    if middle_width is None:
        middle_width = 40

    def pad_mnemonic(mnemonic):
        # Left-justify the mnemonic in the column before the period.
        return mnemonic.ljust(left_width)

    def build_middle(unit, right_hand_item):
        # Unit on the left, item on the right, spaces filling the gap.
        gap = middle_width - len(str(unit)) - len(right_hand_item)
        return unit + " " * gap + right_hand_item

    def format_descr_value(item):
        return "%s.%s : %s" % (
            pad_mnemonic(item.original_mnemonic),
            build_middle(str(item.unit), str(item.descr)),
            item.value
        )

    def format_value_descr(item):
        return "%s.%s : %s" % (
            pad_mnemonic(item.original_mnemonic),
            build_middle(str(item.unit), str(item.value)),
            item.descr
        )

    if order == "descr:value":
        return format_descr_value
    if order == "value:descr":
        return format_value_descr
    return None
1239
1240
1241
def get_section_order_function(section, version,
                               order_definitions=ORDER_DEFINITIONS):
    '''Get a function that returns the order for a mnemonic of a section.

    Arguments:
        section (str): key of the section inside
            ``order_definitions[version]``
        version (float): key of the LAS version inside *order_definitions*

    Keyword Arguments:
        order_definitions (dict): maps version, then section, to a sequence
            whose first entry is the default order and whose remaining
            entries are (order, mnemonics) pairs.

    Returns:
        A function which takes a mnemonic (str) as its only argument, and
        in turn returns the order listed for it, or the section's default
        order when the mnemonic is not listed.

    '''
    definition = order_definitions[version][section]
    fallback = definition[0]
    order_by_mnemonic = {}
    for order, mnemonics in definition[1:]:
        order_by_mnemonic.update(dict.fromkeys(mnemonics, order))

    def order_for(mnemonic):
        return order_by_mnemonic.get(mnemonic, fallback)

    return order_for
1264
1265
1266
def get_section_widths(section_name, items, version, order_func, middle_padding=5):
    '''Find minimum section widths fitting the content in *items*.

    Arguments:
        section_name (str): not used by this function
        items (SectionItems): section items
        version (float): not used by this function
        order_func: function returning the order ("value:descr" or
            "descr:value") for an item's mnemonic

    Keyword Arguments:
        middle_padding (int): not used by this function

    Returns:
        A dictionary with keys "left_width" (longest original mnemonic) and
        "middle_width" (longest unit plus one plus the element named first
        in the item's order). Both are None when *items* is empty.

    '''
    widths = dict(left_width=None, middle_width=None)
    if len(items) == 0:
        return widths

    widths["left_width"] = max(len(entry.original_mnemonic) for entry in items)
    candidates = []
    for entry in items:
        order = order_func(entry.mnemonic)
        # The element named before the colon shares the middle column
        # with the unit.
        rhs_element = order.split(':')[0]
        logger.debug('get_section_widths %s\n\torder=%s rhs_element=%s' % (entry, order, rhs_element))
        unit_length = len(str(entry.unit))
        element_length = len(str(entry[rhs_element]))
        candidates.append(unit_length + 1 + element_length)
    widths['middle_width'] = max(candidates)
    return widths
1289
1290
1291
def read(file_ref, **kwargs):
    '''Read a LAS file and return it as a LASFile object.

    Note that only versions 1.2 and 2.0 of the LAS file specification
    are currently supported.

    Arguments:
        file_ref: either a filename, an open file object, or a string
            holding the contents of a LAS file.

    Keyword Arguments:
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" - how to
            handle errors with encodings (see standard library codecs module or
            Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    All keyword arguments are passed to LASFile unchanged.

    Returns:
        A LASFile object representing the file -- see above

    '''
    las_file = LASFile(file_ref, **kwargs)
    return las_file
1315