|
1
|
|
|
'''las.py - read Log ASCII Standard files |
|
2
|
|
|
|
|
3
|
|
|
See README.rst and LICENSE for more information. |
|
4
|
|
|
|
|
5
|
|
|
''' |
|
6
|
|
|
from __future__ import print_function |
|
7
|
|
|
|
|
8
|
|
|
# Standard library packages |
|
9
|
|
|
import codecs |
|
10
|
|
|
import json |
|
11
|
|
|
import logging |
|
12
|
|
|
import os |
|
13
|
|
|
import re |
|
14
|
|
|
import textwrap |
|
15
|
|
|
import traceback |
|
16
|
|
|
|
|
17
|
|
|
# The standard library OrderedDict was introduced in Python 2.7 so |
|
18
|
|
|
# we have a third-party option to support Python 2.6 |
|
19
|
|
|
|
|
20
|
|
|
try: |
|
21
|
|
|
from collections import OrderedDict |
|
22
|
|
|
except ImportError: |
|
23
|
|
|
from ordereddict import OrderedDict |
|
24
|
|
|
|
|
25
|
|
|
# Convoluted import for StringIO in order to support: |
|
26
|
|
|
# |
|
27
|
|
|
# - Python 3 - io.StringIO |
|
28
|
|
|
# - Python 2 (optimized) - cStringIO.StringIO |
|
29
|
|
|
# - Python 2 (all) - StringIO.StringIO |
|
30
|
|
|
|
|
31
|
|
|
try: |
|
32
|
|
|
import cStringIO as StringIO |
|
33
|
|
|
except ImportError: |
|
34
|
|
|
try: # cStringIO not available on this system |
|
35
|
|
|
import StringIO |
|
36
|
|
|
except ImportError: # Python 3 |
|
37
|
|
|
from io import StringIO |
|
38
|
|
|
else: |
|
39
|
|
|
from StringIO import StringIO |
|
40
|
|
|
else: |
|
41
|
|
|
from StringIO import StringIO |
|
42
|
|
|
|
|
43
|
|
|
# get basestring in py3 |
|
44
|
|
|
|
|
45
|
|
|
try: |
|
46
|
|
|
unicode = unicode |
|
47
|
|
|
except NameError: |
|
48
|
|
|
# 'unicode' is undefined, must be Python 3 |
|
49
|
|
|
str = str |
|
50
|
|
|
unicode = str |
|
51
|
|
|
bytes = bytes |
|
52
|
|
|
basestring = (str,bytes) |
|
53
|
|
|
else: |
|
54
|
|
|
# 'unicode' exists, must be Python 2 |
|
55
|
|
|
str = str |
|
56
|
|
|
unicode = unicode |
|
57
|
|
|
bytes = str |
|
58
|
|
|
basestring = basestring |
|
59
|
|
|
|
|
60
|
|
|
# Required third-party packages available on PyPi: |
|
61
|
|
|
|
|
62
|
|
|
from namedlist import namedlist |
|
63
|
|
|
import numpy |
|
64
|
|
|
|
|
65
|
|
|
# Optional third-party packages available on PyPI are mostly |
|
66
|
|
|
# imported inline below. |
|
67
|
|
|
|
|
68
|
|
|
|
|
69
|
|
|
logger = logging.getLogger(__name__) |
|
70
|
|
|
__version__ = "0.9.1" |
|
71
|
|
|
|
|
72
|
|
|
|
|
73
|
|
|
class LASDataError(Exception):

    '''Error during reading of numerical data from LAS file.'''
    pass
|
77
|
|
|
|
|
78
|
|
|
|
|
79
|
|
|
class LASHeaderError(Exception):

    '''Error during reading of header data from LAS file.'''
    pass
|
83
|
|
|
|
|
84
|
|
|
|
|
85
|
|
|
class LASUnknownUnitError(Exception):

    '''Error of unknown unit in LAS file.'''
    pass
|
89
|
|
|
|
|
90
|
|
|
|
|
91
|
|
|
class HeaderItem(OrderedDict):

    '''One item (mnemonic/unit/value/description) from a LAS header section.

    Arguments:
        mnemonic (str): the mnemonic exactly as read from the file
        unit (str): the unit field
        value: the value field
        descr (str): the description field

    Three views of the mnemonic are kept:

    - ``original_mnemonic``: as read from the file -- it might be nothing
      (''), a duplicate (e.g. two 'RHO' curves), or unique ('X11124').
      Needed for rewriting a new file faithfully.
    - ``useful_mnemonic``: the original, or 'UNKNOWN' if the original is
      blank, so every item has an addressable name.
    - ``mnemonic``: what users should actually access the item by.  It
      starts equal to ``useful_mnemonic`` but may later be suffixed with
      ':1', ':2', ... by SectionItems.append() to disambiguate duplicates.

    Raises:
        KeyError: from __getitem__ for any key other than the six
            recognised field names.
    '''

    def __init__(self, mnemonic, unit="", value="", descr=""):
        super(HeaderItem, self).__init__()

        # The original mnemonic needs to be stored for rewriting a new file.
        self.original_mnemonic = mnemonic

        # A blank mnemonic is mapped to 'UNKNOWN' so the item can still be
        # looked up by name while the LASFile object exists.
        if mnemonic.strip() == '':
            self.useful_mnemonic = 'UNKNOWN'
        else:
            self.useful_mnemonic = mnemonic

        # Duplicate mnemonics are later renamed with ':1', ':2', ... by
        # SectionItems; this attribute holds the (possibly suffixed) name
        # that users should see and use almost all of the time.
        self.mnemonic = self.useful_mnemonic

        self.unit = unit
        self.value = value
        self.descr = descr

    def __getitem__(self, key):
        '''Dict-style access restricted to the six known field names.'''
        if key == 'mnemonic':
            return self.mnemonic
        elif key == 'original_mnemonic':
            return self.original_mnemonic
        elif key == 'useful_mnemonic':
            return self.useful_mnemonic
        elif key == 'unit':
            return self.unit
        elif key == 'value':
            return self.value
        elif key == 'descr':
            return self.descr
        else:
            # Bug fix: the message previously always said 'CurveItem' even
            # for plain HeaderItem instances; report the actual class.
            raise KeyError('%s only has restricted items (not %s)'
                           % (self.__class__.__name__, key))

    def __repr__(self):
        return (
            "%s(mnemonic=%s, unit=%s, value=%s, "
            "descr=%s, original_mnemonic=%s)" % (
                self.__class__.__name__, self.mnemonic, self.unit, self.value,
                self.descr, self.original_mnemonic))

    def _repr_pretty_(self, p, cycle):
        # IPython pretty-printing hook; delegates to __repr__.
        return p.text(self.__repr__())
|
147
|
|
|
|
|
148
|
|
|
|
|
149
|
|
|
class CurveItem(HeaderItem):

    '''A HeaderItem that additionally carries the curve's data array.'''

    def __init__(self, *args, **kwargs):
        # Placeholder array; the real data is attached after the data
        # section of the file has been read.
        self.data = numpy.ndarray([])
        super(CurveItem, self).__init__(*args, **kwargs)

    @property
    def API_code(self):
        '''Alias for the value field, which holds the curve's API code.'''
        return self.value

    def __repr__(self):
        fields = (
            self.__class__.__name__, self.mnemonic, self.unit, self.value,
            self.descr, self.original_mnemonic, self.data.shape)
        template = ("%s(mnemonic=%s, unit=%s, value=%s, "
                    "descr=%s, original_mnemonic=%s, data.shape=%s)")
        return template % fields
|
164
|
|
|
|
|
165
|
|
|
|
|
166
|
|
|
class SectionItems(list):

    '''An ordered collection of HeaderItem objects for one LAS section.

    Behaves as a list, but also supports dict-like access by mnemonic
    (``section['STRT']``, ``section.STRT``) and mapping-style iteration
    helpers (keys/values/items).
    '''

    def __contains__(self, testitem):
        '''Allow testing membership by mnemonic string, by an object with
        a ``mnemonic`` attribute, or by identity.

        Bug fix: the original returned False as soon as the FIRST item
        failed to match, so membership tests against any item beyond
        index 0 could wrongly report False.  We now only return False
        after every item has been checked.
        '''
        for item in self:
            if testitem == item.mnemonic:
                return True
            elif hasattr(testitem, 'mnemonic'):
                if testitem.mnemonic == item.mnemonic:
                    return True
            elif testitem is item:
                return True
        return False

    def keys(self):
        '''Mnemonics of all items, in order.'''
        return [item.mnemonic for item in self]

    def values(self):
        '''The items themselves (this object already is the sequence).'''
        return self

    def items(self):
        '''(mnemonic, item) pairs, in order.'''
        return [(item.mnemonic, item) for item in self]

    def iterkeys(self):
        return iter(self.keys())

    def itervalues(self):
        return iter(self)

    def iteritems(self):
        return iter(self.items())

    def __getitem__(self, key):
        '''Look up by mnemonic first; fall back to list indexing for ints.

        Raises:
            KeyError: if no mnemonic matches and the key is not an int.
        '''
        for item in self:
            if item.mnemonic == key:
                return item
        if isinstance(key, int):
            return super(SectionItems, self).__getitem__(key)
        else:
            raise KeyError("%s not in %s" % (key, self.keys()))

    def __setitem__(self, key, newitem):
        # Assigning a HeaderItem replaces/appends the item itself;
        # assigning anything else just updates the existing item's value.
        if isinstance(newitem, HeaderItem):
            self.set_item(key, newitem)
        else:
            self.set_item_value(key, newitem)

    def __getattr__(self, key):
        # Attribute access falls through to mnemonic lookup.
        if key in self:
            return self[key]
        else:
            super(SectionItems, self).__getattr__(key)

    def __setattr__(self, key, value):
        # Attribute assignment to an existing mnemonic routes through
        # __setitem__; anything else is a normal attribute.
        if key in self:
            self[key] = value
        else:
            super(SectionItems, self).__setattr__(key, value)

    def set_item(self, key, newitem):
        '''Replace the item whose mnemonic equals *key*, or append.

        NOTE: replacement matches on ``mnemonic`` only -- not
        ``useful_mnemonic`` or ``original_mnemonic``.  Whether that is
        always the right choice needs more careful thought and testing.
        '''
        for i, item in enumerate(self):
            if key == item.mnemonic:
                logger.debug('SectionItems.__setitem__ Replaced %s item' % key)
                return super(SectionItems, self).__setitem__(i, newitem)
        self.append(newitem)

    def set_item_value(self, key, value):
        '''Set the ``value`` field of the item with mnemonic *key*.'''
        self[key].value = value

    def append(self, newitem):
        '''Append an item, then rename any duplicate mnemonics.

        Duplicates of the same useful_mnemonic get ':1', ':2', ...
        suffixes on their ``mnemonic`` attribute.
        '''
        logger.debug("SectionItems.append type=%s str=%s"
                     % (type(newitem), newitem))
        super(SectionItems, self).append(newitem)

        # Find every item sharing the new item's mnemonic and, if there
        # is more than one, give each a position-based suffix.
        locations = [i for i, item in enumerate(self)
                     if item.useful_mnemonic == newitem.mnemonic]
        if len(locations) > 1:
            for i, loc in enumerate(locations):
                item = self[loc]
                item.mnemonic = item.useful_mnemonic + ":%d" % (i + 1)

    def dictview(self):
        '''Plain dict of mnemonic -> value (not the item objects).'''
        return dict(zip(self.keys(), [i.value for i in self.values()]))
|
269
|
|
|
|
|
270
|
|
|
|
|
271
|
|
|
class JSONEncoder(json.JSONEncoder):

    '''Encode a LASFile as JSON: header sections under "metadata" (plain
    strings kept as-is, item sections as lists of dicts) and curve arrays
    under "data", keyed by mnemonic.'''

    def default(self, obj):
        if isinstance(obj, LASFile):
            encoded = {'metadata': {}, 'data': {}}
            for name, section in obj.sections.items():
                if isinstance(section, basestring):
                    # Free-text sections (e.g. ~Other) pass through as-is.
                    encoded['metadata'][name] = section
                else:
                    encoded['metadata'][name] = [
                        dict(item) for item in section]
            for curve in obj.curves:
                encoded['data'][curve.mnemonic] = list(curve.data)
            return encoded
|
287
|
|
|
|
|
288
|
|
|
|
|
289
|
|
|
|
|
290
|
|
|
# Default contents for each section of a newly created LAS file.
# NOTE(review): the SectionItems values here are shared module-level
# objects and LASFile.__init__ assigns them directly -- mutating the
# defaults of one LASFile instance may affect others; confirm before
# relying on isolation.
DEFAULT_ITEMS = {
    "Version": SectionItems([
        HeaderItem("VERS", "", 2.0, "CWLS log ASCII Standard -VERSION 2.0"),
        HeaderItem("WRAP", "", "NO", "One line per depth step"),
        HeaderItem("DLM", "", "SPACE", "Column Data Section Delimiter"),
    ]),
    "Well": SectionItems([
        HeaderItem("STRT", "m", numpy.nan, "START DEPTH"),
        HeaderItem("STOP", "m", numpy.nan, "STOP DEPTH"),
        HeaderItem("STEP", "m", numpy.nan, "STEP"),
        HeaderItem("NULL", "", -9999.25, "NULL VALUE"),
        HeaderItem("COMP", "", "", "COMPANY"),
        HeaderItem("WELL", "", "", "WELL"),
        HeaderItem("FLD", "", "", "FIELD"),
        HeaderItem("LOC", "", "", "LOCATION"),
        HeaderItem("PROV", "", "", "PROVINCE"),
        HeaderItem("CNTY", "", "", "COUNTY"),
        HeaderItem("STAT", "", "", "STATE"),
        HeaderItem("CTRY", "", "", "COUNTRY"),
        HeaderItem("SRVC", "", "", "SERVICE COMPANY"),
        HeaderItem("DATE", "", "", "DATE"),
        HeaderItem("UWI", "", "", "UNIQUE WELL ID"),
        HeaderItem("API", "", "", "API NUMBER")
    ]),
    "Curves": SectionItems([]),
    "Parameter": SectionItems([]),
    "Other": "",
    "Data": numpy.zeros(shape=(0, 1)),
}
|
319
|
|
|
|
|
320
|
|
|
|
|
321
|
|
|
# Field layout on a header line, per LAS version and per section.
# "value:descr" means the value field comes before the description;
# "descr:value" is the reverse.  A (order, [mnemonics]) tuple applies
# that order only to the listed mnemonics, overriding the section
# default (e.g. LAS 1.2 wells use descr:value except STRT/STOP/STEP/NULL).
ORDER_DEFINITIONS = {
    1.2: OrderedDict([
        ("Version", ["value:descr"]),
        ("Well", [
            "descr:value",
            ("value:descr", ["STRT", "STOP", "STEP", "NULL"])]),
        ("Curves", ["value:descr"]),
        ("Parameter", ["value:descr"]),
    ]),
    2.0: OrderedDict([
        ("Version", ["value:descr"]),
        ("Well", ["value:descr"]),
        ("Curves", ["value:descr"]),
        ("Parameter", ["value:descr"])
    ])}
|
336
|
|
|
|
|
337
|
|
|
|
|
338
|
|
|
# Matches http/https/ftp/ftps URLs (domain, localhost, or dotted-quad IP,
# optional port, optional path).  Presumably used to decide whether a
# file_ref argument is a URL rather than a local path -- confirm against
# the open_file implementation.
URL_REGEXP = re.compile(
    r'^(?:http|ftp)s?://'  # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}'
    r'\.?|[A-Z0-9-]{2,}\.?)|'  # (cont.) domain...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)$', re.IGNORECASE)
|
346
|
|
|
|
|
347
|
|
|
|
|
348
|
|
|
class LASFile(object):

    '''LAS file object.

    Keyword Arguments:
        file_ref: either a filename, an open file object, or a string of
            a LAS file contents.
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" - how to
            handle errors with encodings (see standard library codecs module or
            Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    '''

    def __init__(self, file_ref=None, **kwargs):
        self._text = ''
        self._use_pandas = "auto"
        self.index_unit = None
        # NOTE(review): these defaults are shared module-level SectionItems
        # objects (see DEFAULT_ITEMS); they are replaced by read(), but a
        # LASFile built from scratch mutates the shared defaults.
        self.sections = {
            "Version": DEFAULT_ITEMS["Version"],
            "Well": DEFAULT_ITEMS["Well"],
            "Curves": DEFAULT_ITEMS["Curves"],
            "Parameter": DEFAULT_ITEMS["Parameter"],
            "Other": str(DEFAULT_ITEMS["Other"]),
        }

        if file_ref is not None:
            self.read(file_ref, **kwargs)

    def read(self, file_ref, use_pandas="auto", null_subs=True, **kwargs):
        '''Read a LAS file.

        Arguments:
            file_ref: either a filename, an open file object, or a string of
                a LAS file contents.

        Keyword Arguments:
            use_pandas (str): bool or "auto" -- use pandas if available -- provide
                False option for faster loading where pandas functionality is not
                needed. "auto" becomes True if pandas is installed, and False if not.
            null_subs (bool): passed to the data reader; controls NULL-value
                substitution.
            encoding (str): character encoding to open file_ref with
            encoding_errors (str): "strict", "replace" (default), "ignore" - how to
                handle errors with encodings (see standard library codecs module or
                Python Unicode HOWTO for more information)
            autodetect_encoding (bool): use chardet/cchardet to detect encoding
            autodetect_encoding_chars (int/None): number of chars to read from LAS
                file for auto-detection of encoding.

        '''
        if use_pandas is not None:
            self._use_pandas = use_pandas

        f = open_file(file_ref, **kwargs)

        self._text = f.read()
        logger.debug("LASFile.read LAS content is type %s" % type(self._text))

        # Parse the ~Version section first so the reader can be switched
        # to the file's declared LAS version before reading the rest.
        reader = Reader(self._text, version=1.2)
        self.sections["Version"] = reader.read_section('~V')

        # Set version
        try:
            reader.version = self.version['VERS'].value
        except KeyError:
            raise KeyError("No key VERS in ~V section")

        # Validate version: coerce anything other than 1.2/2 to the
        # nearest supported version, with a warning.
        try:
            assert reader.version in (1.2, 2)
        except AssertionError:
            logger.warning("LAS spec version is %s -- neither 1.2 nor 2" %
                           reader.version)
            if reader.version < 2:
                reader.version = 1.2
            else:
                reader.version = 2
        reader.wrap = self.version['WRAP'].value == 'YES'

        self.sections["Well"] = reader.read_section('~W')
        self.sections["Curves"] = reader.read_section('~C')
        try:
            self.sections["Parameter"] = reader.read_section('~P')
        except LASHeaderError:
            # A malformed ~P section is tolerated; log the last line of
            # the traceback and carry on with the default (empty) section.
            logger.warning(traceback.format_exc().splitlines()[-1])
        self.sections["Other"] = reader.read_raw_text('~O')

        # Set null value
        reader.null = self.well['NULL'].value

        data = reader.read_data(len(self.curves), null_subs=null_subs)

        for i, c in enumerate(self.curves):
            c.data = data[:, i]

        # Infer the index (depth) unit only when STRT/STOP/STEP and the
        # first curve all agree on metres, or all agree on feet.
        if (self.well["STRT"].unit.upper() == "M" and
                self.well["STOP"].unit.upper() == "M" and
                self.well["STEP"].unit.upper() == "M" and
                self.curves[0].unit.upper() == "M"):
            self.index_unit = "M"
        elif (self.well["STRT"].unit.upper() in ("F", "FT") and
                self.well["STOP"].unit.upper() in ("F", "FT") and
                self.well["STEP"].unit.upper() in ("F", "FT") and
                self.curves[0].unit.upper() in ("F", "FT")):
            self.index_unit = "FT"

        self.refresh()

    def refresh(self, use_pandas=None):
        '''Refresh curve names and indices.

        Keyword Arguments:
            use_pandas: if not None, overrides the stored _use_pandas
                setting before the (optional) DataFrame is rebuilt.
        '''
        if use_pandas is not None:
            self._use_pandas = use_pandas

        # pandas is an optional dependency: "auto"/True degrade to False
        # with an info message when it is not installed.
        if self._use_pandas is not False:
            try:
                import pandas
            except ImportError:
                logger.info(
                    "pandas not installed - skipping LASFile.df creation")
                self._use_pandas = False

        if self._use_pandas:
            self.df = pandas.DataFrame(self.data, columns=self.keys())
            self.df.set_index(self.curves[0].mnemonic, inplace=True)

    @property
    def data(self):
        '''2D array of data from LAS file (one column per curve).'''
        return numpy.vstack([c.data for c in self.curves]).T

    def write(self, file_object, version=None, wrap=None,
              STRT=None, STOP=None, STEP=None, fmt="%10.5g"):
        '''Write to a file.

        Arguments:
            file_object: a file_like object opening for writing.
            version (float): either 1.2 or 2
            wrap (bool): True, False, or None (last uses WRAP item in version)
            STRT (float): optional override to automatic calculation using
                the first index curve value.
            STOP (float): optional override to automatic calculation using
                the last index curve value.
            STEP (float): optional override to automatic calculation using
                the first step size in the index curve.
            fmt (str): format string for numerical data being written to data
                section.

        Examples:

            >>> with open("test_output.las", mode="w") as f:
            ...     lasfile_obj.write(f, 2.0)  # <-- this method

        '''
        if wrap is None:
            # Bug fix: compare the WRAP item's *value*; the original
            # compared the HeaderItem object itself to "YES", which was
            # always False.
            wrap = self.version["WRAP"].value == "YES"
        elif wrap is True:
            self.version["WRAP"] = HeaderItem(
                "WRAP", "", "YES", "Multiple lines per depth step")
        elif wrap is False:
            self.version["WRAP"] = HeaderItem(
                "WRAP", "", "NO", "One line per depth step")
        lines = []

        assert version in (1.2, 2, None)
        if version is None:
            version = self.version["VERS"].value
        if version == 1.2:
            self.version["VERS"] = HeaderItem(
                "VERS", "", 1.2, "CWLS LOG ASCII STANDARD - VERSION 1.2")
        elif version == 2:
            self.version["VERS"] = HeaderItem(
                "VERS", "", 2.0, "CWLS log ASCII Standard -VERSION 2.0")

        if STRT is None:
            STRT = self.index[0]
        if STOP is None:
            STOP = self.index[-1]
        if STEP is None:
            STEP = self.index[1] - self.index[0]  # Faster than numpy.gradient
        self.well["STRT"].value = STRT
        self.well["STOP"].value = STOP
        self.well["STEP"].value = STEP

        # Check for any changes in the pandas dataframe and if there are,
        # create new curves so they are reflected in the output LAS file.
        if self._use_pandas:
            curve_names = lambda: [ci.mnemonic for ci in self.curves]
            for df_curve_name in list(self.df.columns.values):
                if df_curve_name not in curve_names():
                    self.add_curve(df_curve_name, self.df[df_curve_name])

        def write_header_section(title, section_name, items):
            # Append one header section: a '~Title ----' banner plus one
            # formatted line per header item, using the field order and
            # column widths appropriate to this section and LAS version.
            logger.debug('LASFile.write %s section' % section_name)
            lines.append(title.ljust(60, "-"))
            order_func = get_section_order_function(section_name, version)
            section_widths = get_section_widths(
                section_name, items, version, order_func)
            for header_item in items:
                order = order_func(header_item.original_mnemonic)
                logger.debug('LASFile.write %s\norder=%s section_widths=%s'
                             % (header_item, order, section_widths))
                formatter_func = get_formatter_function(
                    order, **section_widths)
                lines.append(formatter_func(header_item))

        # Write each header section.
        write_header_section("~Version ", "Version", self.version)
        write_header_section("~Well ", "Well", self.well)
        write_header_section("~Curves ", "Curves", self.curves)
        write_header_section("~Params ", "Parameter", self.params)

        # ~Other
        lines.append("~Other ".ljust(60, "-"))
        lines += self.other.splitlines()

        lines.append("~ASCII ".ljust(60, "-"))

        file_object.write("\n".join(lines))
        file_object.write("\n")

        data_arr = numpy.column_stack([c.data for c in self.curves])
        nrows, ncols = data_arr.shape

        def format_data_section_line(n, fmt, l=10, spacer=" "):
            # NaNs are written back out as the declared NULL value;
            # everything else is right-justified in a fixed-width column.
            if numpy.isnan(n):
                return spacer + str(self.well["NULL"].value).rjust(l)
            else:
                return spacer + (fmt % n).rjust(l)

        twrapper = textwrap.TextWrapper(width=79)
        for i in range(nrows):
            depth_slice = ''
            for j in range(ncols):
                depth_slice += format_data_section_line(data_arr[i, j], fmt)

            if wrap:
                lines = twrapper.wrap(depth_slice)
                logger.debug("LASFile.write Wrapped %d lines out of %s" %
                             (len(lines), depth_slice))
            else:
                lines = [depth_slice]

            # LAS 1.2 limits data line length; warn (but still write) when
            # a line exceeds it.  (Message fixed to match the condition.)
            if self.version["VERS"].value == 1.2:
                for line in lines:
                    if len(line) > 255:
                        logger.warning(
                            "LASFile.write Data line > 255 chars: %s" % line)

            for line in lines:
                file_object.write(line + "\n")

    def get_curve(self, mnemonic):
        '''Return Curve object.

        Arguments:
            mnemonic (str): the name of the curve

        Returns:
            A Curve object, not just the data array; None if no curve
            with that mnemonic exists.

        '''
        for curve in self.curves:
            if curve.mnemonic == mnemonic:
                return curve

    def __getitem__(self, key):
        '''Return a curve's data array by integer index or mnemonic.'''
        if isinstance(key, int):
            return self.curves[key].data
        # Use basestring (module compat shim) so py2 unicode keys work too.
        elif isinstance(key, basestring):
            if key in self.keys():
                return self.curves[key].data
            else:
                super(LASFile, self).__getitem__(key)

    def __setitem__(self, key, value):
        # Bug fix: ``assert NotImplementedError(...)`` was a silent no-op
        # (asserting a truthy exception instance); actually raise.
        raise NotImplementedError('not yet')

    def keys(self):
        '''Curve mnemonics, in order.'''
        return [c.mnemonic for c in self.curves]

    def values(self):
        '''Curve data arrays, in order.'''
        return [c.data for c in self.curves]

    def items(self):
        '''(mnemonic, data array) pairs, in order.'''
        return [(c.mnemonic, c.data) for c in self.curves]

    def iterkeys(self):
        return iter(list(self.keys()))

    def itervalues(self):
        return iter(list(self.values()))

    def iteritems(self):
        return iter(list(self.items()))

    @property
    def version(self):
        '''The ~Version section (SectionItems).'''
        return self.sections["Version"]

    @version.setter
    def version(self, section):
        self.sections["Version"] = section

    @property
    def well(self):
        '''The ~Well section (SectionItems).'''
        return self.sections["Well"]

    @well.setter
    def well(self, section):
        self.sections["Well"] = section

    @property
    def curves(self):
        '''The ~Curves section (SectionItems of CurveItem).'''
        return self.sections["Curves"]

    @curves.setter
    def curves(self, section):
        self.sections["Curves"] = section

    @property
    def params(self):
        '''The ~Parameter section (SectionItems).'''
        return self.sections["Parameter"]

    @params.setter
    def params(self, section):
        self.sections["Parameter"] = section

    @property
    def other(self):
        '''The ~Other section (plain text).'''
        return self.sections["Other"]

    @other.setter
    def other(self, section):
        self.sections["Other"] = section

    @property
    def metadata(self):
        '''All header items from every item-based section, flattened into
        one SectionItems.

        Bug fix: the original iterated the sections *dict* directly,
        yielding key strings and then iterating those strings character
        by character.  We now iterate the section objects and skip
        plain-text sections such as "Other".
        '''
        s = SectionItems()
        for section in self.sections.values():
            if isinstance(section, basestring):
                continue
            for item in section:
                s.append(item)
        return s

    @metadata.setter
    def metadata(self, value):
        raise Warning('Set values in the version/well/params attrs directly')

    @property
    def df(self):
        '''The pandas DataFrame view, when pandas is in use.

        NOTE(review): if refresh() has never built the DataFrame, reading
        this raises AttributeError for _df -- confirm intended behaviour.
        '''
        if self._use_pandas:
            return self._df
        else:
            logger.warning(
                "pandas is not installed or use_pandas was set to False")

    @df.setter
    def df(self, value):
        self._df = value

    @property
    def index(self):
        '''The first (index/depth) curve's data.'''
        return self.data[:, 0]

    @property
    def depth_m(self):
        '''Index converted to metres.

        Raises:
            LASUnknownUnitError: if the index unit was not detected.
        '''
        if self.index_unit == "M":
            return self.index
        elif self.index_unit == "FT":
            return self.index * 0.3048
        else:
            raise LASUnknownUnitError("Unit of depth index not known")

    @property
    def depth_ft(self):
        '''Index converted to feet.

        Raises:
            LASUnknownUnitError: if the index unit was not detected.
        '''
        if self.index_unit == "M":
            return self.index / 0.3048
        elif self.index_unit == "FT":
            return self.index
        else:
            raise LASUnknownUnitError("Unit of depth index not known")

    def add_curve(self, mnemonic, data, unit="", descr="", value=""):
        '''Add (or replace) a curve and refresh derived views.

        Arguments:
            mnemonic (str): curve name
            data: the data array to attach

        Keyword Arguments:
            unit (str), descr (str), value: header fields for the curve.
        '''
        curve = CurveItem(mnemonic, unit, value, descr)
        curve.data = data
        self.curves[mnemonic] = curve
        self.refresh()

    @property
    def header(self):
        '''The raw sections dict (alias).'''
        return self.sections
|
797
|
|
|
|
|
798
|
|
|
|
|
799
|
|
|
class Las(LASFile):

    '''LAS file object.

    Retained for backwards compatibility.

    '''
    # Deprecated alias: prefer LASFile in new code.
    pass
|
807
|
|
|
|
|
808
|
|
|
|
|
809
|
|
|
class Reader(object):

    '''Low-level parser for the text of a LAS file.

    Splits the text into lines and provides iteration over header
    sections plus conversion of the ~A section into a numpy array.

    Arguments:
        text (str): the full text of the LAS file
        version (float): LAS specification version (1.2 or 2.0)
    '''

    def __init__(self, text, version):
        self.lines = text.splitlines()
        self.version = version
        self.null = numpy.nan  # NULL placeholder; replaced with NaN on read
        self.wrap = True       # assume wrapped data until told otherwise

    @property
    def section_names(self):
        '''Return the section header lines (those starting with "~").'''
        names = []
        for line in self.lines:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            if line.startswith('~'):
                names.append(line)
        return names

    def iter_section_lines(self, section_name, ignore_comments=True):
        '''Yield the stripped, non-blank lines belonging to a section.

        Arguments:
            section_name (str): section header prefix, e.g. "~C"

        Keyword Arguments:
            ignore_comments (bool): skip lines starting with "#"
        '''
        in_section = False
        for i, line in enumerate(self.lines):
            line = line.strip()
            if not line:
                continue
            if ignore_comments and line.startswith('#'):
                continue
            if line.startswith(section_name):
                if in_section:
                    # A second occurrence of the same header ends it.
                    return
                else:
                    in_section = True
                    continue
            if line.lower().startswith('~') and in_section:
                # Start of the next section; we're done here.
                break
            if in_section:
                yield line

    def read_raw_text(self, section_name):
        '''Return a section's lines (comments included) joined by newlines.'''
        return '\n'.join(self.iter_section_lines(section_name,
                                                 ignore_comments=False))

    def read_section(self, section_name):
        '''Parse a header section into a SectionItems collection.

        Raises:
            LASHeaderError: when a line cannot be parsed.
        '''
        parser = SectionParser(section_name, version=self.version)
        section = SectionItems()
        for line in self.iter_section_lines(section_name):
            try:
                values = read_line(line)
            except Exception:
                # Narrowed from a bare except: so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                raise LASHeaderError("Failed in %s section on line:\n%s%s" % (
                    section_name, line,
                    traceback.format_exc().splitlines()[-1]))
            else:
                section.append(parser(**values))
        return section

    def read_data(self, number_of_curves=None, null_subs=True):
        '''Read the ~A section into a 2-D numpy array.

        Keyword Arguments:
            number_of_curves (int): number of columns to reshape to; when
                falsy the array is left unshaped.
            null_subs (bool): replace NULL values with numpy.nan

        Returns:
            The data array, or (None, None) when no data is present.

        Raises:
            LASDataError: when numpy cannot parse the data section.
        '''
        s = self.read_data_string()
        if not self.wrap:
            try:
                arr = numpy.loadtxt(StringIO(s))
            except Exception:
                raise LASDataError("Failed to read data:\n%s" % (
                    traceback.format_exc().splitlines()[-1]))
        else:
            # Wrapped mode: rows are split across physical lines, so
            # flatten every line break into a space before parsing.
            eol_chars = r"[\n\t\r]"
            s = re.sub(eol_chars, " ", s)
            try:
                arr = numpy.loadtxt(StringIO(s))
            except Exception:
                raise LASDataError("Failed to read wrapped data: %s" % (
                    traceback.format_exc().splitlines()[-1]))
        # str() is required here: arr.shape is a tuple, and using it
        # directly as the %-argument raised TypeError for 2-D arrays.
        logger.debug('Reader.read_data arr shape = %s' % str(arr.shape))
        logger.debug('Reader.read_data number of curves = %s' % number_of_curves)
        if number_of_curves:
            # Guarded: reshape(-1, None) raised TypeError previously.
            arr = numpy.reshape(arr, (-1, number_of_curves))
        if not arr.shape or (arr.ndim == 1 and arr.shape[0] == 0):
            logger.warning('Reader.read_data No data present.')
            return None, None
        else:
            logger.info('Reader.read_data LAS file shape = %s' % str(arr.shape))
        logger.debug('Reader.read_data checking for nulls (NULL = %s)' % self.null)
        if null_subs:
            arr[arr == self.null] = numpy.nan
        return arr

    def read_data_string(self):
        '''Return the raw text after the ~A header, cleaned for numpy.

        Fixes run-together negative numbers ("12-34" -> "12 -34") and
        replaces malformed numbers (two decimal points) with NaN pairs.
        '''
        start_data = None
        for i, line in enumerate(self.lines):
            line = line.strip()
            if line.startswith('~A'):
                start_data = i + 1
                break
        # If no ~A header was found start_data stays None, and the slice
        # below degrades to the whole file.
        s = '\n'.join(self.lines[start_data:])
        s = re.sub(r'(\d)-(\d)', r'\1 -\2', s)
        # Raw strings: '\d' was an invalid escape in a plain string.
        s = re.sub(r'-?\d*\.\d*\.\d*', ' NaN NaN ', s)
        s = re.sub(r'NaN.\d*', ' NaN NaN ', s)
        return s
|
907
|
|
|
|
|
908
|
|
|
|
|
909
|
|
|
class SectionParser(object):

    '''Build header/curve items from the values parsed off a header line.

    Arguments:
        section_name (str): section header prefix -- one of "~C", "~W",
            "~V", "~P"

    Keyword Arguments:
        version (float): LAS specification version (1.2 or 2.0)
    '''

    def __init__(self, section_name, version=1.2):
        # Dispatch on section type: curves and parameters get dedicated
        # handlers; everything else is generic metadata.
        if section_name.startswith('~C'):
            self.func = self.curves
        elif section_name.startswith('~P'):
            self.func = self.params
        else:
            self.func = self.metadata

        self.version = version
        self.section_name = section_name
        self.section_name2 = {"~C": "Curves",
                              "~W": "Well",
                              "~V": "Version",
                              "~P": "Parameter"}[section_name]

        # Per-mnemonic field order ("value:descr" vs "descr:value"),
        # with a section-wide default.
        section_orders = ORDER_DEFINITIONS[self.version][self.section_name2]
        self.default_order = section_orders[0]
        self.orders = {}
        for order, mnemonics in section_orders[1:]:
            for mnemonic in mnemonics:
                self.orders[mnemonic] = order

    def __call__(self, **keys):
        '''Build the item for one parsed header line.'''
        item = self.func(**keys)
        return item

    def num(self, x, default=None):
        '''Convert *x* to int, else float, else return *default*.

        When *default* is None, *x* itself is returned unchanged on
        conversion failure.
        '''
        if default is None:
            default = x
        # The numpy.int/numpy.float aliases used previously were removed
        # in numpy >= 1.24; they were plain aliases for the builtins.
        try:
            return int(x)
        except (TypeError, ValueError):
            try:
                return float(x)
            except (TypeError, ValueError):
                return default

    def metadata(self, **keys):
        '''Build a HeaderItem, respecting the section's field order.'''
        key_order = self.orders.get(keys["name"], self.default_order)
        if key_order == "value:descr":
            return HeaderItem(
                keys["name"],             # mnemonic
                keys["unit"],             # unit
                self.num(keys["value"]),  # value
                keys["descr"],            # descr
            )
        elif key_order == "descr:value":
            return HeaderItem(
                keys["name"],             # mnemonic
                keys["unit"],             # unit
                keys["descr"],            # descr
                self.num(keys["value"]),  # value
            )

    def curves(self, **keys):
        '''Build a CurveItem; the value field is kept as a string.'''
        item = CurveItem(
            keys['name'],   # mnemonic
            keys['unit'],   # unit
            keys['value'],  # value
            keys['descr'],  # descr
        )
        return item

    def params(self, **keys):
        '''Build a HeaderItem for the ~P section; value is numeric when possible.'''
        return HeaderItem(
            keys['name'],             # mnemonic
            keys['unit'],             # unit
            self.num(keys['value']),  # value
            keys['descr'],            # descr
        )
|
984
|
|
|
|
|
985
|
|
|
|
|
986
|
|
|
def read_line(line, pattern=None):
    '''Read a line from a LAS header section.

    The line is parsed with a regular expression -- see LAS file specs for
    more details, but it should basically be in the format::

        name.unit       value : descr

    Arguments:
        line (str): line from a LAS header section

    Keyword Arguments:
        pattern (str): regular expression overriding the default parser

    Returns:
        A dictionary with keys "name", "unit", "value", and "descr", each
        containing a string as value.

    '''
    if pattern is None:
        pattern = (r"\.?(?P<name>[^.]*)\." +
                   r"(?P<unit>[^\s:]*)" +
                   r"(?P<value>[^:]*):" +
                   r"(?P<descr>.*)")
    match = re.match(pattern, line)
    fields = {}
    for key, raw in match.groupdict().items():
        cleaned = raw.strip()
        if key == "unit" and cleaned.endswith("."):
            cleaned = cleaned.strip(".")  # see issue #36
        fields[key] = cleaned
    return fields
|
1018
|
|
|
|
|
1019
|
|
|
|
|
1020
|
|
|
def open_file(file_ref, encoding=None, encoding_errors="replace",
              autodetect_encoding=False, autodetect_encoding_chars=40e3):
    '''Open a file if necessary.

    If autodetect_encoding is True then either cchardet or chardet (see PyPi)
    needs to be installed, or else an ImportError will be raised.

    Arguments:
        file_ref: either a filename, an open file object, a URL, or a string of
            a LAS file contents.

    Keyword Arguments:
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" - how to
            handle errors with encodings (see standard library codecs module or
            Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    Returns:
        An open file-like object ready for reading from.

    '''
    if not isinstance(file_ref, str):
        # Already an open file-like object: hand it straight back.
        return file_ref

    lines = file_ref.splitlines()
    if len(lines) != 1:
        # Multi-line (or empty) string: treat as the LAS contents.
        return StringIO("\n".join(lines))

    if URL_REGEXP.match(file_ref):
        try:
            import urllib2  # Python 2
        except ImportError:  # Python 3
            import urllib.request
            response = urllib.request.urlopen(file_ref)
            charset = response.headers.get_content_charset("utf-8")
            return StringIO(response.read().decode(charset))
        else:
            return urllib2.urlopen(file_ref)

    # Single non-URL line: interpret it as a filename.
    contents = get_unicode_from_filename(
        file_ref, encoding, encoding_errors, autodetect_encoding,
        autodetect_encoding_chars)
    return StringIO(contents)
|
1064
|
|
|
|
|
1065
|
|
|
|
|
1066
|
|
|
def get_unicode_from_filename(fn, enc, errors, auto, nbytes):
    '''
    Read Unicode data from file.

    Arguments:
        fn (str): path to file
        enc (str): encoding - can be None
        errors (str): unicode error handling - can be "strict", "ignore", "replace"
        auto (str): auto-detection of character encoding - can be either
            "chardet", "cchardet", or True
        nbytes (int): number of characters for read for auto-detection

    Returns:
        a unicode or string object

    '''
    if nbytes:
        nbytes = int(nbytes)

    # A UTF-8 byte-order mark overrides both the caller's encoding and
    # any auto-detection.
    probe_len = min(32, os.path.getsize(fn))
    with open(fn, mode="rb") as probe:
        head = probe.read(probe_len)
    if head.startswith(codecs.BOM_UTF8):
        enc = "utf-8-sig"
        auto = False

    if auto:
        with open(fn, mode="rb") as probe:
            raw = probe.read() if nbytes is None else probe.read(nbytes)
        enc = get_encoding(auto, raw)

    # codecs.open is smarter than cchardet or chardet IME.
    with codecs.open(fn, mode="r", encoding=enc, errors=errors) as handle:
        return handle.read()
|
1108
|
|
|
|
|
1109
|
|
|
|
|
1110
|
|
|
def get_encoding(auto, raw):
    '''
    Automatically detect character encoding.

    Arguments:
        auto (str): auto-detection of character encoding - can be either
            "chardet", "cchardet", or True
        raw (bytes): array of bytes to detect from

    Returns:
        A string specifying the character encoding.

    Raises:
        ImportError: if neither chardet nor cchardet is installed.
        ValueError: if *auto* is a string other than "chardet"/"cchardet".

    '''
    if auto is True:
        # Prefer the faster cchardet, falling back to pure-python chardet.
        try:
            import cchardet as chardet
        except ImportError:
            try:
                import chardet
            except ImportError:
                raise ImportError(
                    "chardet or cchardet is required for automatic"
                    " detection of character encodings.")
            else:
                logger.debug("get_encoding Using chardet")
                method = "chardet"
        else:
            logger.debug("get_encoding Using cchardet")
            method = "cchardet"
    elif auto.lower() == "chardet":
        import chardet
        logger.debug("get_encoding Using chardet")
        method = "chardet"
    elif auto.lower() == "cchardet":
        import cchardet as chardet
        logger.debug("get_encoding Using cchardet")
        method = "cchardet"
    else:
        # Previously an unrecognized value fell through and crashed
        # later with an unrelated NameError; fail fast instead.
        raise ValueError(
            "auto must be True, 'chardet' or 'cchardet'; got %r" % (auto,))

    result = chardet.detect(raw)
    logger.debug("get_encoding %s results=%s" % (method, result))
    return result["encoding"]
|
1151
|
|
|
|
|
1152
|
|
|
|
|
1153
|
|
|
def get_formatter_function(order, left_width=None, middle_width=None):
    '''Create function to format a LAS header item.

    Arguments:
        order: format of item, either "descr:value" or "value:descr" -- see
            LAS 1.2 and 2.0 specifications for more information.

    Keyword Arguments:
        left_width (int): number of characters to the left hand side of the
            first period (default 10)
        middle_width (int): total number of characters minus 1 between the
            first period from the left and the first colon from the left
            (default 40)

    Returns:
        A function which takes a header item (e.g. LASHeaderItem or Curve)
        as its single argument and which in turn returns a string which is
        the correctly formatted LAS header line. Returns None for an
        unrecognized *order*.

    '''
    if left_width is None:
        left_width = 10
    if middle_width is None:
        middle_width = 40

    # Named inner functions rather than assigned lambdas (PEP 8 / E731).
    def format_mnemonic(mnemonic):
        return mnemonic.ljust(left_width)

    def format_middle(unit, right_hand_item):
        # Pad between the unit and the right-hand element so the colon
        # lines up across the section.
        padding = " " * (middle_width - len(str(unit)) - len(right_hand_item))
        return unit + padding + right_hand_item

    if order == "descr:value":
        return lambda item: "%s.%s : %s" % (
            format_mnemonic(item.original_mnemonic),
            format_middle(str(item.unit), str(item.descr)),
            item.value,
        )
    elif order == "value:descr":
        return lambda item: "%s.%s : %s" % (
            format_mnemonic(item.original_mnemonic),
            format_middle(str(item.unit), str(item.value)),
            item.descr,
        )
|
1194
|
|
|
|
|
1195
|
|
|
|
|
1196
|
|
|
def get_section_order_function(section, version,
                               order_definitions=ORDER_DEFINITIONS):
    '''Get a function that returns the order per mnemonic and section.

    Arguments:
        section (str): a section key present in order_definitions[version]
        version (float): either 1.2 and 2.0

    Keyword Arguments:
        order_definitions (dict): mapping of version -> section -> order
            specifications (defaults to the module-level ORDER_DEFINITIONS)

    Returns:
        A function which takes a mnemonic (str) as its only argument, and
        in turn returns the order "value:descr" or "descr:value".

    '''
    specs = order_definitions[version][section]
    fallback = specs[0]
    per_mnemonic = {}
    for order, mnemonics in specs[1:]:
        per_mnemonic.update((m, order) for m in mnemonics)
    return lambda mnemonic: per_mnemonic.get(mnemonic, fallback)
|
1219
|
|
|
|
|
1220
|
|
|
|
|
1221
|
|
|
def get_section_widths(section_name, items, version, order_func, middle_padding=5):
    '''Find minimum section widths fitting the content in *items*.

    Arguments:
        section_name (str): either "version", "well", "curves", or "params"
        items (SectionItems): section items
        version (float): either 1.2 or 2.0
        order_func: callable mapping a mnemonic to "value:descr" or
            "descr:value" (only used when *items* is non-empty)

    Keyword Arguments:
        middle_padding (int): currently unused  # NOTE(review): confirm

    Returns:
        dict with keys "left_width" and "middle_width"; both values are
        None when *items* is empty.

    '''
    section_widths = {
        "left_width": None,
        "middle_width": None
    }
    if len(items) > 0:
        # The widest mnemonic sets the left column (generator form: no
        # intermediate list needed inside max()).
        section_widths["left_width"] = max(
            len(i.original_mnemonic) for i in items)
        middle_widths = []
        for i in items:
            order = order_func(i.mnemonic)
            rhs_element = order.split(':')[0]
            logger.debug('get_section_widths %s\n\torder=%s rhs_element=%s' % (i, order, rhs_element))
            middle_widths.append(len(str(i.unit)) + 1 + len(str(i[rhs_element])))
        section_widths['middle_width'] = max(middle_widths)
    return section_widths
|
1244
|
|
|
|
|
1245
|
|
|
|
|
1246
|
|
|
def read(file_ref, **kwargs):
    '''Read a LAS file.

    Note that only versions 1.2 and 2.0 of the LAS file specification
    are currently supported.

    Arguments:
        file_ref: either a filename, an open file object, or a string of
            a LAS file contents.

    Keyword Arguments:
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" - how to
            handle errors with encodings (see standard library codecs module or
            Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    Returns:
        A LASFile object representing the file -- see above

    '''
    # Thin convenience wrapper: all keyword arguments are passed straight
    # through to the LASFile constructor.
    return LASFile(file_ref, **kwargs)
|
1270
|
|
|
|