1
|
|
|
'''las.py - read Log ASCII Standard files |
2
|
|
|
|
3
|
|
|
See README.rst and LICENSE for more information. |
4
|
|
|
|
5
|
|
|
''' |
6
|
|
|
from __future__ import print_function |
7
|
|
|
|
8
|
|
|
# Standard library packages |
9
|
|
|
import codecs |
10
|
|
|
import json |
11
|
|
|
import logging |
12
|
|
|
import os |
13
|
|
|
import re |
14
|
|
|
import textwrap |
15
|
|
|
import traceback |
16
|
|
|
|
17
|
|
|
# The standard library OrderedDict was introduced in Python 2.7 so |
18
|
|
|
# we have a third-party option to support Python 2.6 |
19
|
|
|
|
20
|
|
|
try: |
21
|
|
|
from collections import OrderedDict |
22
|
|
|
except ImportError: |
23
|
|
|
from ordereddict import OrderedDict |
24
|
|
|
|
25
|
|
|
# Convoluted import for StringIO in order to support: |
26
|
|
|
# |
27
|
|
|
# - Python 3 - io.StringIO |
28
|
|
|
# - Python 2 (optimized) - cStringIO.StringIO |
29
|
|
|
# - Python 2 (all) - StringIO.StringIO |
30
|
|
|
|
31
|
|
|
try: |
32
|
|
|
import cStringIO as StringIO |
33
|
|
|
except ImportError: |
34
|
|
|
try: # cStringIO not available on this system |
35
|
|
|
import StringIO |
36
|
|
|
except ImportError: # Python 3 |
37
|
|
|
from io import StringIO |
38
|
|
|
else: |
39
|
|
|
from StringIO import StringIO |
40
|
|
|
else: |
41
|
|
|
from StringIO import StringIO |
42
|
|
|
|
43
|
|
|
# get basestring in py3 |
44
|
|
|
|
45
|
|
|
try: |
46
|
|
|
unicode = unicode |
47
|
|
|
except NameError: |
48
|
|
|
# 'unicode' is undefined, must be Python 3 |
49
|
|
|
str = str |
50
|
|
|
unicode = str |
51
|
|
|
bytes = bytes |
52
|
|
|
basestring = (str,bytes) |
53
|
|
|
else: |
54
|
|
|
# 'unicode' exists, must be Python 2 |
55
|
|
|
str = str |
56
|
|
|
unicode = unicode |
57
|
|
|
bytes = str |
58
|
|
|
basestring = basestring |
59
|
|
|
|
60
|
|
|
# Required third-party packages available on PyPi: |
61
|
|
|
|
62
|
|
|
from namedlist import namedlist |
63
|
|
|
import numpy |
64
|
|
|
|
65
|
|
|
# Optional third-party packages available on PyPI are mostly |
66
|
|
|
# imported inline below. |
67
|
|
|
|
68
|
|
|
|
69
|
|
|
# Module-level logger; handlers/levels are configured (if at all) by the
# consuming application, per standard library logging convention.
logger = logging.getLogger(__name__)

__version__ = '0.10'


# For each supported LAS specification version, the display order of fields
# within each header section when writing a file.  A plain string such as
# "value:descr" applies to every item in that section; a (order, mnemonics)
# tuple restricts that order to the listed mnemonics only.
ORDER_DEFINITIONS = {
    1.2: OrderedDict([
        ("Version", ["value:descr"]),
        ("Well", [
            "descr:value",
            ("value:descr", ["STRT", "STOP", "STEP", "NULL"])]),
        ("Curves", ["value:descr"]),
        ("Parameter", ["value:descr"]),
        ]),
    2.0: OrderedDict([
        ("Version", ["value:descr"]),
        ("Well", ["value:descr"]),
        ("Curves", ["value:descr"]),
        ("Parameter", ["value:descr"])
        ])}

# Matches http(s)/ftp(s) URLs, used to distinguish a URL from a local path.
URL_REGEXP = re.compile(
    r'^(?:http|ftp)s?://'  # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}'
    r'\.?|[A-Z0-9-]{2,}\.?)|'  # (cont.) domain...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)$', re.IGNORECASE)

# Numeric sentinel values commonly used in LAS files to mark missing data.
NULLS_COMMON_NUMERIC = [999.25, -999.25, 9999.25, -9999.25, 0, -999, 999, 9999, -9999, 2147483647, -2147483647, 32767, -32767]

# Extra numeric sentinels treated as null only under the "aggressive" policy.
NULLS_AGGRESSIVE_NUMERIC = [0]

# Expressions for use in re.sub

# Non-numeric tokens (e.g. spreadsheet error strings) sometimes found in
# place of missing values; matched with surrounding whitespace so only whole
# tokens are replaced.
NULLS_COMMON_ALPHA = [
    r'(#N/A)[ ]', r'[ ](#N/A)',  # matches #N/A
    r'(-?1\.#INF)[ ]', r'[ ](-?1\.#INF)',  # matches 1.#INF -1.#INF
    r'(-?1\.#IO)[ ]', r'[ ](-?1\.#IO)',  # matches 1.#IO -1.#IO
    r'(-?1\.#IND)[ ]', r'[ ](-?1\.#IND)',  # matches 1.#IND -1.#IND
    ]
# Under the "aggressive" policy, any whole token that does not look like a
# float is treated as a null marker.
NULLS_AGGRESSIVE_ALPHA = [
    r'([^0-9.\-+]+)[ ]',  # matches - not a float (trailing space/newline)
    r'[ ]([^0-9.\-+]+)',  # matches - not a float (leading space/newline)
    ]
# Generally this would be a bad idea because these files
# ought to raise an exception and be manually fixed.
# But - that's why this mode is called "aggressive".
119
|
|
|
|
120
|
|
|
|
121
|
|
|
class LASDataError(Exception):

    '''Raised when the numerical data section of a LAS file cannot be read.'''
125
|
|
|
|
126
|
|
|
|
127
|
|
|
class LASHeaderError(Exception):

    '''Raised when header data cannot be read from a LAS file.'''
131
|
|
|
|
132
|
|
|
|
133
|
|
|
class LASUnknownUnitError(Exception):

    '''Raised when a unit in a LAS file is not recognised.'''
137
|
|
|
|
138
|
|
|
|
139
|
|
|
class HeaderItem(OrderedDict):

    '''A single metadata line from a LAS file header section.

    Arguments:
        mnemonic (str): the item's name as read from the file -- it may be
            blank ('') or duplicated elsewhere in the section.
        unit (str): physical unit, if any.
        value: the item's value (string or number).
        descr (str): free-text description.
    '''

    def __init__(self, mnemonic, unit="", value="", descr=""):
        super(HeaderItem, self).__init__()

        # The original mnemonic needs to be stored for rewriting a new file.
        # It might be nothing - '' - or a duplicate e.g. two 'RHO' curves,
        # or unique - 'X11124' - or perhaps invalid.
        self.original_mnemonic = mnemonic

        # We also need to store a more useful mnemonic, which will be used
        # for people to access the item while the LASFile object exists.
        # An unnamed item with the mnemonic '' is exposed as 'UNKNOWN'.
        if mnemonic.strip() == '':
            self.useful_mnemonic = 'UNKNOWN'
        else:
            self.useful_mnemonic = mnemonic

        # Duplicate mnemonics are later disambiguated with ':1', ':2', etc.
        # suffixes (see SectionItems.append); the result is stored here and
        # is what the user should actually see and use 99.5% of the time.
        self.mnemonic = self.useful_mnemonic

        self.unit = unit
        self.value = value
        self.descr = descr

    def __getitem__(self, key):
        '''Dict-style read access to the restricted set of fields.'''
        if key == 'mnemonic':
            return self.mnemonic
        elif key == 'original_mnemonic':
            return self.original_mnemonic
        elif key == 'useful_mnemonic':
            return self.useful_mnemonic
        elif key == 'unit':
            return self.unit
        elif key == 'value':
            return self.value
        elif key == 'descr':
            return self.descr
        else:
            # Bug fix: the message previously hard-coded 'CurveItem', which
            # was misleading when raised from a plain HeaderItem.
            raise KeyError('%s only has restricted items (not %s)'
                           % (self.__class__.__name__, key))

    def __repr__(self):
        return (
            "%s(mnemonic=%s, unit=%s, value=%s, "
            "descr=%s, original_mnemonic=%s)" % (
                self.__class__.__name__, self.mnemonic, self.unit, self.value,
                self.descr, self.original_mnemonic))

    def _repr_pretty_(self, p, cycle):
        # IPython pretty-printing hook.
        return p.text(self.__repr__())
195
|
|
|
|
196
|
|
|
|
197
|
|
|
class CurveItem(HeaderItem):

    '''A header item from the ~Curves section, carrying its data array.'''

    def __init__(self, *args, **kwargs):
        # Placeholder array; replaced once the data section has been parsed.
        self.data = numpy.ndarray([])
        super(CurveItem, self).__init__(*args, **kwargs)

    @property
    def API_code(self):
        '''Alias for the curve's ``value`` field.'''
        return self.value

    def __repr__(self):
        template = ("%s(mnemonic=%s, unit=%s, value=%s, "
                    "descr=%s, original_mnemonic=%s, data.shape=%s)")
        fields = (self.__class__.__name__, self.mnemonic, self.unit,
                  self.value, self.descr, self.original_mnemonic,
                  self.data.shape)
        return template % fields
212
|
|
|
|
213
|
|
|
|
214
|
|
|
class SectionItems(list):

    '''An ordered collection of header items, addressable by mnemonic.

    Behaves like a list, but also offers dict-style access (``keys``,
    ``values``, ``items``, ``section["MNEM"]``) and attribute access
    (``section.MNEM``) keyed on each item's ``mnemonic``.
    '''

    def __contains__(self, testitem):
        '''Allow membership tests with either a mnemonic string or an item.'''
        for item in self:
            if testitem == item.mnemonic:
                return True
            elif hasattr(testitem, 'mnemonic'):
                if testitem.mnemonic == item.mnemonic:
                    return True
            elif testitem is item:
                return True
        return False

    def keys(self):
        return [item.mnemonic for item in self]

    def values(self):
        return self

    def items(self):
        return [(item.mnemonic, item) for item in self]

    def iterkeys(self):
        return iter(self.keys())

    def itervalues(self):
        return iter(self)

    def iteritems(self):
        return iter(self.items())

    def __getitem__(self, key):
        '''Look up by mnemonic first, then fall back to list indexing.'''
        for item in self:
            if item.mnemonic == key:
                return item
        if isinstance(key, int):
            return super(SectionItems, self).__getitem__(key)
        else:
            raise KeyError("%s not in %s" % (key, self.keys()))

    def __setitem__(self, key, newitem):
        # Assigning a HeaderItem replaces/appends the item itself;
        # assigning anything else sets the existing item's value.
        if isinstance(newitem, HeaderItem):
            self.set_item(key, newitem)
        else:
            self.set_item_value(key, newitem)

    def __getattr__(self, key):
        if key in self:
            return self[key]
        else:
            # Bug fix: previously delegated to super().__getattr__, which
            # does not exist on list/object and produced a confusing
            # "'super' object has no attribute '__getattr__'" error.
            raise AttributeError(
                "%s has no attribute or mnemonic %s"
                % (self.__class__.__name__, key))

    def __setattr__(self, key, value):
        if key in self:
            self[key] = value
        else:
            super(SectionItems, self).__setattr__(key, value)

    def set_item(self, key, newitem):
        for i, item in enumerate(self):
            if key == item.mnemonic:

                # This is very important. We replace items where
                # 'mnemonic' is equal - i.e. we do not check useful_mnemonic
                # or original_mnemonic. Is this correct? Needs to be thought
                # about and tested more carefully.

                logger.debug('SectionItems.__setitem__ Replaced %s item' % key)
                return super(SectionItems, self).__setitem__(i, newitem)
        else:
            self.append(newitem)

    def set_item_value(self, key, value):
        self[key].value = value

    def append(self, newitem):
        '''Append, re-numbering any duplicated mnemonics with :n suffixes.'''
        logger.debug("SectionItems.append type=%s str=%s" % (type(newitem), newitem))
        super(SectionItems, self).append(newitem)

        # Find every item sharing the new item's mnemonic; if there is more
        # than one, disambiguate them all with ':1', ':2', ... suffixes.
        # (Unused locals 'existing' and 'current_count' removed.)
        locations = []
        for i, item in enumerate(self):
            if item.useful_mnemonic == newitem.mnemonic:
                locations.append(i)
        if len(locations) > 1:
            for i, loc in enumerate(locations):
                item = self[loc]
                item.mnemonic = item.useful_mnemonic + ":%d" % (i + 1)

    def dictview(self):
        '''Return a plain dict mapping mnemonic -> item value.'''
        return dict(zip(self.keys(), [i.value for i in self.values()]))
317
|
|
|
|
318
|
|
|
|
319
|
|
|
class JSONEncoder(json.JSONEncoder):

    '''Serialise a LASFile as {"metadata": {...}, "data": {...}}.'''

    def default(self, obj):
        if not isinstance(obj, LASFile):
            # Non-LASFile objects are not handled (mirrors the implicit
            # None return of the original implementation).
            return None
        encoded = {'metadata': {}, 'data': {}}
        for name, section in obj.sections.items():
            if isinstance(section, basestring):
                # Plain-text sections (e.g. ~Other) pass through unchanged.
                encoded['metadata'][name] = section
            else:
                encoded['metadata'][name] = [dict(item) for item in section]
        for curve in obj.curves:
            encoded['data'][curve.mnemonic] = list(curve.data)
        return encoded
335
|
|
|
|
336
|
|
|
|
337
|
|
|
|
338
|
|
|
# Default header sections used to populate a new/empty LASFile.
# NOTE(review): these SectionItems instances are module-level singletons and
# LASFile.__init__ references them directly, so in-place mutations may be
# shared across LASFile objects -- confirm before relying on isolation.
DEFAULT_ITEMS = {
    "Version": SectionItems([
        HeaderItem("VERS", "", 2.0, "CWLS log ASCII Standard -VERSION 2.0"),
        HeaderItem("WRAP", "", "NO", "One line per depth step"),
        HeaderItem("DLM", "", "SPACE", "Column Data Section Delimiter"),
        ]),
    "Well": SectionItems([
        HeaderItem("STRT", "m", numpy.nan, "START DEPTH"),
        HeaderItem("STOP", "m", numpy.nan, "STOP DEPTH"),
        HeaderItem("STEP", "m", numpy.nan, "STEP"),
        HeaderItem("NULL", "", -9999.25, "NULL VALUE"),
        HeaderItem("COMP", "", "", "COMPANY"),
        HeaderItem("WELL", "", "", "WELL"),
        HeaderItem("FLD", "", "", "FIELD"),
        HeaderItem("LOC", "", "", "LOCATION"),
        HeaderItem("PROV", "", "", "PROVINCE"),
        HeaderItem("CNTY", "", "", "COUNTY"),
        HeaderItem("STAT", "", "", "STATE"),
        HeaderItem("CTRY", "", "", "COUNTRY"),
        HeaderItem("SRVC", "", "", "SERVICE COMPANY"),
        HeaderItem("DATE", "", "", "DATE"),
        HeaderItem("UWI", "", "", "UNIQUE WELL ID"),
        HeaderItem("API", "", "", "API NUMBER")
        ]),
    "Curves": SectionItems([]),
    "Parameter": SectionItems([]),
    "Other": "",
    "Data": numpy.zeros(shape=(0, 1)),
    }
367
|
|
|
|
368
|
|
|
|
369
|
|
|
|
370
|
|
|
class LASFile(object):

    '''LAS file object.

    Keyword Arguments:
        file_ref: either a filename, an open file object, or a string of
            a LAS file contents.
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" - how to
            handle errors with encodings (see standard library codecs module or
            Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    '''
    def __init__(self, file_ref=None, **kwargs):

        self._text = ''
        self._use_pandas = "auto"
        self.index_unit = None
        self.sections = {
            "Version": DEFAULT_ITEMS["Version"],
            "Well": DEFAULT_ITEMS["Well"],
            "Curves": DEFAULT_ITEMS["Curves"],
            "Parameter": DEFAULT_ITEMS["Parameter"],
            "Other": str(DEFAULT_ITEMS["Other"]),
            }

        if file_ref is not None:
            self.read(file_ref, **kwargs)

    def read(self, file_ref, use_pandas="auto", null_policy='common', **kwargs):
        '''Read a LAS file.

        Arguments:
            file_ref: either a filename, an open file object, or a string of
                a LAS file contents.

        Keyword Arguments:
            use_pandas (str): bool or "auto" -- use pandas if available --
                provide False option for faster loading where pandas
                functionality is not needed. "auto" becomes True if pandas
                is installed, and False if not.
            null_policy (str): either None, 'NULL', 'common' or 'aggressive' --
                see https://github.com/kinverarity1/lasio/issues/49#issuecomment-127980359
            encoding (str): character encoding to open file_ref with
            encoding_errors (str): "strict", "replace" (default), "ignore" -
                how to handle errors with encodings (see standard library
                codecs module or Python Unicode HOWTO for more information)
            autodetect_encoding (bool): use chardet/cchardet to detect encoding
            autodetect_encoding_chars (int/None): number of chars to read from
                LAS file for auto-detection of encoding.

        '''
        if use_pandas is not None:
            self._use_pandas = use_pandas

        f = open_file(file_ref, **kwargs)

        self._text = f.read()
        logger.debug("LASFile.read LAS content is type %s" % type(self._text))

        # The ~V section must be read before the rest so the declared spec
        # version can drive how the other sections are parsed.
        reader = Reader(self._text, version=1.2)
        self.sections["Version"] = reader.read_section('~V')

        # Set version
        try:
            reader.version = self.version['VERS'].value
        except KeyError:
            raise KeyError("No key VERS in ~V section")

        # Validate version: anything other than 1.2 or 2 is coerced to the
        # nearest supported LAS specification version, with a warning.
        try:
            assert reader.version in (1.2, 2)
        except AssertionError:
            logger.warning("LAS spec version is %s -- neither 1.2 nor 2" %
                           reader.version)
            if reader.version < 2:
                reader.version = 1.2
            else:
                reader.version = 2
        reader.wrap = self.version['WRAP'].value == 'YES'

        self.sections["Well"] = reader.read_section('~W')
        self.sections["Curves"] = reader.read_section('~C')
        try:
            self.sections["Parameter"] = reader.read_section('~P')
        except LASHeaderError:
            # A missing or malformed ~P section is not fatal; keep defaults.
            logger.warning(traceback.format_exc().splitlines()[-1])
        self.sections["Other"] = reader.read_raw_text('~O')

        # Set null value
        reader.null = self.well['NULL'].value

        data = reader.read_data(len(self.curves), null_policy=null_policy)

        for i, c in enumerate(self.curves):
            d = data[:, i]
            c.data = d

        # Infer the depth index unit when STRT/STOP/STEP and the first
        # (index) curve all agree on metres or feet.
        if (self.well["STRT"].unit.upper() == "M" and
                self.well["STOP"].unit.upper() == "M" and
                self.well["STEP"].unit.upper() == "M" and
                self.curves[0].unit.upper() == "M"):
            self.index_unit = "M"
        elif (self.well["STRT"].unit.upper() in ("F", "FT") and
                self.well["STOP"].unit.upper() in ("F", "FT") and
                self.well["STEP"].unit.upper() in ("F", "FT") and
                self.curves[0].unit.upper() in ("F", "FT")):
            self.index_unit = "FT"

        self.refresh()

    def refresh(self, use_pandas=None):
        '''Refresh curve names and indices (and the pandas DataFrame).'''
        if use_pandas is not None:
            self._use_pandas = use_pandas

        if self._use_pandas is not False:
            try:
                import pandas
            except ImportError:
                logger.info(
                    "pandas not installed - skipping LASFile.df creation")
                self._use_pandas = False

        if self._use_pandas:
            self.df = pandas.DataFrame(self.data, columns=self.keys())
            self.df.set_index(self.curves[0].mnemonic, inplace=True)

    @property
    def data(self):
        '''2D array of data from LAS file (one column per curve).'''
        return numpy.vstack([c.data for c in self.curves]).T

    def write(self, file_object, version=None, wrap=None,
              STRT=None, STOP=None, STEP=None, fmt="%10.5g"):
        '''Write to a file.

        Arguments:
            file_object: a file_like object opened for writing.
            version (float): either 1.2 or 2
            wrap (bool): True, False, or None (last uses WRAP item in version)
            STRT (float): optional override to automatic calculation using
                the first index curve value.
            STOP (float): optional override to automatic calculation using
                the last index curve value.
            STEP (float): optional override to automatic calculation using
                the first step size in the index curve.
            fmt (str): format string for numerical data being written to data
                section.

        Examples:

            >>> with open("test_output.las", mode="w") as f:
            ...     lasfile_obj.write(f, 2.0)  # <-- this method

        '''
        if wrap is None:
            # Bug fix: compare against the WRAP item's *value* -- comparing
            # the HeaderItem object itself to "YES" was always False.
            wrap = self.version["WRAP"].value == "YES"
        elif wrap is True:
            self.version["WRAP"] = HeaderItem(
                "WRAP", "", "YES", "Multiple lines per depth step")
        elif wrap is False:
            self.version["WRAP"] = HeaderItem(
                "WRAP", "", "NO", "One line per depth step")
        lines = []

        assert version in (1.2, 2, None)
        if version is None:
            version = self.version["VERS"].value
        if version == 1.2:
            self.version["VERS"] = HeaderItem(
                "VERS", "", 1.2, "CWLS LOG ASCII STANDARD - VERSION 1.2")
        elif version == 2:
            self.version["VERS"] = HeaderItem(
                "VERS", "", 2.0, "CWLS log ASCII Standard -VERSION 2.0")

        if STRT is None:
            STRT = self.index[0]
        if STOP is None:
            STOP = self.index[-1]
        if STEP is None:
            STEP = self.index[1] - self.index[0]  # Faster than numpy.gradient
        self.well["STRT"].value = STRT
        self.well["STOP"].value = STOP
        self.well["STEP"].value = STEP

        # Check for any changes in the pandas dataframe and if there are,
        # create new curves so they are reflected in the output LAS file.
        if self._use_pandas:
            curve_names = lambda: [ci.mnemonic for ci in self.curves]
            for df_curve_name in list(self.df.columns.values):
                if df_curve_name not in curve_names():
                    self.add_curve(df_curve_name, self.df[df_curve_name])

        # Write each header section.

        # ~Version
        logger.debug('LASFile.write Version section')
        lines.append("~Version ".ljust(60, "-"))
        order_func = get_section_order_function("Version", version)
        section_widths = get_section_widths("Version", self.version, version, order_func)
        for header_item in self.version.values():
            mnemonic = header_item.original_mnemonic
            order = order_func(mnemonic)
            logger.debug('LASFile.write %s\norder=%s section_widths=%s' % (header_item, order, section_widths))
            formatter_func = get_formatter_function(order, **section_widths)
            line = formatter_func(header_item)
            lines.append(line)

        # ~Well
        logger.debug('LASFile.write Well section')
        lines.append("~Well ".ljust(60, "-"))
        order_func = get_section_order_function("Well", version)
        section_widths = get_section_widths("Well", self.well, version, order_func)
        for header_item in self.well.values():
            mnemonic = header_item.original_mnemonic
            order = order_func(mnemonic)
            logger.debug('LASFile.write %s\norder=%s section_widths=%s' % (header_item, order, section_widths))
            formatter_func = get_formatter_function(order, **section_widths)
            line = formatter_func(header_item)
            lines.append(line)

        # ~Curves
        logger.debug('LASFile.write Curves section')
        lines.append("~Curves ".ljust(60, "-"))
        order_func = get_section_order_function("Curves", version)
        section_widths = get_section_widths("Curves", self.curves, version, order_func)
        for header_item in self.curves:
            mnemonic = header_item.original_mnemonic
            order = order_func(mnemonic)
            formatter_func = get_formatter_function(order, **section_widths)
            line = formatter_func(header_item)
            lines.append(line)

        # ~Params
        lines.append("~Params ".ljust(60, "-"))
        order_func = get_section_order_function("Parameter", version)
        section_widths = get_section_widths("Parameter", self.params, version, order_func)
        for header_item in self.params.values():
            mnemonic = header_item.original_mnemonic
            order = order_func(mnemonic)
            formatter_func = get_formatter_function(order, **section_widths)
            line = formatter_func(header_item)
            lines.append(line)

        # ~Other
        lines.append("~Other ".ljust(60, "-"))
        lines += self.other.splitlines()

        lines.append("~ASCII ".ljust(60, "-"))

        file_object.write("\n".join(lines))
        file_object.write("\n")

        data_arr = numpy.column_stack([c.data for c in self.curves])
        nrows, ncols = data_arr.shape

        def format_data_section_line(n, fmt, l=10, spacer=" "):
            # NaNs are written out as the well's NULL value.
            if numpy.isnan(n):
                return spacer + str(self.well["NULL"].value).rjust(l)
            else:
                return spacer + (fmt % n).rjust(l)

        twrapper = textwrap.TextWrapper(width=79)
        for i in range(nrows):
            depth_slice = ''
            for j in range(ncols):
                depth_slice += format_data_section_line(data_arr[i, j], fmt)

            if wrap:
                lines = twrapper.wrap(depth_slice)
                logger.debug("LASFile.write Wrapped %d lines out of %s" %
                             (len(lines), depth_slice))
            else:
                lines = [depth_slice]

            # LAS 1.2 limits line length; warn (do not fail) when exceeded.
            if self.version["VERS"].value == 1.2:
                for line in lines:
                    if len(line) > 255:
                        logger.warning("LASFile.write Data line > 256 chars: %s" % line)

            for line in lines:
                file_object.write(line + "\n")

    def get_curve(self, mnemonic):
        '''Return Curve object.

        Arguments:
            mnemonic (str): the name of the curve

        Returns:
            A Curve object, not just the data array; None if no curve with
            that mnemonic exists.

        '''
        for curve in self.curves:
            if curve.mnemonic == mnemonic:
                return curve

    def __getitem__(self, key):
        '''Return a curve's data array by integer index or mnemonic.'''
        if isinstance(key, int):
            return self.curves[key].data
        elif isinstance(key, str):
            if key in self.keys():
                return self.curves[key].data
            else:
                # NOTE(review): this delegates to object attribute machinery
                # and so raises AttributeError rather than KeyError -- kept
                # as-is to avoid changing the exception type callers see.
                super(LASFile, self).__getitem__(key)

    def __setitem__(self, key, value):
        # Bug fix: 'assert NotImplementedError(...)' always passed silently
        # (the exception instance is truthy); raise it instead.
        raise NotImplementedError('not yet')

    def keys(self):
        return [c.mnemonic for c in self.curves]

    def values(self):
        return [c.data for c in self.curves]

    def items(self):
        return [(c.mnemonic, c.data) for c in self.curves]

    def iterkeys(self):
        return iter(list(self.keys()))

    def itervalues(self):
        return iter(list(self.values()))

    def iteritems(self):
        return iter(list(self.items()))

    @property
    def version(self):
        '''The ~Version section (SectionItems).'''
        return self.sections["Version"]

    @version.setter
    def version(self, section):
        self.sections["Version"] = section

    @property
    def well(self):
        '''The ~Well section (SectionItems).'''
        return self.sections["Well"]

    @well.setter
    def well(self, section):
        self.sections["Well"] = section

    @property
    def curves(self):
        '''The ~Curves section (SectionItems).'''
        return self.sections["Curves"]

    @curves.setter
    def curves(self, section):
        self.sections["Curves"] = section

    @property
    def params(self):
        '''The ~Parameter section (SectionItems).'''
        return self.sections["Parameter"]

    @params.setter
    def params(self, section):
        self.sections["Parameter"] = section

    @property
    def other(self):
        '''The ~Other section (plain text).'''
        return self.sections["Other"]

    @other.setter
    def other(self, section):
        self.sections["Other"] = section

    @property
    def metadata(self):
        '''All header items across every non-text section, flattened.'''
        s = SectionItems()
        # Bug fix: previously iterated the sections *dict* directly, which
        # yields key strings (and then their characters) rather than the
        # section objects themselves.
        for section in self.sections.values():
            if isinstance(section, basestring):
                # Plain-text sections (e.g. "Other") have no header items.
                continue
            for item in section:
                s.append(item)
        return s

    @metadata.setter
    def metadata(self, value):
        raise Warning('Set values in the version/well/params attrs directly')

    @property
    def df(self):
        '''The pandas DataFrame, if pandas is in use; otherwise None.'''
        if self._use_pandas:
            return self._df
        else:
            logger.warning(
                "pandas is not installed or use_pandas was set to False")

    @df.setter
    def df(self, value):
        self._df = value

    @property
    def index(self):
        '''The first (depth/index) curve's data.'''
        return self.data[:, 0]

    @property
    def depth_m(self):
        '''The index converted to metres; raises LASUnknownUnitError.'''
        if self.index_unit == "M":
            return self.index
        elif self.index_unit == "FT":
            return self.index * 0.3048
        else:
            raise LASUnknownUnitError("Unit of depth index not known")

    @property
    def depth_ft(self):
        '''The index converted to feet; raises LASUnknownUnitError.'''
        if self.index_unit == "M":
            return self.index / 0.3048
        elif self.index_unit == "FT":
            return self.index
        else:
            raise LASUnknownUnitError("Unit of depth index not known")

    def add_curve(self, mnemonic, data, unit="", descr="", value=""):
        '''Add a new curve with the given mnemonic and data array.'''
        curve = CurveItem(mnemonic, unit, value, descr)
        curve.data = data
        self.curves[mnemonic] = curve
        self.refresh()

    @property
    def header(self):
        '''All header sections, as the underlying dict.'''
        return self.sections
821
|
|
|
|
822
|
|
|
|
823
|
|
|
class Las(LASFile):

    '''LAS file object.

    Retained for backwards compatibility.

    '''
831
|
|
|
|
832
|
|
|
|
833
|
|
|
class Reader(object):
    '''Low-level reader over the raw text of a LAS file.

    Arguments:
        text (str): entire contents of a LAS file
        version (float): LAS specification version, either 1.2 or 2.0

    Attributes:
        null: NULL sentinel value (initially numpy.nan)
        wrap (bool): whether the ~A data section is line-wrapped
    '''

    def __init__(self, text, version):
        self.lines = text.splitlines()
        self.version = version
        self.null = numpy.nan
        self.wrap = True

    @property
    def section_names(self):
        '''List of the section header lines (those starting with "~"),
        skipping blank lines and "#" comments.'''
        names = []
        for line in self.lines:
            line = line.strip().strip('\t').strip()
            if not line or line.startswith('#'):
                continue
            if line.startswith('~'):
                names.append(line)
        return names

    def iter_section_lines(self, section_name, ignore_comments=True):
        '''Yield the stripped lines belonging to *section_name*.

        Iteration ends at the next "~" section header, or when a second
        line starting with *section_name* is encountered while already
        inside the section.
        '''
        in_section = False
        for i, line in enumerate(self.lines):
            line = line.strip().strip('\t').strip()
            if not line:
                continue
            if ignore_comments and line.startswith('#'):
                continue
            if line.startswith(section_name):
                if in_section:
                    return
                else:
                    in_section = True
                    continue
            if line.lower().startswith('~') and in_section:
                # Start of the next section; we're done here.
                break
            if in_section:
                yield line

    def read_raw_text(self, section_name):
        '''Return the section's lines (comments included) joined by "\\n".'''
        return '\n'.join(self.iter_section_lines(section_name,
                                                 ignore_comments=False))

    def read_section(self, section_name):
        '''Parse a header section into a SectionItems collection.

        Raises:
            LASHeaderError: if any line in the section fails to parse.
        '''
        parser = SectionParser(section_name, version=self.version)
        section = SectionItems()
        for line in self.iter_section_lines(section_name):
            try:
                values = read_line(line)
            # NOTE(review): bare except also traps KeyboardInterrupt /
            # SystemExit -- consider narrowing to Exception.
            except:
                raise LASHeaderError("Failed in %s section on line:\n%s%s" % (
                    section_name, line,
                    traceback.format_exc().splitlines()[-1]))
            else:
                section.append(parser(**values))
        return section

    def read_data(self, number_of_curves=None, null_policy='common'):
        '''Read the ~A data section into a 2D numpy array.

        Keyword Arguments:
            number_of_curves (int): number of columns to reshape the data
                into. NOTE(review): numpy.reshape fails if this is left as
                None, so in practice it is required.
            null_policy (str): "NULL", "common", or "aggressive" -- controls
                which sentinel values are converted to NaN.

        Returns:
            A 2D numpy array, or the tuple (None, None) when no data is
            present.

        Raises:
            LASDataError: if numpy.loadtxt cannot parse the data.
        '''
        s = self.read_data_string(null_policy=null_policy)
        if not self.wrap:
            try:
                arr = numpy.loadtxt(StringIO(s))
            # NOTE(review): bare except also traps KeyboardInterrupt --
            # consider narrowing to Exception.
            except:
                raise LASDataError("Failed to read data:\n%s" % (
                    traceback.format_exc().splitlines()[-1]))
        else:
            # Wrapped mode: collapse all line breaks so the data becomes a
            # single whitespace-separated stream before parsing.
            eol_chars = r"[\n\t\r]"
            s = re.sub(eol_chars, " ", s)
            try:
                arr = numpy.loadtxt(StringIO(s))
            except:
                raise LASDataError("Failed to read wrapped data: %s" % (
                    traceback.format_exc().splitlines()[-1]))
        logger.debug('Reader.read_data arr shape = %s' % (arr.shape))
        logger.debug('Reader.read_data number of curves = %s' % number_of_curves)
        arr = numpy.reshape(arr, (-1, number_of_curves))
        # NOTE(review): after the reshape above arr is always 2D, so this
        # emptiness check looks unreachable -- verify intent.
        if not arr.shape or (arr.ndim == 1 and arr.shape[0] == 0):
            # NOTE(review): message has a typo -- should read
            # "Reader.read_data No data present."
            logger.warning('Reader.read_dataN o data present.')
            return None, None
        else:
            logger.info('LAS file shape = %s' % str(arr.shape))
        logger.debug('checking for nulls (NULL = %s)' % self.null)
        # Replace sentinel NULL values with NaN according to null_policy.
        if null_policy in ['NULL', 'common', 'aggressive']:
            arr[arr == self.null] = numpy.nan
        if null_policy in ['common', 'aggressive']:
            for value in NULLS_COMMON_NUMERIC:
                arr[arr == value] = numpy.nan
        if null_policy in ['aggressive']:
            for value in NULLS_AGGRESSIVE_NUMERIC:
                arr[arr == value] = numpy.nan
        return arr

    def read_data_string(self, null_policy):
        '''Return the raw ~A section text with unparseable values replaced
        by NaN placeholders, ready for numpy.loadtxt.
        '''
        start_data = None
        for i, line in enumerate(self.lines):
            line = line.strip().strip('\t').strip()
            if line.startswith('~A'):
                start_data = i + 1
                break
        # NOTE(review): if no ~A line exists, start_data stays None and the
        # slice below silently returns the whole file -- verify intent.
        s = '\n'.join(self.lines[start_data:])
        # Split run-together negative numbers, e.g. "123.4-56.7".
        s = re.sub(r'(\d)-(\d)', r'\1 -\2', s)
        # Numbers with two decimal points cannot be parsed: NaN them out.
        # NOTE(review): the two patterns below are non-raw strings with \d
        # escapes -- a DeprecationWarning on Python 3.6+; consider r"...".
        s = re.sub('-?\d*\.\d*\.\d*', ' NaN NaN ', s)
        s = re.sub('NaN.\d*', ' NaN NaN ', s)

        if null_policy in ['common', 'aggressive']:
            for pattern in NULLS_COMMON_ALPHA:
                s = re.sub(pattern, null_alpha_repl, s)
        if null_policy in ['aggressive']:
            for pattern in NULLS_AGGRESSIVE_ALPHA:
                s = re.sub(pattern, null_alpha_repl, s)
        return s
944
|
|
|
|
945
|
|
|
def null_alpha_repl(match):
    '''re.sub replacement callback: swap an alphabetic NULL token for " NaN ".

    The match's own pattern is inspected to see whether the surrounding
    space was captured on the left or the right of the token. Returns
    None (leaving re.sub to fail) for patterns matching neither shape.
    '''
    pattern = match.re.pattern
    if pattern.startswith('[ ]'):
        return ' NaN '
    if pattern.endswith('[ ]'):
        return ' NaN '
952
|
|
|
|
953
|
|
|
|
954
|
|
|
class SectionParser(object):
    '''Parse fields from LAS header-section lines into header/curve items.

    Arguments:
        section_name (str): tilde section marker, one of "~C", "~W",
            "~V", or "~P"

    Keyword Arguments:
        version (float): LAS specification version, either 1.2 or 2.0

    '''

    def __init__(self, section_name, version=1.2):
        # Dispatch to the appropriate item factory for this section type.
        if section_name.startswith('~C'):
            self.func = self.curves
        elif section_name.startswith('~P'):
            self.func = self.params
        else:
            self.func = self.metadata

        self.version = version
        self.section_name = section_name
        self.section_name2 = {"~C": "Curves",
                              "~W": "Well",
                              "~V": "Version",
                              "~P": "Parameter"}[section_name]

        # Map each known mnemonic to its field order ("value:descr" or
        # "descr:value"); anything unknown falls back to default_order.
        section_orders = ORDER_DEFINITIONS[self.version][self.section_name2]
        self.default_order = section_orders[0]
        self.orders = {}
        for order, mnemonics in section_orders[1:]:
            for mnemonic in mnemonics:
                self.orders[mnemonic] = order

    def __call__(self, **keys):
        '''Build an item from the parsed line fields in *keys*.'''
        item = self.func(**keys)
        return item

    def num(self, x, default=None):
        '''Convert *x* to int or float if possible.

        Keyword Arguments:
            default: value returned when conversion fails; when None
                (the default), the original *x* is returned unchanged.
        '''
        if default is None:
            default = x
        # numpy.int / numpy.float were deprecated aliases of the builtins
        # and were removed in numpy >= 1.24, so use the builtins directly.
        try:
            return int(x)
        except (ValueError, TypeError):
            try:
                return float(x)
            except (ValueError, TypeError):
                return default

    def metadata(self, **keys):
        '''Build a HeaderItem for ~V / ~W sections, honouring field order.

        Returns None for an unrecognised key order (neither "value:descr"
        nor "descr:value"), matching prior behaviour.
        '''
        key_order = self.orders.get(keys["name"], self.default_order)
        if key_order == "value:descr":
            return HeaderItem(
                keys["name"],             # mnemonic
                keys["unit"],             # unit
                self.num(keys["value"]),  # value
                keys["descr"],            # descr
            )
        elif key_order == "descr:value":
            return HeaderItem(
                keys["name"],             # mnemonic
                keys["unit"],             # unit
                keys["descr"],            # descr
                self.num(keys["value"]),  # value
            )

    def curves(self, **keys):
        '''Build a CurveItem for the ~C section (value kept as a string).'''
        return CurveItem(
            keys['name'],   # mnemonic
            keys['unit'],   # unit
            keys['value'],  # value
            keys['descr'],  # descr
        )

    def params(self, **keys):
        '''Build a HeaderItem for the ~P section; value is numeric when
        possible.'''
        return HeaderItem(
            keys['name'],             # mnemonic
            keys['unit'],             # unit
            self.num(keys['value']),  # value
            keys['descr'],            # descr
        )
1029
|
|
|
|
1030
|
|
|
|
1031
|
|
|
def read_line(line, pattern=None):
    '''Read a line from a LAS header section.

    The line is parsed with a regular expression -- see LAS file specs for
    more details, but it should basically be in the format::

        name.unit       value : descr

    Arguments:
        line (str): line from a LAS header section

    Keyword Arguments:
        pattern (str): regular expression with named groups "name", "unit",
            "value" and "descr"; a default matching the LAS spec is used
            when None.

    Returns:
        A dictionary with keys "name", "unit", "value", and "descr", each
        containing a string as value.

    '''
    if pattern is None:
        pattern = (r"\.?(?P<name>[^.]*)\." +
                   r"(?P<unit>[^\s:]*)" +
                   r"(?P<value>[^:]*):" +
                   r"(?P<descr>.*)")
    fields = re.match(pattern, line).groupdict()
    parsed = {}
    for key, raw in fields.items():
        text = raw.strip()
        if key == "unit" and text.endswith("."):
            text = text.strip(".")  # see issue #36
        parsed[key] = text
    return parsed
1063
|
|
|
|
1064
|
|
|
|
1065
|
|
|
def open_file(file_ref, encoding=None, encoding_errors="replace",
              autodetect_encoding=False, autodetect_encoding_chars=40e3):
    '''Open a file if necessary.

    If autodetect_encoding is True then either cchardet or chardet (see PyPi)
    needs to be installed, or else an ImportError will be raised.

    Arguments:
        file_ref: either a filename, an open file object, a URL, or a string of
            a LAS file contents.

    Keyword Arguments:
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" - how to
            handle errors with encodings (see standard library codecs module or
            Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/ccharet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    Returns:
        An open file-like object ready for reading from.

    '''
    if not isinstance(file_ref, str):
        # Already a file-like object -- pass straight through.
        return file_ref

    lines = file_ref.splitlines()
    if len(lines) != 1:
        # Multi-line (or empty) string: treat as LAS file contents.
        return StringIO("\n".join(lines))

    if URL_REGEXP.match(file_ref):
        # A URL: fetch it. urllib2 only exists on Python 2.
        try:
            import urllib2
        except ImportError:
            import urllib.request
            response = urllib.request.urlopen(file_ref)
            enc = response.headers.get_content_charset("utf-8")
            return StringIO(response.read().decode(enc))
        else:
            return urllib2.urlopen(file_ref)

    # A single line that is not a URL: assume it is a local filename.
    data = get_unicode_from_filename(
        file_ref, encoding, encoding_errors, autodetect_encoding,
        autodetect_encoding_chars)
    return StringIO(data)
1109
|
|
|
|
1110
|
|
|
|
1111
|
|
|
def get_unicode_from_filename(fn, enc, errors, auto, nbytes):
    '''
    Read Unicode data from file.

    Arguments:
        fn (str): path to file
        enc (str): encoding - can be None
        errors (str): unicode error handling - can be "strict", "ignore", "replace"
        auto (str): auto-detection of character encoding - can be either
            "chardet", "cchardet", or True
        nbytes (int): number of characters for read for auto-detection

    Returns:
        a unicode or string object

    '''
    if nbytes:
        nbytes = int(nbytes)

    # Sniff the first few bytes for a UTF-8 byte-order mark; when one is
    # present the encoding question is settled and auto-detection is skipped.
    with open(fn, mode="rb") as sniff:
        head = sniff.read(min(32, os.path.getsize(fn)))
    if head.startswith(codecs.BOM_UTF8):
        enc = "utf-8-sig"
        auto = False

    if auto:
        with open(fn, mode="rb") as sample:
            raw = sample.read() if nbytes is None else sample.read(nbytes)
        enc = get_encoding(auto, raw)

    # codecs.open is smarter than cchardet or chardet IME.
    with codecs.open(fn, mode="r", encoding=enc, errors=errors) as handle:
        return handle.read()
1153
|
|
|
|
1154
|
|
|
|
1155
|
|
|
def get_encoding(auto, raw):
    '''
    Automatically detect character encoding.

    Arguments:
        auto (str): auto-detection of character encoding - can be either
            "chardet", "cchardet", or True
        raw (bytes): array of bytes to detect from

    Returns:
        A string specifying the character encoding.

    '''
    if auto is True:
        # Prefer the faster cchardet; fall back to chardet; otherwise fail.
        try:
            import cchardet as chardet
        except ImportError:
            try:
                import chardet
            except ImportError:
                raise ImportError(
                    "chardet or cchardet is required for automatic"
                    " detection of character encodings.")
            else:
                logger.debug("get_encoding Using chardet")
                method = "chardet"
        else:
            logger.debug("get_encoding Using cchardet")
            method = "cchardet"
    elif auto.lower() == "cchardet":
        import cchardet as chardet
        logger.debug("get_encoding Using cchardet")
        method = "cchardet"
    elif auto.lower() == "chardet":
        import chardet
        logger.debug("get_encoding Using chardet")
        method = "chardet"

    detection = chardet.detect(raw)
    logger.debug("get_encoding %s results=%s" % (method, detection))
    return detection["encoding"]
1196
|
|
|
|
1197
|
|
|
|
1198
|
|
|
def get_formatter_function(order, left_width=None, middle_width=None):
    '''Create function to format a LAS header item.

    Arguments:
        order: format of item, either "descr:value" or "value:descr" -- see
            LAS 1.2 and 2.0 specifications for more information.

    Keyword Arguments:
        left_width (int): number of characters to the left hand side of the
            first period
        middle_width (int): total number of characters minus 1 between the
            first period from the left and the first colon from the left.

    Returns:
        A function which takes a header item (e.g. LASHeaderItem or Curve)
        as its single argument and which in turn returns a string which is
        the correctly formatted LAS header line. Returns None for an
        unrecognised *order*.

    '''
    if left_width is None:
        left_width = 10
    if middle_width is None:
        middle_width = 40

    # Named inner functions instead of lambdas assigned to names (PEP 8).
    def format_mnemonic(mnemonic):
        # Pad the mnemonic out to the column of the first period.
        return mnemonic.ljust(left_width)

    def format_middle(unit, right_hand_item):
        # Right-align right_hand_item so the colon lands at a fixed column.
        # The pad count can be negative for long content, in which case no
        # padding is inserted (str * negative yields "").
        padding = " " * (middle_width - len(str(unit)) - len(right_hand_item))
        return unit + padding + right_hand_item

    if order == "descr:value":
        def formatter(item):
            return "%s.%s : %s" % (
                format_mnemonic(item.original_mnemonic),
                format_middle(str(item.unit), str(item.descr)),
                item.value,
            )
        return formatter
    elif order == "value:descr":
        def formatter(item):
            return "%s.%s : %s" % (
                format_mnemonic(item.original_mnemonic),
                format_middle(str(item.unit), str(item.value)),
                item.descr,
            )
        return formatter
1239
|
|
|
|
1240
|
|
|
|
1241
|
|
|
def get_section_order_function(section, version,
                               order_definitions=ORDER_DEFINITIONS):
    '''Get a function that returns the order per mnemonic and section.

    Arguments:
        section (str): either "well", "params", "curves", "version"
        version (float): either 1.2 and 2.0

    Keyword Arguments:
        order_definitions (dict): mapping of version -> section -> a list
            whose first entry is the default order and whose remaining
            entries are (order, mnemonics) pairs.

    Returns:
        A function which takes a mnemonic (str) as its only argument, and
        in turn returns the order "value:descr" or "descr:value".

    '''
    specs = order_definitions[version][section]
    fallback = specs[0]
    explicit = {}
    for order, mnemonics in specs[1:]:
        explicit.update((mnemonic, order) for mnemonic in mnemonics)

    def lookup(mnemonic):
        return explicit.get(mnemonic, fallback)
    return lookup
1264
|
|
|
|
1265
|
|
|
|
1266
|
|
|
def get_section_widths(section_name, items, version, order_func, middle_padding=5):
    '''Find minimum section widths fitting the content in *items*.

    Arguments:
        section_name (str): either "version", "well", "curves", or "params"
        items (SectionItems): section items
        version (float): either 1.2 or 2.0
        order_func: callable mapping a mnemonic to its field order string

    Keyword Arguments:
        middle_padding (int): NOTE(review) currently unused -- verify.

    Returns:
        dict with "left_width" and "middle_width" (both None when *items*
        is empty).

    '''
    widths = {
        "left_width": None,
        "middle_width": None
    }
    if items:
        widths["left_width"] = max(len(item.original_mnemonic)
                                   for item in items)
        middle_sizes = []
        for item in items:
            order = order_func(item.mnemonic)
            rhs_element = order.split(':')[0]
            logger.debug('get_section_widths %s\n\torder=%s rhs_element=%s' % (item, order, rhs_element))
            middle_sizes.append(len(str(item.unit)) + 1 + len(str(item[rhs_element])))
        widths['middle_width'] = max(middle_sizes)
    return widths
1289
|
|
|
|
1290
|
|
|
|
1291
|
|
|
def read(file_ref, **kwargs):
    '''Read a LAS file.

    Note that only versions 1.2 and 2.0 of the LAS file specification
    are currently supported.

    Arguments:
        file_ref: either a filename, an open file object, or a string of
            a LAS file contents.

    Keyword Arguments:
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" - how to
            handle errors with encodings (see standard library codecs module or
            Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/ccharet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    Returns:
        A LASFile object representing the file -- see above

    '''
    # Thin convenience wrapper: all keyword arguments are forwarded to the
    # LASFile constructor unchanged.
    return LASFile(file_ref, **kwargs)
1315
|
|
|
|