1
|
|
|
'''las.py - read Log ASCII Standard files |
2
|
|
|
|
3
|
|
|
See README.rst and LICENSE for more information. |
4
|
|
|
|
5
|
|
|
''' |
6
|
|
|
from __future__ import print_function |
7
|
|
|
|
8
|
|
|
# Standard library packages |
9
|
|
|
import codecs |
10
|
|
|
import json |
11
|
|
|
import logging |
12
|
|
|
import os |
13
|
|
|
import re |
14
|
|
|
import textwrap |
15
|
|
|
import traceback |
16
|
|
|
|
17
|
|
|
# The standard library OrderedDict was introduced in Python 2.7 so |
18
|
|
|
# we have a third-party option to support Python 2.6 |
19
|
|
|
|
20
|
|
|
try: |
21
|
|
|
from collections import OrderedDict |
22
|
|
|
except ImportError: |
23
|
|
|
from ordereddict import OrderedDict |
24
|
|
|
|
25
|
|
|
# Convoluted import for StringIO in order to support: |
26
|
|
|
# |
27
|
|
|
# - Python 3 - io.StringIO |
28
|
|
|
# - Python 2 (optimized) - cStringIO.StringIO |
29
|
|
|
# - Python 2 (all) - StringIO.StringIO |
30
|
|
|
|
31
|
|
|
try: |
32
|
|
|
import cStringIO as StringIO |
33
|
|
|
except ImportError: |
34
|
|
|
try: # cStringIO not available on this system |
35
|
|
|
import StringIO |
36
|
|
|
except ImportError: # Python 3 |
37
|
|
|
from io import StringIO |
38
|
|
|
else: |
39
|
|
|
from StringIO import StringIO |
40
|
|
|
else: |
41
|
|
|
from StringIO import StringIO |
42
|
|
|
|
43
|
|
|
# get basestring in py3 |
44
|
|
|
|
45
|
|
|
try: |
46
|
|
|
unicode = unicode |
47
|
|
|
except NameError: |
48
|
|
|
# 'unicode' is undefined, must be Python 3 |
49
|
|
|
str = str |
50
|
|
|
unicode = str |
51
|
|
|
bytes = bytes |
52
|
|
|
basestring = (str,bytes) |
53
|
|
|
else: |
54
|
|
|
# 'unicode' exists, must be Python 2 |
55
|
|
|
str = str |
56
|
|
|
unicode = unicode |
57
|
|
|
bytes = str |
58
|
|
|
basestring = basestring |
59
|
|
|
|
60
|
|
|
# Required third-party packages available on PyPi: |
61
|
|
|
|
62
|
|
|
from namedlist import namedlist |
63
|
|
|
import numpy |
64
|
|
|
|
65
|
|
|
# Optional third-party packages available on PyPI are mostly |
66
|
|
|
# imported inline below. |
67
|
|
|
|
68
|
|
|
|
69
|
|
|
logger = logging.getLogger(__name__) |
70
|
|
|
__version__ = "0.9.1" |
71
|
|
|
|
72
|
|
|
|
73
|
|
|
class LASDataError(Exception):

    '''Raised when the numerical data section of a LAS file cannot be read.'''
77
|
|
|
|
78
|
|
|
|
79
|
|
|
class LASHeaderError(Exception):

    '''Raised when a header section of a LAS file cannot be parsed.'''
83
|
|
|
|
84
|
|
|
|
85
|
|
|
class LASUnknownUnitError(Exception):

    '''Raised when a unit found in a LAS file is not recognised.'''
89
|
|
|
|
90
|
|
|
|
91
|
|
|
class HeaderItem(OrderedDict):

    '''One metadata item from a LAS header section.

    Arguments:
        mnemonic (str): the item's mnemonic exactly as read from the file
            (may be empty or duplicated).
        unit (str): unit string, if any.
        value: the item's value (string or number).
        descr (str): free-text description.

    Attributes:
        original_mnemonic: the mnemonic exactly as read; needed to rewrite
            the file faithfully.
        useful_mnemonic: a usable name -- 'UNKNOWN' if the original was blank.
        mnemonic: the name users should access the item by; duplicates are
            later renamed with ':1', ':2', ... suffixes by SectionItems.
    '''

    # Names readable via dict-style item access in __getitem__.
    _ITEM_KEYS = ('mnemonic', 'original_mnemonic', 'useful_mnemonic',
                  'unit', 'value', 'descr')

    def __init__(self, mnemonic, unit="", value="", descr=""):
        super(HeaderItem, self).__init__()

        # The original mnemonic needs to be stored for rewriting a new file.
        # It might be nothing - '' - or a duplicate e.g. two 'RHO' curves,
        # or unique - 'X11124' - or perhaps invalid.
        self.original_mnemonic = mnemonic

        # A more useful mnemonic for people to access the item by while the
        # LASFile object exists: a blank mnemonic becomes 'UNKNOWN'.
        if mnemonic.strip() == '':
            self.useful_mnemonic = 'UNKNOWN'
        else:
            self.useful_mnemonic = mnemonic

        # Duplicate mnemonics get ':1', ':2', etc. appended later (see
        # SectionItems.append); self.mnemonic holds that final, user-facing
        # name. Until duplicates are detected, it equals useful_mnemonic.
        self.mnemonic = self.useful_mnemonic

        self.unit = unit
        self.value = value
        self.descr = descr

    def __getitem__(self, key):
        '''Allow item access to the known attributes only.

        Raises:
            KeyError: if *key* is not one of the restricted names.
        '''
        if key in self._ITEM_KEYS:
            return getattr(self, key)
        # Use the actual class name: this message previously said
        # 'CurveItem' even for plain HeaderItem instances.
        raise KeyError('%s only has restricted items (not %s)'
                       % (self.__class__.__name__, key))

    def __repr__(self):
        return (
            "%s(mnemonic=%s, unit=%s, value=%s, "
            "descr=%s, original_mnemonic=%s)" % (
                self.__class__.__name__, self.mnemonic, self.unit, self.value,
                self.descr, self.original_mnemonic))

    def _repr_pretty_(self, p, cycle):
        # IPython pretty-printing hook.
        return p.text(self.__repr__())
147
|
|
|
|
148
|
|
|
|
149
|
|
|
class CurveItem(HeaderItem):

    '''A header item from the ~Curves section, plus its data column.

    The ``data`` attribute starts as an empty placeholder array and is
    assigned the real column after the ~ASCII section has been parsed.
    '''

    def __init__(self, *args, **kwargs):
        # Placeholder until the data section is read.
        self.data = numpy.ndarray([])
        super(CurveItem, self).__init__(*args, **kwargs)

    @property
    def API_code(self):
        '''Alias for ``value`` (the curve's API code in many LAS files).'''
        return self.value

    def __repr__(self):
        fields = (
            self.__class__.__name__,
            self.mnemonic,
            self.unit,
            self.value,
            self.descr,
            self.original_mnemonic,
            self.data.shape,
        )
        return ("%s(mnemonic=%s, unit=%s, value=%s, "
                "descr=%s, original_mnemonic=%s, data.shape=%s)" % fields)
164
|
|
|
|
165
|
|
|
|
166
|
|
|
class SectionItems(list):

    '''An ordered collection of HeaderItem objects for one LAS section.

    Behaves as a list, but additionally supports dict-style lookup by
    mnemonic (``section['DEPT']``) and attribute access (``section.DEPT``).
    '''

    def __contains__(self, testitem):
        '''Allows testing of a mnemonic or an actual item.'''
        for item in self:
            if testitem == item.mnemonic:
                return True
            elif hasattr(testitem, 'mnemonic'):
                if testitem.mnemonic == item.mnemonic:
                    return True
            elif testitem is item:
                return True
        return False

    def keys(self):
        '''Return the mnemonics of all items, in order.'''
        return [item.mnemonic for item in self]

    def values(self):
        '''Return the items themselves (this list).'''
        return self

    def items(self):
        '''Return (mnemonic, item) pairs, in order.'''
        return [(item.mnemonic, item) for item in self]

    def iterkeys(self):
        return iter(self.keys())

    def itervalues(self):
        return iter(self)

    def iteritems(self):
        return iter(self.items())

    def __getitem__(self, key):
        '''Look up by mnemonic first, then fall back to list indexing.

        Raises:
            KeyError: if *key* is neither a known mnemonic nor an int index.
        '''
        for item in self:
            if item.mnemonic == key:
                return item
        if isinstance(key, int):
            return super(SectionItems, self).__getitem__(key)
        else:
            raise KeyError("%s not in %s" % (key, self.keys()))

    def __setitem__(self, key, newitem):
        # A HeaderItem replaces/appends wholesale; anything else just
        # updates the existing item's value.
        if isinstance(newitem, HeaderItem):
            self.set_item(key, newitem)
        else:
            self.set_item_value(key, newitem)

    def __getattr__(self, key):
        # Attribute access falls through to mnemonic lookup.
        if key in self:
            return self[key]
        else:
            super(SectionItems, self).__getattr__(key)

    def __setattr__(self, key, value):
        # Assigning to an existing mnemonic updates the item; anything else
        # is a normal attribute assignment.
        if key in self:
            self[key] = value
        else:
            super(SectionItems, self).__setattr__(key, value)

    def set_item(self, key, newitem):
        '''Replace the item whose mnemonic equals *key*, or append.'''
        for i, item in enumerate(self):
            if key == item.mnemonic:

                # This is very important. We replace items where
                # 'mnemonic' is equal - i.e. we do not check useful_mnemonic
                # or original_mnemonic. Is this correct? Needs to thought
                # about and tested more carefully.

                logger.debug('SectionItems.__setitem__ Replaced %s item' % key)
                return super(SectionItems, self).__setitem__(i, newitem)
        else:
            self.append(newitem)

    def set_item_value(self, key, value):
        '''Set the value of the item whose mnemonic equals *key*.'''
        self[key].value = value

    def append(self, newitem):
        '''Check to see if the item's mnemonic needs altering.'''
        logger.debug("SectionItems.append type=%s str=%s" % (type(newitem), newitem))
        super(SectionItems, self).append(newitem)

        # Find every item sharing this useful mnemonic; when there is more
        # than one, give each a ':n' suffix so lookups stay unambiguous.
        locations = [i for i, item in enumerate(self)
                     if item.useful_mnemonic == newitem.mnemonic]
        if len(locations) > 1:
            for i, loc in enumerate(locations):
                item = self[loc]
                item.mnemonic = item.useful_mnemonic + ":%d" % (i + 1)

    def dictview(self):
        '''Return a plain {mnemonic: value} dict snapshot of the section.'''
        return dict(zip(self.keys(), [i.value for i in self.values()]))
269
|
|
|
|
270
|
|
|
|
271
|
|
|
class JSONEncoder(json.JSONEncoder):

    '''JSON encoder that knows how to serialise a LASFile.

    Header sections are emitted under 'metadata' (string sections verbatim,
    item sections as lists of dicts) and curves under 'data' as plain lists.
    '''

    def default(self, obj):
        if isinstance(obj, LASFile):
            d = {'metadata': {},
                 'data': {}}
            for name, section in obj.sections.items():
                if isinstance(section, basestring):
                    # e.g. the ~Other section is stored as raw text.
                    d['metadata'][name] = section
                else:
                    d['metadata'][name] = []
                    for item in section:
                        d['metadata'][name].append(dict(item))
            for curve in obj.curves:
                d['data'][curve.mnemonic] = list(curve.data)
            return d
        # Defer to the base class for unsupported types so that a proper
        # TypeError is raised instead of silently encoding null (the
        # original implicitly returned None here).
        return super(JSONEncoder, self).default(obj)
287
|
|
|
|
288
|
|
|
|
289
|
|
|
|
290
|
|
|
# Default header sections and items used when a LASFile is created without
# reading from a file. Depth units default to metres and NULL to -9999.25.
# NOTE(review): these SectionItems objects are mutable and are referenced
# directly (not copied) by LASFile.__init__ -- confirm whether instances
# should take copies instead.
DEFAULT_ITEMS = {
    "Version": SectionItems([
        HeaderItem("VERS", "", 2.0, "CWLS log ASCII Standard -VERSION 2.0"),
        HeaderItem("WRAP", "", "NO", "One line per depth step"),
        HeaderItem("DLM", "", "SPACE", "Column Data Section Delimiter"),
        ]),
    "Well": SectionItems([
        HeaderItem("STRT", "m", numpy.nan, "START DEPTH"),
        HeaderItem("STOP", "m", numpy.nan, "STOP DEPTH"),
        HeaderItem("STEP", "m", numpy.nan, "STEP"),
        HeaderItem("NULL", "", -9999.25, "NULL VALUE"),
        HeaderItem("COMP", "", "", "COMPANY"),
        HeaderItem("WELL", "", "", "WELL"),
        HeaderItem("FLD", "", "", "FIELD"),
        HeaderItem("LOC", "", "", "LOCATION"),
        HeaderItem("PROV", "", "", "PROVINCE"),
        HeaderItem("CNTY", "", "", "COUNTY"),
        HeaderItem("STAT", "", "", "STATE"),
        HeaderItem("CTRY", "", "", "COUNTRY"),
        HeaderItem("SRVC", "", "", "SERVICE COMPANY"),
        HeaderItem("DATE", "", "", "DATE"),
        HeaderItem("UWI", "", "", "UNIQUE WELL ID"),
        HeaderItem("API", "", "", "API NUMBER")
        ]),
    "Curves": SectionItems([]),
    "Parameter": SectionItems([]),
    "Other": "",
    # Empty 2D array placeholder for the numerical data section.
    "Data": numpy.zeros(shape=(0, 1)),
    }
319
|
|
|
|
320
|
|
|
|
321
|
|
|
# Field ordering rules for writing header lines, keyed by LAS version and
# then by section name. "value:descr" means the value comes before the
# description on the output line; "descr:value" the reverse. A tuple entry
# like ("value:descr", ["STRT", ...]) applies that order only to the listed
# mnemonics (LAS 1.2 orders most ~Well items descr-first except the depth
# and NULL items).
ORDER_DEFINITIONS = {
    1.2: OrderedDict([
        ("Version", ["value:descr"]),
        ("Well", [
            "descr:value",
            ("value:descr", ["STRT", "STOP", "STEP", "NULL"])]),
        ("Curves", ["value:descr"]),
        ("Parameter", ["value:descr"]),
        ]),
    2.0: OrderedDict([
        ("Version", ["value:descr"]),
        ("Well", ["value:descr"]),
        ("Curves", ["value:descr"]),
        ("Parameter", ["value:descr"])
        ])}
336
|
|
|
|
337
|
|
|
|
338
|
|
|
# Matches http/https/ftp/ftps URLs so that a string passed to open_file()
# can be recognised as a remote resource rather than a local path.
URL_REGEXP = re.compile(
    r'^(?:http|ftp)s?://'  # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}'
    r'\.?|[A-Z0-9-]{2,}\.?)|'  # (cont.) domain...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)$', re.IGNORECASE)
346
|
|
|
|
347
|
|
|
|
348
|
|
|
class LASFile(object):

    '''LAS file object.

    Keyword Arguments:
        file_ref: either a filename, an open file object, or a string of
            a LAS file contents.
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" - how to
            handle errors with encodings (see standard library codecs module or
            Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    '''
    def __init__(self, file_ref=None, **kwargs):

        self._text = ''
        self._use_pandas = "auto"
        self.index_unit = None
        # NOTE(review): these defaults are the *same objects* as the
        # module-level DEFAULT_ITEMS entries (no copy is taken), so mutating
        # one instance's sections mutates the shared defaults and any other
        # instance still referencing them -- confirm whether copies were
        # intended here.
        self.sections = {
            "Version": DEFAULT_ITEMS["Version"],
            "Well": DEFAULT_ITEMS["Well"],
            "Curves": DEFAULT_ITEMS["Curves"],
            "Parameter": DEFAULT_ITEMS["Parameter"],
            "Other": str(DEFAULT_ITEMS["Other"]),
        }

        if not (file_ref is None):
            self.read(file_ref, **kwargs)

    def read(self, file_ref, use_pandas="auto", null_subs=True, **kwargs):
        '''Read a LAS file.

        Arguments:
            file_ref: either a filename, an open file object, or a string of
                a LAS file contents.

        Keyword Arguments:
            use_pandas (str): bool or "auto" -- use pandas if available -- provide
                False option for faster loading where pandas functionality is not
                needed. "auto" becomes True if pandas is installed, and False if not.
            encoding (str): character encoding to open file_ref with
            encoding_errors (str): "strict", "replace" (default), "ignore" - how to
                handle errors with encodings (see standard library codecs module or
                Python Unicode HOWTO for more information)
            autodetect_encoding (bool): use chardet/cchardet to detect encoding
            autodetect_encoding_chars (int/None): number of chars to read from LAS
                file for auto-detection of encoding.

        '''
        if not use_pandas is None:
            self._use_pandas = use_pandas

        # open_file is defined elsewhere in this module; it resolves
        # filenames, URLs, file objects and raw strings to a file-like.
        f = open_file(file_ref, **kwargs)

        self._text = f.read()
        logger.debug("LASFile.read LAS content is type %s" % type(self._text))

        # The ~Version section must be parsed first (with an assumed 1.2
        # grammar) so the real version can be used for the other sections.
        reader = Reader(self._text, version=1.2)
        self.sections["Version"] = reader.read_section('~V')

        # Set version
        try:
            reader.version = self.version['VERS'].value
        except KeyError:
            raise KeyError("No key VERS in ~V section")

        # Validate version: anything other than 1.2 or 2 is coerced to the
        # nearest supported version with a warning.
        try:
            assert reader.version in (1.2, 2)
        except AssertionError:
            logger.warning("LAS spec version is %s -- neither 1.2 nor 2" %
                           reader.version)
            if reader.version < 2:
                reader.version = 1.2
            else:
                reader.version = 2
        reader.wrap = self.version['WRAP'].value == 'YES'

        self.sections["Well"] = reader.read_section('~W')
        self.sections["Curves"] = reader.read_section('~C')
        # A broken/absent ~Params section is tolerated: log and move on.
        try:
            self.sections["Parameter"] = reader.read_section('~P')
        except LASHeaderError:
            logger.warning(traceback.format_exc().splitlines()[-1])
        self.sections["Other"] = reader.read_raw_text('~O')

        # Set null value
        reader.null = self.well['NULL'].value

        data = reader.read_data(len(self.curves), null_subs=null_subs)

        # Attach each data column to its CurveItem.
        for i, c in enumerate(self.curves):
            d = data[:, i]
            c.data = d

        # Infer the depth index unit only when all four depth-related units
        # agree; otherwise index_unit stays None.
        if (self.well["STRT"].unit.upper() == "M" and
                self.well["STOP"].unit.upper() == "M" and
                self.well["STEP"].unit.upper() == "M" and
                self.curves[0].unit.upper() == "M"):
            self.index_unit = "M"
        elif (self.well["STRT"].unit.upper() in ("F", "FT") and
                self.well["STOP"].unit.upper() in ("F", "FT") and
                self.well["STEP"].unit.upper() in ("F", "FT") and
                self.curves[0].unit.upper() in ("F", "FT")):
            self.index_unit = "FT"

        self.refresh()

    def refresh(self, use_pandas=None):
        '''Refresh curve names and indices.'''
        if not use_pandas is None:
            self._use_pandas = use_pandas

        # "auto"/True: try pandas and degrade gracefully if missing.
        if not self._use_pandas is False:
            try:
                import pandas
            except ImportError:
                logger.info(
                    "pandas not installed - skipping LASFile.df creation")
                self._use_pandas = False

        if self._use_pandas:
            self.df = pandas.DataFrame(self.data, columns=self.keys())
            self.df.set_index(self.curves[0].mnemonic, inplace=True)

    @property
    def data(self):
        '''2D array of data from LAS file.'''
        return numpy.vstack([c.data for c in self.curves]).T

    def write(self, file_object, version=None, wrap=None,
              STRT=None, STOP=None, STEP=None, fmt="%10.5g"):
        '''Write to a file.

        Arguments:
            file_object: a file_like object opening for writing.
            version (float): either 1.2 or 2
            wrap (bool): True, False, or None (last uses WRAP item in version)
            STRT (float): optional override to automatic calculation using
                the first index curve value.
            STOP (float): optional override to automatic calculation using
                the last index curve value.
            STEP (float): optional override to automatic calculation using
                the first step size in the index curve.
            fmt (str): format string for numerical data being written to data
                section.

        Examples:

            >>> with open("test_output.las", mode="w") as f:
            ...     lasfile_obj.write(f, 2.0)  # <-- this method

        '''
        if wrap is None:
            # NOTE(review): this compares the WRAP HeaderItem itself with
            # "YES"; elsewhere the code compares .value -- confirm whether
            # SectionItems equality makes this behave as intended.
            wrap = self.version["WRAP"] == "YES"
        elif wrap is True:
            self.version["WRAP"] = HeaderItem(
                "WRAP", "", "YES", "Multiple lines per depth step")
        elif wrap is False:
            self.version["WRAP"] = HeaderItem(
                "WRAP", "", "NO", "One line per depth step")
        lines = []

        assert version in (1.2, 2, None)
        if version is None:
            version = self.version["VERS"].value
        if version == 1.2:
            self.version["VERS"] = HeaderItem(
                "VERS", "", 1.2, "CWLS LOG ASCII STANDARD - VERSION 1.2")
        elif version == 2:
            self.version["VERS"] = HeaderItem(
                "VERS", "", 2.0, "CWLS log ASCII Standard -VERSION 2.0")

        # Default STRT/STOP/STEP from the index curve when not overridden.
        if STRT is None:
            STRT = self.index[0]
        if STOP is None:
            STOP = self.index[-1]
        if STEP is None:
            STEP = self.index[1] - self.index[0]  # Faster than numpy.gradient
        self.well["STRT"].value = STRT
        self.well["STOP"].value = STOP
        self.well["STEP"].value = STEP

        # Check for any changes in the pandas dataframe and if there are,
        # create new curves so they are reflected in the output LAS file.
        if self._use_pandas:
            curve_names = lambda: [ci.mnemonic for ci in self.curves]
            for df_curve_name in list(self.df.columns.values):
                if not df_curve_name in curve_names():
                    self.add_curve(df_curve_name, self.df[df_curve_name])

        # Write each section.

        # ~Version
        logger.debug('LASFile.write Version section')
        lines.append("~Version ".ljust(60, "-"))
        order_func = get_section_order_function("Version", version)
        section_widths = get_section_widths("Version", self.version, version, order_func)
        for header_item in self.version.values():
            # Use the original mnemonic so the file round-trips faithfully.
            mnemonic = header_item.original_mnemonic
            order = order_func(mnemonic)
            logger.debug('LASFile.write %s\norder=%s section_widths=%s' % (header_item, order, section_widths))
            formatter_func = get_formatter_function(order, **section_widths)
            line = formatter_func(header_item)
            lines.append(line)

        # ~Well
        logger.debug('LASFile.write Well section')
        lines.append("~Well ".ljust(60, "-"))
        order_func = get_section_order_function("Well", version)
        section_widths = get_section_widths("Well", self.well, version, order_func)
        for header_item in self.well.values():
            mnemonic = header_item.original_mnemonic
            order = order_func(mnemonic)
            logger.debug('LASFile.write %s\norder=%s section_widths=%s' % (header_item, order, section_widths))
            formatter_func = get_formatter_function(order, **section_widths)
            line = formatter_func(header_item)
            lines.append(line)

        # ~Curves
        logger.debug('LASFile.write Curves section')
        lines.append("~Curves ".ljust(60, "-"))
        order_func = get_section_order_function("Curves", version)
        section_widths = get_section_widths("Curves", self.curves, version, order_func)
        for header_item in self.curves:
            mnemonic = header_item.original_mnemonic
            order = order_func(mnemonic)
            formatter_func = get_formatter_function(order, **section_widths)
            line = formatter_func(header_item)
            lines.append(line)

        # ~Params
        lines.append("~Params ".ljust(60, "-"))
        order_func = get_section_order_function("Parameter", version)
        section_widths = get_section_widths("Parameter", self.params, version, order_func)
        for header_item in self.params.values():
            mnemonic = header_item.original_mnemonic
            order = order_func(mnemonic)
            formatter_func = get_formatter_function(order, **section_widths)
            line = formatter_func(header_item)
            lines.append(line)

        # ~Other
        lines.append("~Other ".ljust(60, "-"))
        lines += self.other.splitlines()

        lines.append("~ASCII ".ljust(60, "-"))

        file_object.write("\n".join(lines))
        file_object.write("\n")

        # Data section: one formatted row per depth step.
        data_arr = numpy.column_stack([c.data for c in self.curves])
        nrows, ncols = data_arr.shape

        def format_data_section_line(n, fmt, l=10, spacer=" "):
            # NaN values are written back as the declared NULL value.
            if numpy.isnan(n):
                return spacer + str(self.well["NULL"].value).rjust(l)
            else:
                return spacer + (fmt % n).rjust(l)

        twrapper = textwrap.TextWrapper(width=79)
        for i in range(nrows):
            depth_slice = ''
            for j in range(ncols):
                depth_slice += format_data_section_line(data_arr[i, j], fmt)

            if wrap:
                lines = twrapper.wrap(depth_slice)
                logger.debug("LASFile.write Wrapped %d lines out of %s" %
                             (len(lines), depth_slice))
            else:
                lines = [depth_slice]

            # LAS 1.2 caps line length at 256 characters; warn, don't fail.
            if self.version["VERS"].value == 1.2:
                for line in lines:
                    if len(line) > 255:
                        logger.warning("LASFile.write Data line > 256 chars: %s" % line)

            for line in lines:
                file_object.write(line + "\n")

    def get_curve(self, mnemonic):
        '''Return Curve object.

        Arguments:
            mnemonic (str): the name of the curve

        Returns:
            A Curve object, not just the data array. Returns None (implicitly)
            when no curve matches.

        '''
        for curve in self.curves:
            if curve.mnemonic == mnemonic:
                return curve

    def __getitem__(self, key):
        # int -> positional curve data; str -> curve data by mnemonic.
        if isinstance(key, int):
            return self.curves[key].data
        elif isinstance(key, str):
            if key in self.keys():
                return self.curves[key].data
            else:
                # NOTE(review): the result of this super() call is not
                # returned, and object has no __getitem__, so an unknown
                # string key raises AttributeError rather than KeyError --
                # confirm intended behaviour.
                super(LASFile, self).__getitem__(key)

    def __setitem__(self, key, value):
        # NOTE(review): `assert` on an exception *instance* is always truthy,
        # so this line never fails -- it was probably meant to be
        # `raise NotImplementedError('not yet')`.
        assert NotImplementedError('not yet')

    def keys(self):
        # Curve mnemonics, in file order.
        return [c.mnemonic for c in self.curves]

    def values(self):
        # Curve data arrays, in file order.
        return [c.data for c in self.curves]

    def items(self):
        return [(c.mnemonic, c.data) for c in self.curves]

    def iterkeys(self):
        return iter(list(self.keys()))

    def itervalues(self):
        return iter(list(self.values()))

    def iteritems(self):
        return iter(list(self.items()))

    @property
    def version(self):
        # The ~Version section (SectionItems).
        return self.sections["Version"]

    @version.setter
    def version(self, section):
        self.sections["Version"] = section

    @property
    def well(self):
        # The ~Well section (SectionItems).
        return self.sections["Well"]

    @well.setter
    def well(self, section):
        self.sections["Well"] = section

    @property
    def curves(self):
        # The ~Curves section (SectionItems of CurveItem).
        return self.sections["Curves"]

    @curves.setter
    def curves(self, section):
        self.sections["Curves"] = section

    @property
    def params(self):
        # The ~Parameter section (SectionItems).
        return self.sections["Parameter"]

    @params.setter
    def params(self, section):
        self.sections["Parameter"] = section

    @property
    def other(self):
        # The ~Other section (raw text).
        return self.sections["Other"]

    @other.setter
    def other(self, section):
        self.sections["Other"] = section

    @property
    def metadata(self):
        s = SectionItems()
        # NOTE(review): iterating a dict yields its *keys* (section name
        # strings), so the inner loop iterates the characters of each name;
        # this looks like it should be self.sections.values() -- confirm.
        for section in self.sections:
            for item in section:
                s.append(item)
        return s

    @metadata.setter
    def metadata(self, value):
        # NOTE(review): raising a Warning class (rather than warnings.warn)
        # is unusual but deliberate-looking: assignment is disallowed.
        raise Warning('Set values in the version/well/params attrs directly')

    @property
    def df(self):
        # pandas DataFrame view of the data; None (implicitly) when pandas
        # is unavailable or disabled.
        if self._use_pandas:
            return self._df
        else:
            logger.warning(
                "pandas is not installed or use_pandas was set to False")

    @df.setter
    def df(self, value):
        self._df = value

    @property
    def index(self):
        # First (depth/time) curve's data column.
        return self.data[:, 0]

    @property
    def depth_m(self):
        # Depth index converted to metres; requires a known index unit.
        if self.index_unit == "M":
            return self.index
        elif self.index_unit == "FT":
            return self.index * 0.3048
        else:
            raise LASUnknownUnitError("Unit of depth index not known")

    @property
    def depth_ft(self):
        # Depth index converted to feet; requires a known index unit.
        if self.index_unit == "M":
            return self.index / 0.3048
        elif self.index_unit == "FT":
            return self.index
        else:
            raise LASUnknownUnitError("Unit of depth index not known")

    def add_curve(self, mnemonic, data, unit="", descr="", value=""):
        # Create and register a new curve, then rebuild derived views.
        curve = CurveItem(mnemonic, unit, value, descr)
        curve.data = data
        self.curves[mnemonic] = curve
        self.refresh()

    @property
    def header(self):
        # All header sections as a dict.
        return self.sections
797
|
|
|
|
798
|
|
|
|
799
|
|
|
# Deprecated alias for LASFile.
class Las(LASFile):

    '''LAS file object.

    Retained for backwards compatibility.

    '''
    pass
807
|
|
|
|
808
|
|
|
|
809
|
|
|
class Reader(object):

    '''Line-oriented parser for the text of a LAS file.'''

    def __init__(self, text, version):
        # The LAS format is line-oriented, so work from split lines.
        self.lines = text.splitlines()
        self.version = version  # LAS spec version (1.2 or 2)
        self.null = numpy.nan   # NULL value; replaced once ~Well is read
        self.wrap = True        # whether the data section wraps depth steps
816
|
|
|
|
817
|
|
|
@property |
818
|
|
|
def section_names(self): |
819
|
|
|
names = [] |
820
|
|
|
for line in self.lines: |
821
|
|
|
line = line.strip().strip('\t').strip() |
822
|
|
|
if not line or line.startswith('#'): |
823
|
|
|
continue |
824
|
|
|
if line.startswith('~'): |
825
|
|
|
names.append(line) |
826
|
|
|
return names |
827
|
|
|
|
828
|
|
|
def iter_section_lines(self, section_name, ignore_comments=True): |
829
|
|
|
in_section = False |
830
|
|
|
for i, line in enumerate(self.lines): |
831
|
|
|
line = line.strip().strip('\t').strip() |
832
|
|
|
if not line: |
833
|
|
|
continue |
834
|
|
|
if ignore_comments and line.startswith('#'): |
835
|
|
|
continue |
836
|
|
|
if line.startswith(section_name): |
837
|
|
|
if in_section: |
838
|
|
|
return |
839
|
|
|
else: |
840
|
|
|
in_section = True |
841
|
|
|
continue |
842
|
|
|
if line.lower().startswith('~') and in_section: |
843
|
|
|
# Start of the next section; we're done here. |
844
|
|
|
break |
845
|
|
|
if in_section: |
846
|
|
|
yield line |
847
|
|
|
|
848
|
|
|
def read_raw_text(self, section_name): |
849
|
|
|
return '\n'.join(self.iter_section_lines(section_name, |
850
|
|
|
ignore_comments=False)) |
851
|
|
|
|
852
|
|
|
def read_section(self, section_name): |
853
|
|
|
parser = SectionParser(section_name, version=self.version) |
854
|
|
|
section = SectionItems() |
855
|
|
|
for line in self.iter_section_lines(section_name): |
856
|
|
|
try: |
857
|
|
|
values = read_line(line) |
858
|
|
|
except: |
859
|
|
|
raise LASHeaderError("Failed in %s section on line:\n%s%s" % ( |
860
|
|
|
section_name, line, |
861
|
|
|
traceback.format_exc().splitlines()[-1])) |
862
|
|
|
else: |
863
|
|
|
section.append(parser(**values)) |
864
|
|
|
return section |
865
|
|
|
|
866
|
|
|
def read_data(self, number_of_curves=None, null_subs=True): |
867
|
|
|
s = self.read_data_string() |
868
|
|
|
if not self.wrap: |
869
|
|
|
try: |
870
|
|
|
arr = numpy.loadtxt(StringIO(s)) |
871
|
|
|
except: |
872
|
|
|
raise LASDataError("Failed to read data:\n%s" % ( |
873
|
|
|
traceback.format_exc().splitlines()[-1])) |
874
|
|
|
else: |
875
|
|
|
eol_chars = r"[\n\t\r]" |
876
|
|
|
s = re.sub(eol_chars, " ", s) |
877
|
|
|
try: |
878
|
|
|
arr = numpy.loadtxt(StringIO(s)) |
879
|
|
|
except: |
880
|
|
|
raise LASDataError("Failed to read wrapped data: %s" % ( |
881
|
|
|
traceback.format_exc().splitlines()[-1])) |
882
|
|
|
logger.debug('Reader.read_data arr shape = %s' % (arr.shape)) |
883
|
|
|
logger.debug('Reader.read_data number of curves = %s' % number_of_curves) |
884
|
|
|
arr = numpy.reshape(arr, (-1, number_of_curves)) |
885
|
|
|
if not arr.shape or (arr.ndim == 1 and arr.shape[0] == 0): |
886
|
|
|
logger.warning('Reader.read_dataN o data present.') |
887
|
|
|
return None, None |
888
|
|
|
else: |
889
|
|
|
logger.info('Reader.read_data LAS file shape = %s' % str(arr.shape)) |
890
|
|
|
logger.debug('Reader.read_data checking for nulls (NULL = %s)' % self.null) |
891
|
|
|
if null_subs: |
892
|
|
|
arr[arr == self.null] = numpy.nan |
893
|
|
|
return arr |
894
|
|
|
|
895
|
|
|
def read_data_string(self): |
896
|
|
|
start_data = None |
897
|
|
|
for i, line in enumerate(self.lines): |
898
|
|
|
line = line.strip().strip('\t').strip() |
899
|
|
|
if line.startswith('~A'): |
900
|
|
|
start_data = i + 1 |
901
|
|
|
break |
902
|
|
|
s = '\n'.join(self.lines[start_data:]) |
903
|
|
|
s = re.sub(r'(\d)-(\d)', r'\1 -\2', s) |
904
|
|
|
s = re.sub('-?\d*\.\d*\.\d*', ' NaN NaN ', s) |
905
|
|
|
s = re.sub('NaN.\d*', ' NaN NaN ', s) |
906
|
|
|
return s |
907
|
|
|
|
908
|
|
|
|
909
|
|
|
class SectionParser(object):

    '''Build header/curve items from the parsed fields of section lines.

    Arguments:
        section_name (str): the section header prefix -- one of "~C",
            "~W", "~V", "~P"

    Keyword Arguments:
        version (float): LAS specification version (1.2 or 2.0)

    '''

    def __init__(self, section_name, version=1.2):
        # Dispatch on section type: curves and parameters have dedicated
        # item builders; everything else is generic metadata.
        if section_name.startswith('~C'):
            self.func = self.curves
        elif section_name.startswith('~P'):
            self.func = self.params
        else:
            self.func = self.metadata

        self.version = version
        self.section_name = section_name
        self.section_name2 = {"~C": "Curves",
                              "~W": "Well",
                              "~V": "Version",
                              "~P": "Parameter"}[section_name]

        section_orders = ORDER_DEFINITIONS[self.version][self.section_name2]
        self.default_order = section_orders[0]
        # Per-mnemonic overrides of the default field order.
        self.orders = {}
        for order, mnemonics in section_orders[1:]:
            for mnemonic in mnemonics:
                self.orders[mnemonic] = order

    def __call__(self, **keys):
        '''Build an item from the fields of one parsed header line.'''
        item = self.func(**keys)
        return item

    def num(self, x, default=None):
        '''Convert x to int or float where possible.

        Keyword Arguments:
            default: returned when conversion fails; defaults to x itself.

        '''
        if default is None:
            default = x
        # BUG FIX: numpy.int / numpy.float were deprecated in numpy 1.20
        # and removed in 1.24 -- on modern numpy the old code always fell
        # through to `default`. They were plain aliases of the builtins,
        # which behave identically here. Excepts narrowed from bare
        # `except:` to the conversion errors actually expected.
        try:
            return int(x)
        except (TypeError, ValueError):
            try:
                return float(x)
            except (TypeError, ValueError):
                return default

    def metadata(self, **keys):
        '''Build a HeaderItem for a ~V or ~W line, honouring field order.'''
        key_order = self.orders.get(keys["name"], self.default_order)
        if key_order == "value:descr":
            return HeaderItem(
                keys["name"],             # mnemonic
                keys["unit"],             # unit
                self.num(keys["value"]),  # value
                keys["descr"],            # descr
            )
        elif key_order == "descr:value":
            return HeaderItem(
                keys["name"],             # mnemonic
                keys["unit"],             # unit
                keys["descr"],            # descr
                self.num(keys["value"]),  # value
            )

    def curves(self, **keys):
        '''Build a CurveItem for a ~C line (value kept as a string).'''
        item = CurveItem(
            keys['name'],   # mnemonic
            keys['unit'],   # unit
            keys['value'],  # value
            keys['descr'],  # descr
        )
        return item

    def params(self, **keys):
        '''Build a HeaderItem for a ~P line (value converted to a number).'''
        return HeaderItem(
            keys['name'],             # mnemonic
            keys['unit'],             # unit
            self.num(keys['value']),  # value
            keys['descr'],            # descr
        )
984
|
|
|
|
985
|
|
|
|
986
|
|
|
def read_line(line, pattern=None):
    '''Read a line from a LAS header section.

    The line is parsed with a regular expression -- see LAS file specs for
    more details, but it should basically be in the format::

        name.unit       value : descr

    Arguments:
        line (str): line from a LAS header section

    Keyword Arguments:
        pattern (str): regular expression with named groups "name",
            "unit", "value", and "descr"; a default matching the LAS
            specification is used when omitted.

    Returns:
        A dictionary with keys "name", "unit", "value", and "descr", each
        containing a string as value.

    '''
    if pattern is None:
        pattern = (r"\.?(?P<name>[^.]*)\." +
                   r"(?P<unit>[^\s:]*)" +
                   r"(?P<value>[^:]*):" +
                   r"(?P<descr>.*)")
    fields = re.match(pattern, line).groupdict()
    parsed = {key: raw.strip() for key, raw in fields.items()}
    if "unit" in parsed and parsed["unit"].endswith("."):
        parsed["unit"] = parsed["unit"].strip(".")  # see issue #36
    return parsed
1018
|
|
|
|
1019
|
|
|
|
1020
|
|
|
def open_file(file_ref, encoding=None, encoding_errors="replace",
              autodetect_encoding=False, autodetect_encoding_chars=40e3):
    '''Open a file if necessary.

    If autodetect_encoding is True then either cchardet or chardet (see
    PyPi) needs to be installed, or else an ImportError will be raised.

    Arguments:
        file_ref: either a filename, an open file object, a URL, or a
            string of a LAS file contents.

    Keyword Arguments:
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" -
            how to handle errors with encodings (see standard library
            codecs module or Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from
            LAS file for auto-detection of encoding.

    Returns:
        An open file-like object ready for reading from.

    '''
    if not isinstance(file_ref, str):
        # Already a file-like object: hand it back untouched.
        return file_ref
    lines = file_ref.splitlines()
    if len(lines) != 1:
        # Multi-line (or empty) string: treat it as LAS file contents.
        return StringIO("\n".join(lines))
    # Exactly one line: either a URL or a filename.
    if URL_REGEXP.match(file_ref):
        try:
            import urllib2
            file_ref = urllib2.urlopen(file_ref)
        except ImportError:  # Python 3
            import urllib.request
            response = urllib.request.urlopen(file_ref)
            enc = response.headers.get_content_charset("utf-8")
            file_ref = StringIO(response.read().decode(enc))
    else:
        data = get_unicode_from_filename(
            file_ref, encoding, encoding_errors, autodetect_encoding,
            autodetect_encoding_chars)
        file_ref = StringIO(data)
    return file_ref
1064
|
|
|
|
1065
|
|
|
|
1066
|
|
|
def get_unicode_from_filename(fn, enc, errors, auto, nbytes):
    '''
    Read Unicode data from file.

    Arguments:
        fn (str): path to file
        enc (str): encoding - can be None
        errors (str): unicode error handling - can be "strict", "ignore",
            "replace"
        auto (str): auto-detection of character encoding - can be either
            "chardet", "cchardet", or True
        nbytes (int): number of characters for read for auto-detection

    Returns:
        a unicode or string object

    '''
    if nbytes:
        nbytes = int(nbytes)

    # A UTF-8 byte-order mark at the start of the file settles the
    # encoding up front, making auto-detection unnecessary.
    probe_len = min(32, os.path.getsize(fn))
    with open(fn, mode="rb") as probe:
        leading = probe.read(probe_len)
    if leading.startswith(codecs.BOM_UTF8):
        enc = "utf-8-sig"
        auto = False

    if auto:
        with open(fn, mode="rb") as probe:
            raw = probe.read() if nbytes is None else probe.read(nbytes)
        enc = get_encoding(auto, raw)

    # codecs.open is smarter than cchardet or chardet IME.
    with codecs.open(fn, mode="r", encoding=enc, errors=errors) as handle:
        return handle.read()
1108
|
|
|
|
1109
|
|
|
|
1110
|
|
|
def get_encoding(auto, raw):
    '''
    Automatically detect character encoding.

    Arguments:
        auto (str/bool): either True (prefer cchardet, falling back to
            chardet), "chardet", or "cchardet"
        raw (bytes): array of bytes to detect from

    Returns:
        A string specifying the character encoding.

    Raises:
        ImportError: if the requested detector package (or, for True,
            both of them) is not installed.

    '''
    if auto is True:
        # Prefer the faster cchardet implementation when available.
        try:
            import cchardet as chardet
            method = "cchardet"
            logger.debug("get_encoding Using cchardet")
        except ImportError:
            try:
                import chardet
                method = "chardet"
                logger.debug("get_encoding Using chardet")
            except ImportError:
                raise ImportError(
                    "chardet or cchardet is required for automatic"
                    " detection of character encodings.")
    elif auto.lower() == "chardet":
        import chardet
        method = "chardet"
        logger.debug("get_encoding Using chardet")
    elif auto.lower() == "cchardet":
        import cchardet as chardet
        method = "cchardet"
        logger.debug("get_encoding Using cchardet")

    result = chardet.detect(raw)
    logger.debug("get_encoding %s results=%s" % (method, result))
    return result["encoding"]
1151
|
|
|
|
1152
|
|
|
|
1153
|
|
|
def get_formatter_function(order, left_width=None, middle_width=None):
    '''Create function to format a LAS header item.

    Arguments:
        order: format of item, either "descr:value" or "value:descr" --
            see LAS 1.2 and 2.0 specifications for more information.

    Keyword Arguments:
        left_width (int): number of characters to the left hand side of
            the first period
        middle_width (int): total number of characters minus 1 between
            the first period from the left and the first colon from the
            left.

    Returns:
        A function which takes a header item (e.g. LASHeaderItem or Curve)
        as its single argument and which in turn returns a string which is
        the correctly formatted LAS header line.

    '''
    if left_width is None:
        left_width = 10
    if middle_width is None:
        middle_width = 40

    def format_left(mnemonic):
        # Pad the mnemonic out to the column of the first period.
        return mnemonic.ljust(left_width)

    def format_middle(unit, right_hand_item):
        # Right-align the right-hand field within the middle column.
        padding = " " * (middle_width - len(str(unit)) - len(right_hand_item))
        return unit + padding + right_hand_item

    if order == "descr:value":
        def formatter(item):
            return "%s.%s : %s" % (
                format_left(item.original_mnemonic),
                format_middle(str(item.unit), str(item.descr)),
                item.value)
        return formatter
    elif order == "value:descr":
        def formatter(item):
            return "%s.%s : %s" % (
                format_left(item.original_mnemonic),
                format_middle(str(item.unit), str(item.value)),
                item.descr)
        return formatter
1194
|
|
|
|
1195
|
|
|
|
1196
|
|
|
def get_section_order_function(section, version,
                               order_definitions=ORDER_DEFINITIONS):
    '''Get a function that returns the order per mnemonic and section.

    Arguments:
        section (str): either "well", "params", "curves", "version"
        version (float): either 1.2 and 2.0

    Keyword Arguments:
        order_definitions (dict): mapping of version -> section ->
            [default_order, (order, mnemonics), ...]

    Returns:
        A function which takes a mnemonic (str) as its only argument, and
        in turn returns the order "value:descr" or "descr:value".

    '''
    definitions = order_definitions[version][section]
    fallback = definitions[0]
    overrides = {}
    for order, mnemonics in definitions[1:]:
        overrides.update((mnemonic, order) for mnemonic in mnemonics)

    def order_for(mnemonic):
        return overrides.get(mnemonic, fallback)

    return order_for
1219
|
|
|
|
1220
|
|
|
|
1221
|
|
|
def get_section_widths(section_name, items, version, order_func, middle_padding=5):
    '''Find minimum section widths fitting the content in *items*.

    Arguments:
        section_name (str): either "version", "well", "curves", or "params"
        items (SectionItems): section items
        version (float): either 1.2 or 2.0
        order_func: function mapping a mnemonic to its field order
            ("value:descr" or "descr:value")

    Returns:
        dict with keys "left_width" and "middle_width" (both None when
        *items* is empty).

    '''
    widths = {
        "left_width": None,
        "middle_width": None
    }
    if not items:
        return widths
    widths["left_width"] = max(len(item.original_mnemonic) for item in items)
    middle_candidates = []
    for item in items:
        order = order_func(item.mnemonic)
        # The element before the colon sits in the middle column.
        rhs_element = order.split(':')[0]
        logger.debug('get_section_widths %s\n\torder=%s rhs_element=%s' % (item, order, rhs_element))
        middle_candidates.append(
            len(str(item.unit)) + 1 + len(str(item[rhs_element])))
    widths['middle_width'] = max(middle_candidates)
    return widths
1244
|
|
|
|
1245
|
|
|
|
1246
|
|
|
def read(file_ref, **kwargs):
    '''Read a LAS file.

    Note that only versions 1.2 and 2.0 of the LAS file specification
    are currently supported. This is a convenience wrapper around the
    LASFile constructor.

    Arguments:
        file_ref: either a filename, an open file object, or a string of
            a LAS file contents.

    Keyword Arguments:
        encoding (str): character encoding to open file_ref with
        encoding_errors (str): "strict", "replace" (default), "ignore" -
            how to handle errors with encodings (see standard library
            codecs module or Python Unicode HOWTO for more information)
        autodetect_encoding (bool): use chardet/cchardet to detect encoding
        autodetect_encoding_chars (int/None): number of chars to read from
            LAS file for auto-detection of encoding.

    Returns:
        A LASFile object representing the file -- see above

    '''
    las = LASFile(file_ref, **kwargs)
    return las
1270
|
|
|
|