|
1
|
|
|
import codecs |
|
2
|
|
|
import logging |
|
3
|
|
|
import os |
|
4
|
|
|
import re |
|
5
|
|
|
import textwrap |
|
6
|
|
|
import traceback |
|
7
|
|
|
|
|
8
|
|
|
import numpy as np |
|
9
|
|
|
|
|
10
|
|
|
from . import defaults |
|
11
|
|
|
|
|
12
|
|
|
# Convoluted import for StringIO in order to support: |
|
13
|
|
|
# |
|
14
|
|
|
# - Python 3 - io.StringIO |
|
15
|
|
|
# - Python 2 (optimized) - cStringIO.StringIO |
|
16
|
|
|
# - Python 2 (all) - StringIO.StringIO |
|
17
|
|
|
|
|
18
|
|
|
# Try the accelerated Python 2 implementation first, then fall back.
try:
    import cStringIO as StringIO
except ImportError:
    try:  # cStringIO not available on this system
        import StringIO
    except ImportError:  # Python 3
        from io import StringIO
    else:
        from StringIO import StringIO
else:
    # Use the C implementation located above. (Previously this branch
    # re-imported the pure-Python StringIO class, silently discarding the
    # cStringIO optimisation it had just imported.)
    from cStringIO import StringIO
|
29
|
|
|
|
|
30
|
|
|
from . import defaults |
|
31
|
|
|
from . import exceptions |
|
32
|
|
|
from .las_items import HeaderItem, CurveItem, SectionItems, OrderedDict |
|
33
|
|
|
|
|
34
|
|
|
|
|
35
|
|
|
# Module-level logger; handler/level configuration is left to the application.
logger = logging.getLogger(__name__)

# Matches http/https/ftp/ftps URLs (domain name, "localhost", or a dotted-quad
# IP, with optional port and path) so open_file() can distinguish a URL from a
# filename or raw LAS text. NOTE(review): this appears to be the widely-copied
# Django URL-validation pattern — confirm before modifying.
URL_REGEXP = re.compile(
    r'^(?:http|ftp)s?://' # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}'
    r'\.?|[A-Z0-9-]{2,}\.?)|' # (cont.) domain...
    r'localhost|' # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip
    r'(?::\d+)?' # optional port
    r'(?:/?|[/?]\S+)$', re.IGNORECASE)
|
45
|
|
|
|
|
46
|
|
|
|
|
47
|
|
|
def open_file(file_ref, **encoding_kwargs):
    '''Open a file if necessary.

    If ``autodetect_encoding=True`` then either ``cchardet`` or ``chardet``
    needs to be installed, or else an ``ImportError`` will be raised.

    Arguments:
        file_ref (file-like object, str): either a filename, an open file
            object, a URL, or a string containing the contents of a file.

    See :func:`lasio.reader.open_with_codecs` for keyword arguments that can be
    used here.

    Returns:
        tuple of an open file-like object, and the encoding that
        was used to decode it (if it were read from disk).

    '''
    encoding = None
    if isinstance(file_ref, str):  # file_ref != file-like object, so what is it?
        lines = file_ref.splitlines()
        # Guard against an empty string -- previously lines[0] raised
        # IndexError; an empty string now falls through to the filename
        # branch and fails there with a clearer file-not-found error.
        first_line = lines[0] if lines else ''
        if URL_REGEXP.match(first_line):  # it's a URL
            logger.info('Loading URL {}'.format(first_line))
            try:
                # Python 2 code path.
                import urllib2
                response = urllib2.urlopen(first_line)
                encoding = response.headers.getparam('charset')
                file_ref = StringIO(response.read())
                logger.debug('Retrieved data had encoding {}'.format(encoding))
            except ImportError:
                # Python 3 code path.
                import urllib.request
                response = urllib.request.urlopen(file_ref)
                encoding = response.headers.get_content_charset()
                if encoding is None:
                    # Previously decode(None) raised TypeError when the
                    # server declared no charset; fall back to UTF-8.
                    encoding = 'utf-8'
                file_ref = StringIO(response.read().decode(encoding))
                logger.debug('Retrieved data decoded via {}'.format(encoding))
        elif len(lines) > 1:  # it's LAS data as a string.
            file_ref = StringIO(file_ref)
        else:  # it must be a filename
            file_ref, encoding = open_with_codecs(first_line, **encoding_kwargs)
    return file_ref, encoding
|
88
|
|
|
|
|
89
|
|
|
|
|
90
|
|
|
def open_with_codecs(filename, encoding=None, encoding_errors='replace',
                     autodetect_encoding=True, autodetect_encoding_chars=4000):
    '''
    Read Unicode data from file.

    Arguments:
        filename (str): path to file

    Keyword Arguments:
        encoding (str): character encoding to open file_ref with, using
            :func:`codecs.open`.
        encoding_errors (str): 'strict', 'replace' (default), 'ignore' - how to
            handle errors with encodings (see
            `this section
            <https://docs.python.org/3/library/codecs.html#codec-base-classes>`__
            of the standard library's :mod:`codecs` module for more information)
        autodetect_encoding (str or bool): default True to use
            `chardet <https://github.com/chardet/chardet>`__/`cchardet
            <https://github.com/PyYoshi/cChardet>`__ to detect encoding.
            Note if set to False several common encodings will be tried but
            chardet won't be used.
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    Returns:
        tuple of an open file-like object and the encoding used to open it.

    This function is called by :func:`lasio.reader.open_file`.

    '''
    detect_limit = int(autodetect_encoding_chars) if autodetect_encoding_chars else None

    # A UTF-8 byte-order mark trumps any detection library.
    head_size = min(32, os.path.getsize(filename))
    with open(filename, mode='rb') as source:
        head = source.read(head_size)
    if head.startswith(codecs.BOM_UTF8):
        encoding = 'utf-8-sig'
        autodetect_encoding = False

    # No BOM found: ask chardet/cchardet, if enabled and nothing was forced.
    if autodetect_encoding and not encoding:
        with open(filename, mode='rb') as source:
            if detect_limit is None:
                sample = source.read()
            else:
                sample = source.read(detect_limit)
        encoding = get_encoding(autodetect_encoding, sample)
        autodetect_encoding = False

    # Detection disabled or unavailable: trial a few common encodings.
    if not autodetect_encoding and not encoding:
        encoding = adhoc_test_encoding(filename)
        if encoding:
            logger.info('{} was found by ad hoc to work but note it might not'
                        ' be the correct encoding'.format(encoding))

    # Now open and return the file-like object.
    logger.info('Opening {} as {} and treating errors with "{}"'.format(
        filename, encoding, encoding_errors))
    file_obj = codecs.open(filename, mode='r', encoding=encoding,
                           errors=encoding_errors)
    return file_obj, encoding
|
156
|
|
|
|
|
157
|
|
|
|
|
158
|
|
|
def adhoc_test_encoding(filename):
    '''Try opening the file with a few common encodings.

    Returns the first encoding that can decode the file's first line, or
    None if none of the candidates work. Note this is a best-effort guess,
    not a guarantee of correctness.
    '''
    encoding = None
    for candidate in ('ascii', 'windows-1252', 'latin-1'):
        encoding = candidate
        with codecs.open(filename, mode='r', encoding=encoding) as f:
            try:
                f.readline()
                # Decoded cleanly -- keep this candidate and stop looking.
                break
            except UnicodeDecodeError:
                logger.debug('{} tested, raised UnicodeDecodeError'.format(candidate))
        # Only reached when decoding failed; cleared so that exhausting the
        # loop returns None.
        encoding = None
    return encoding
|
171
|
|
|
|
|
172
|
|
|
|
|
173
|
|
|
def get_encoding(auto, raw):
    '''
    Automatically detect character encoding.

    Arguments:
        auto (str or bool): auto-detection of character encoding - can be
            either 'chardet', 'cchardet', False, or True (the latter will
            pick the fastest available option)
        raw (bytes): array of bytes to detect from

    Returns:
        A string specifying the character encoding, or None when detection
        is disabled (``auto`` falsy) or no detector library is installed.

    '''
    if not auto:
        # The docstring has always promised that False disables detection,
        # but auto=False previously crashed on auto.lower(). Honour it.
        return None
    if auto is True:
        # Prefer the faster cchardet, fall back to chardet, else give up.
        try:
            import cchardet as chardet
        except ImportError:
            try:
                import chardet
            except ImportError:
                logger.debug('chardet or cchardet is recommended for automatic'
                             ' detection of character encodings. Instead trying some'
                             ' common encodings.')
                return None
            else:
                logger.debug('get_encoding Using chardet')
                method = 'chardet'
        else:
            logger.debug('get_encoding Using cchardet')
            method = 'cchardet'
    elif auto.lower() == 'chardet':
        import chardet
        logger.debug('get_encoding Using chardet')
        method = 'chardet'
    elif auto.lower() == 'cchardet':
        import cchardet as chardet
        logger.debug('get_encoding Using cchardet')
        method = 'cchardet'
    result = chardet.detect(raw)
    logger.debug('{} method detected encoding of {} at confidence {}'.format(
        method, result['encoding'], result['confidence']))
    return result['encoding']
|
216
|
|
|
|
|
217
|
|
|
|
|
218
|
|
|
def read_file_contents(file_obj, regexp_subs, value_null_subs,
                       ignore_data=False):
    '''Read file contents into memory.

    Arguments:
        file_obj (open file-like object)
        regexp_subs (list): each item is a (pattern, substr) pair passed to
            re.sub() on each line of the data section.
        value_null_subs (list): numerical values to be replaced by
            numpy.nan in the data section.

    Keyword Arguments:
        ignore_data (bool): if True, do not read in the numerical data in the
            ~ASCII section

    Returns:
        OrderedDict

    I think of the returned dictionary as a "raw section". The keys are
    the first line of the LAS section, including the tilde. Each value is
    a dict with either::

        {"section_type": "header",
         "title": str,          # title of section (including the ~)
         "lines": [str, ],      # a list of the lines from the lAS file
         "line_nos": [int, ]    # line nos from the original file
         }

    or::

        {"section_type": "data",
         "title": str,          # title of section (including the ~)
         "start_line": int,     # location of data section (the title line)
         "ncols": int,          # no. of columns on first line of data,
         "array": ndarray       # 1-D numpy.ndarray,
         }

    '''
    sections = OrderedDict()
    sect_lines = []
    sect_line_nos = []
    sect_title_line = None

    for i, line in enumerate(file_obj):
        line = line.strip()
        if not line:
            continue
        if line.upper().startswith('~A'):
            # HARD CODED FOR VERSION 1.2 and 2.0; needs review for 3.0
            # We have finished looking at the metadata and need
            # to start reading numerical data.
            if sect_title_line is not None:
                sections[sect_title_line] = {
                    "section_type": "header",
                    "title": sect_title_line,
                    "lines": sect_lines,
                    "line_nos": sect_line_nos,
                }
            if not ignore_data:
                try:
                    data = read_data_section_iterative(
                        file_obj, regexp_subs, value_null_subs)
                except Exception:
                    # Previously a bare except: -- narrowed so that
                    # SystemExit/KeyboardInterrupt are not re-packaged
                    # as LAS data errors.
                    raise exceptions.LASDataError(
                        traceback.format_exc()[:-1] +
                        ' in data section beginning line {}'.format(i + 1))
                sections[line] = {
                    "section_type": "data",
                    "start_line": i,
                    "title": line,
                    "array": data,
                }
                logger.debug('Data section ["array"].shape = {}'.format(data.shape))
            # Data is always last in a 1.2/2.0 file -- stop scanning.
            break

        elif line.startswith('~'):
            if sect_lines:
                # We have ended a section and need to start the next.
                sections[sect_title_line] = {
                    "section_type": "header",
                    "title": sect_title_line,
                    "lines": sect_lines,
                    "line_nos": sect_line_nos,
                }
                sect_lines = []
                sect_line_nos = []
            # Whether starting fresh or rolling over, this line is the new
            # section's title.
            sect_title_line = line

        else:
            # We are in the middle of a section.
            if not line.startswith("#"):  # ignore commented-out lines.. for now.
                sect_lines.append(line)
                sect_line_nos.append(i + 1)

    # Find the number of columns in the data section(s). This is only
    # useful if WRAP = NO, but we do it for all since we don't yet know
    # what the wrap setting is.
    for section in sections.values():
        if section["section_type"] == "data":
            section["ncols"] = None
            file_obj.seek(0)
            for i, line in enumerate(file_obj):
                if i == section["start_line"] + 1:
                    for pattern, sub_str in regexp_subs:
                        line = re.sub(pattern, sub_str, line)
                    section["ncols"] = len(line.split())
                    break
    return sections
|
326
|
|
|
|
|
327
|
|
|
|
|
328
|
|
|
def read_data_section_iterative(file_obj, regexp_subs, value_null_subs):
    '''Read data section into memory.

    Arguments:
        file_obj (open file-like object): should be positioned in line-by-line
            reading mode, with the last line read being the title of the
            ~ASCII data section.
        regexp_subs (list): each item should be a tuple of the pattern and
            substitution string for a call to re.sub() on each line of the
            data section. See defaults.py READ_SUBS and NULL_SUBS for examples.
        value_null_subs (list): list of numerical values to be replaced by
            numpy.nan values.

    Returns:
        A 1-D numpy ndarray.

    '''
    def token_stream(stream):
        # Apply every regexp cleanup to each line, then emit the resulting
        # whitespace-separated tokens one at a time.
        for raw_line in stream:
            cleaned = raw_line
            for pattern, replacement in regexp_subs:
                cleaned = re.sub(pattern, replacement, cleaned)
            for token in cleaned.split():
                yield token

    array = np.fromiter(token_stream(file_obj), np.float64, -1)
    for null_value in value_null_subs:
        array[array == null_value] = np.nan
    return array
|
356
|
|
|
|
|
357
|
|
|
|
|
358
|
|
|
def get_substitutions(read_policy, null_policy):
    '''Parse read and null policy definitions into a list of regexp and value
    substitutions.

    Arguments:
        read_policy (str, list, or substitution): either (1) a string defined in
            defaults.READ_POLICIES; (2) a list of substitutions as defined by
            the keys of defaults.READ_SUBS; or (3) a list of actual substitutions
            similar to the values of defaults.READ_SUBS. You can mix (2) and (3)
            together if you want.
        null_policy (str, list, or sub): as for read_policy but for
            defaults.NULL_POLICIES and defaults.NULL_SUBS

    Returns:
        regexp_subs, value_null_subs, version_NULL - two lists and a bool.
        The first list is pairs of regexp patterns and substrs, and the second
        list is just a list of floats or integers. The bool is whether or not
        'NULL' was located as a substitution.

    '''
    regexp_subs = []
    numerical_subs = []
    version_NULL = False

    for policy_typ, policy, policy_subs, subs in (
            ('read', read_policy, defaults.READ_POLICIES, defaults.READ_SUBS),
            ('null', null_policy, defaults.NULL_POLICIES, defaults.NULL_SUBS)):
        try:
            is_policy = policy in policy_subs
        except TypeError:
            # policy is unhashable (e.g. a list) so it cannot be a named policy
            is_policy = False

        all_subs = []
        if is_policy:
            logger.debug('using {} policy of "{}"'.format(policy_typ, policy))
            for sub in policy_subs[policy]:
                logger.debug('adding substitution {}'.format(sub))
                if sub in subs:
                    all_subs += subs[sub]
                if sub == 'NULL':
                    # fixed typo in this log message ("substition")
                    logger.debug('located substitution for LAS.version.NULL as True')
                    version_NULL = True
        else:
            for item in policy:
                if item in subs:
                    all_subs += subs[item]
                    if item == 'NULL':
                        logger.debug('located substitution for LAS.version.NULL as True')
                        version_NULL = True
                else:
                    # assume it is already an actual substitution
                    all_subs.append(item)

        for item in all_subs:
            try:
                iter(item)
            except TypeError:
                # scalars (ints/floats) are straight numerical replacements
                logger.debug('added numerical substitution: {}'.format(item))
                numerical_subs.append(item)
            else:
                # iterables are (regexp pattern, substitution string) pairs
                logger.debug('added regexp substitution: pattern={} substr="{}"'.format(item[0], item[1]))
                regexp_subs.append(item)

    numerical_subs = [n for n in numerical_subs if n is not None]

    return regexp_subs, numerical_subs, version_NULL
|
421
|
|
|
|
|
422
|
|
|
|
|
423
|
|
|
def parse_header_section(sectdict, version, ignore_header_errors=False,
                         mnemonic_case='preserve'):
    '''Parse a header section dict into a SectionItems containing HeaderItems.

    Arguments:
        sectdict (dict): object returned from
            :func:`lasio.reader.read_file_contents`
        version (float): either 1.2 or 2.0

    Keyword Arguments:
        ignore_header_errors (bool): if True, issue HeaderItem parse errors
            as :func:`logging.warning` calls instead of a
            :exc:`lasio.exceptions.LASHeaderError` exception.
        mnemonic_case (str): 'preserve': keep the case of HeaderItem mnemonics
            'upper': convert all HeaderItem mnemonics to uppercase
            'lower': convert all HeaderItem mnemonics to lowercase

    Returns:
        :class:`lasio.las_items.SectionItems`

    '''
    title = sectdict["title"]
    assert len(sectdict["lines"]) == len(sectdict["line_nos"])
    parser = SectionParser(title, version=version)

    section = SectionItems()
    assert mnemonic_case in ('upper', 'lower', 'preserve')
    if mnemonic_case != 'preserve':
        section.mnemonic_transforms = True

    for line, j in zip(sectdict["lines"], sectdict["line_nos"]):
        if not line:
            continue
        try:
            values = read_line(line)
        except Exception:
            # Previously a bare except: -- narrowed so SystemExit and
            # KeyboardInterrupt propagate rather than becoming header errors.
            message = 'line {} (section {}): "{}"'.format(j, title, line)
            if ignore_header_errors:
                logger.warning(message)
            else:
                raise exceptions.LASHeaderError(message)
        else:
            if mnemonic_case == 'upper':
                values['name'] = values['name'].upper()
            elif mnemonic_case == 'lower':
                values['name'] = values['name'].lower()
            section.append(parser(**values))
    return section
|
475
|
|
|
|
|
476
|
|
|
|
|
477
|
|
|
|
|
478
|
|
|
class SectionParser(object):

    '''Parse lines from header sections.

    Arguments:
        title (str): title line of section. Used to understand different
            order formatting across the special sections ~C, ~P, ~W, and ~V,
            depending on version 1.2 or 2.0.

    Keyword Arguments:
        version (float): version to parse according to. Default is 1.2.

    '''

    def __init__(self, title, version=1.2):
        # Map the section title to the method that builds items for it, and
        # to the canonical section name used in defaults.ORDER_DEFINITIONS.
        if title.upper().startswith('~C'):
            self.func = self.curves
            self.section_name2 = "Curves"
        elif title.upper().startswith('~P'):
            self.func = self.params
            self.section_name2 = "Parameter"
        elif title.upper().startswith('~W'):
            self.func = self.metadata
            self.section_name2 = "Well"
        elif title.upper().startswith('~V'):
            self.func = self.metadata
            self.section_name2 = "Version"
        # NOTE(review): an unrecognised title leaves self.func and
        # self.section_name2 unset, so the lookup below raises
        # AttributeError -- retained for backward compatibility.

        self.version = version
        self.section_name = title

        defs = defaults.ORDER_DEFINITIONS
        section_orders = defs[self.version][self.section_name2]
        self.default_order = section_orders[0]
        self.orders = {}
        for order, mnemonics in section_orders[1:]:
            for mnemonic in mnemonics:
                self.orders[mnemonic] = order

    def __call__(self, **keys):
        '''Return the correct object for this type of section.

        Refer to :meth:`lasio.reader.SectionParser.metadata`,
        :meth:`lasio.reader.SectionParser.params`, and
        :meth:`lasio.reader.SectionParser.curves` for the methods actually
        used by this routine.

        Keyword arguments should be the key:value pairs returned by
        :func:`lasio.reader.read_header_line`.

        '''
        item = self.func(**keys)
        return item

    def num(self, x, default=None):
        '''Attempt to parse a number.

        Arguments:
            x (str, int, float): potential number
            default (int, float, None): fall-back option

        Returns:
            int, float, or **default** - from most to least preferred types.

        '''
        if default is None:
            default = x

        # in case it is a string with a comma decimal mark
        try:
            pattern, sub = defaults.READ_SUBS['comma-decimal-mark'][0]
            x = re.sub(pattern, sub, x)
        except Exception:
            pass

        # np.int and np.float were removed in numpy >= 1.24; they were plain
        # aliases of the builtins, which behave identically here.
        try:
            return int(x)
        except (ValueError, TypeError, OverflowError):
            try:
                x = float(x)
            except (ValueError, TypeError):
                return default
        # Reject inf/nan values in favour of the fall-back.
        if np.isfinite(x):
            return x
        else:
            return default

    def metadata(self, **keys):
        '''Return HeaderItem correctly formatted according to the order
        prescribed for LAS v 1.2 or 2.0 for the ~W section.

        Keyword arguments should be the key:value pairs returned by
        :func:`lasio.reader.read_header_line`.

        '''
        key_order = self.orders.get(keys['name'], self.default_order)
        if key_order == 'value:descr':
            return HeaderItem(
                keys['name'],               # mnemonic
                keys['unit'],               # unit
                self.num(keys['value']),    # value
                keys['descr'],              # descr
            )
        elif key_order == 'descr:value':
            return HeaderItem(
                keys['name'],               # mnemonic
                keys['unit'],               # unit
                keys['descr'],              # descr
                self.num(keys['value']),    # value
            )

    def curves(self, **keys):
        '''Return CurveItem.

        Keyword arguments should be the key:value pairs returned by
        :func:`lasio.reader.read_header_line`.

        '''
        item = CurveItem(
            keys['name'],   # mnemonic
            keys['unit'],   # unit
            keys['value'],  # value
            keys['descr'],  # descr
        )
        return item

    def params(self, **keys):
        '''Return HeaderItem for ~P section (the same between 1.2 and 2.0 specs)

        Keyword arguments should be the key:value pairs returned by
        :func:`lasio.reader.read_header_line`.

        '''
        return HeaderItem(
            keys['name'],               # mnemonic
            keys['unit'],               # unit
            self.num(keys['value']),    # value
            keys['descr'],              # descr
        )
|
618
|
|
|
|
|
619
|
|
|
|
|
620
|
|
|
def read_line(*args, **kwargs):
    '''Retained for backwards-compatibility.

    See :func:`lasio.reader.read_header_line`.

    '''
    # Thin alias: all real parsing lives in read_header_line(); this name is
    # kept so existing imports of lasio.reader.read_line keep working.
    result = read_header_line(*args, **kwargs)
    return result
|
627
|
|
|
|
|
628
|
|
|
|
|
629
|
|
|
def read_header_line(line, pattern=None):
    '''Read a line from a LAS header section.

    The line is parsed with a regular expression -- see LAS file specs for
    more details, but it should basically be in the format::

        name.unit       value : descr

    Arguments:
        line (str): line from a LAS header section

    Keyword Arguments:
        pattern (str): regular expression with named groups; when None a
            default pattern is chosen based on whether the line has a colon.

    Returns:
        A dictionary with keys 'name', 'unit', 'value', and 'descr', each
        containing a string as value.

    '''
    fields = {'name': '', 'unit': '', 'value': '', 'descr': ''}
    if pattern is None:
        # Lines without a colon carry no description field.
        if ':' in line:
            pattern = (r'\.?(?P<name>[^.]*)\.' +
                       r'(?P<unit>[^\s:]*)' +
                       r'(?P<value>[^:]*):' +
                       r'(?P<descr>.*)')
        else:
            pattern = (r'\.?(?P<name>[^.]*)\.' +
                       r'(?P<unit>[^\s:]*)' +
                       r'(?P<value>[^:]*)')
    match = re.match(pattern, line)
    for key, raw_value in match.groupdict().items():
        value = raw_value.strip()
        if key == 'unit' and value.endswith('.'):
            value = value.strip('.')  # see issue #36
        fields[key] = value
    return fields
|
664
|
|
|
|