1
|
|
|
import codecs |
2
|
|
|
import logging |
3
|
|
|
import os |
4
|
|
|
import re |
5
|
|
|
import textwrap |
6
|
|
|
import traceback |
7
|
|
|
|
8
|
|
|
import numpy as np |
9
|
|
|
|
10
|
|
|
from . import defaults |
11
|
|
|
|
12
|
|
|
# Convoluted import for StringIO in order to support: |
13
|
|
|
# |
14
|
|
|
# - Python 3 - io.StringIO |
15
|
|
|
# - Python 2 (optimized) - cStringIO.StringIO |
16
|
|
|
# - Python 2 (all) - StringIO.StringIO |
17
|
|
|
|
18
|
|
|
try: |
19
|
|
|
import cStringIO as StringIO |
20
|
|
|
except ImportError: |
21
|
|
|
try: # cStringIO not available on this system |
22
|
|
|
import StringIO |
23
|
|
|
except ImportError: # Python 3 |
24
|
|
|
from io import StringIO |
25
|
|
|
else: |
26
|
|
|
from StringIO import StringIO |
27
|
|
|
else: |
28
|
|
|
from StringIO import StringIO |
29
|
|
|
|
30
|
|
|
from . import defaults |
31
|
|
|
from . import exceptions |
32
|
|
|
from .las_items import HeaderItem, CurveItem, SectionItems, OrderedDict |
33
|
|
|
|
34
|
|
|
|
35
|
|
|
# Module-level logger; handler/level configuration is left to the application.
logger = logging.getLogger(__name__)

# Used by open_file() to decide whether a string argument is a URL (as
# opposed to a filename or raw LAS content). Matches http(s)/ftp(s) URLs
# with a domain name, "localhost", or a dotted-quad IP, plus optional port
# and path.
URL_REGEXP = re.compile(
    r'^(?:http|ftp)s?://'  # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}'
    r'\.?|[A-Z0-9-]{2,}\.?)|'  # (cont.) domain...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)$', re.IGNORECASE)
45
|
|
|
|
46
|
|
|
|
47
|
|
|
def open_file(file_ref, **encoding_kwargs):
    '''Open a file if necessary.

    If ``autodetect_encoding=True`` then either ``cchardet`` or ``chardet``
    needs to be installed, or else an ``ImportError`` will be raised.

    Arguments:
        file_ref (file-like object, str): either a filename, an open file
            object, a URL, or a string containing the contents of a file.

    See :func:`lasio.reader.open_with_codecs` for keyword arguments that can be
    used here.

    Returns:
        tuple of an open file-like object, and the encoding that
        was used to decode it (if it were read from disk).

    '''
    encoding = None
    if isinstance(file_ref, str):  # file_ref != file-like object, so what is it?
        lines = file_ref.splitlines()
        first_line = lines[0]
        if URL_REGEXP.match(first_line):  # it's a URL
            logger.info('Loading URL {}'.format(first_line))
            try:
                # Python 2
                import urllib2
                response = urllib2.urlopen(first_line)
                encoding = response.headers.getparam('charset')
                file_ref = StringIO(response.read())
                logger.debug('Retrieved data had encoding {}'.format(encoding))
            except ImportError:
                # Python 3. Use first_line (the matched URL) rather than the
                # whole string, consistent with the urllib2 branch above.
                import urllib.request
                response = urllib.request.urlopen(first_line)
                encoding = response.headers.get_content_charset()
                # Servers frequently omit the charset; fall back to UTF-8 for
                # decoding instead of crashing on bytes.decode(None). The
                # returned `encoding` still reflects what the server declared.
                file_ref = StringIO(response.read().decode(encoding or 'utf-8'))
                logger.debug('Retrieved data decoded via {}'.format(encoding))
        elif len(lines) > 1:  # it's LAS data as a string.
            file_ref = StringIO(file_ref)
        else:  # it must be a filename
            file_ref, encoding = open_with_codecs(first_line, **encoding_kwargs)
    return file_ref, encoding
88
|
|
|
|
89
|
|
|
|
90
|
|
|
def open_with_codecs(filename, encoding=None, encoding_errors='replace',
                     autodetect_encoding=True, autodetect_encoding_chars=4000):
    '''
    Read Unicode data from file.

    Arguments:
        filename (str): path to file

    Keyword Arguments:
        encoding (str): character encoding to open file_ref with, using
            :func:`codecs.open`.
        encoding_errors (str): 'strict', 'replace' (default), 'ignore' - how to
            handle errors with encodings (see
            `this section
            <https://docs.python.org/3/library/codecs.html#codec-base-classes>`__
            of the standard library's :mod:`codecs` module for more information)
        autodetect_encoding (str or bool): default True to use
            `chardet <https://github.com/chardet/chardet>`__/`cchardet
            <https://github.com/PyYoshi/cChardet>`__ to detect encoding.
            Note if set to False several common encodings will be tried but
            chardet won't be used.
        autodetect_encoding_chars (int/None): number of chars to read from LAS
            file for auto-detection of encoding.

    Returns:
        tuple of an open file-like object (from :func:`codecs.open`), and the
        encoding (str or None) that was used to open it.

    This function is called by :func:`lasio.reader.open_file`.

    '''
    # How many bytes to feed the chardet-style detector (None = whole file).
    if autodetect_encoding_chars:
        nbytes = int(autodetect_encoding_chars)
    else:
        nbytes = None

    # Forget [c]chardet - if we can locate the BOM we just assume that's correct.
    nbytes_test = min(32, os.path.getsize(filename))
    with open(filename, mode='rb') as test:
        raw = test.read(nbytes_test)
    if raw.startswith(codecs.BOM_UTF8):
        # 'utf-8-sig' consumes the BOM on read so it doesn't leak into data.
        encoding = 'utf-8-sig'
        autodetect_encoding = False

    # If BOM wasn't found...
    if (autodetect_encoding) and (not encoding):
        with open(filename, mode='rb') as test:
            if nbytes is None:
                raw = test.read()
            else:
                raw = test.read(nbytes)
        # get_encoding returns None when no detection library is installed.
        encoding = get_encoding(autodetect_encoding, raw)
        autodetect_encoding = False

    # Or if no BOM found & chardet not installed
    if (not autodetect_encoding) and (not encoding):
        encoding = adhoc_test_encoding(filename)
        if encoding:
            logger.info('{} was found by ad hoc to work but note it might not'
                        ' be the correct encoding'.format(encoding))

    # Now open and return the file-like object
    logger.info('Opening {} as {} and treating errors with "{}"'.format(
        filename, encoding, encoding_errors))
    file_obj = codecs.open(filename, mode='r', encoding=encoding,
                           errors=encoding_errors)
    return file_obj, encoding
156
|
|
|
|
157
|
|
|
|
158
|
|
|
def adhoc_test_encoding(filename):
    '''Try reading the first line of ``filename`` with a few common encodings.

    Returns the first encoding whose first-line read does not raise
    ``UnicodeDecodeError``, or None if none of them work.
    '''
    candidates = ('ascii', 'windows-1252', 'latin-1')
    encoding = None
    for candidate in candidates:
        encoding = candidate
        with codecs.open(filename, mode='r', encoding=candidate) as f:
            try:
                f.readline()
                # Decoded cleanly -- keep this candidate.
                break
            except UnicodeDecodeError:
                logger.debug('{} tested, raised UnicodeDecodeError'.format(candidate))
        # This candidate failed; reset before trying the next one.
        encoding = None
    return encoding
171
|
|
|
|
172
|
|
|
|
173
|
|
|
def get_encoding(auto, raw):
    '''
    Automatically detect character encoding.

    Arguments:
        auto (str or bool): auto-detection of character encoding - can be
            either 'chardet', 'cchardet', False, or True (the latter will
            pick the fastest available option)
        raw (bytes): array of bytes to detect from

    Returns:
        A string specifying the character encoding, or None if detection is
        disabled (``auto=False``) or no detection library is installed.

    '''
    # Guard: the docstring allows auto=False, but the original code then hit
    # auto.lower() -> AttributeError. Detection disabled means "no answer".
    if auto is False:
        return None
    if auto is True:
        try:
            import cchardet as chardet
        except ImportError:
            try:
                import chardet
            except ImportError:
                logger.debug('chardet or cchardet is recommended for automatic'
                             ' detection of character encodings. Instead trying some'
                             ' common encodings.')
                return None
            else:
                logger.debug('get_encoding Using chardet')
                method = 'chardet'
        else:
            logger.debug('get_encoding Using cchardet')
            method = 'cchardet'
    elif auto.lower() == 'chardet':
        import chardet
        logger.debug('get_encoding Using chardet')
        method = 'chardet'
    elif auto.lower() == 'cchardet':
        import cchardet as chardet
        logger.debug('get_encoding Using cchardet')
        method = 'cchardet'
    result = chardet.detect(raw)
    logger.debug('{} method detected encoding of {} at confidence {}'.format(
        method, result['encoding'], result['confidence']))
    return result['encoding']
216
|
|
|
|
217
|
|
|
|
218
|
|
|
def read_file_contents(file_obj, regexp_subs, value_null_subs,
                       ignore_data=False):
    '''Read file contents into memory.

    Arguments:
        file_obj (open file-like object)
        regexp_subs (list): (pattern, substr) pairs applied via re.sub() to
            each data line (see defaults.READ_SUBS for examples).
        value_null_subs (list): numerical values to replace with numpy.nan
            in the data section.

    Keyword Arguments:
        ignore_data (bool): if True, do not read in the numerical data in the
            ~ASCII section

    Returns:
        OrderedDict

    I think of the returned dictionary as a "raw section". The keys are
    the first line of the LAS section, including the tilde. Each value is
    a dict with either::

        {"section_type": "header",
         "title": str,               # title of section (including the ~)
         "lines": [str, ],           # a list of the lines from the lAS file
         "line_nos": [int, ]         # line nos from the original file
         }

    or::

        {"section_type": "data",
         "title": str,              # title of section (including the ~)
         "start_line": int,         # location of data section (the title line)
         "ncols": int,              # no. of columns on first line of data,
         "array": ndarray           # 1-D numpy.ndarray,
         }

    '''
    sections = OrderedDict()
    # Accumulators for the header section currently being read.
    sect_lines = []
    sect_line_nos = []
    sect_title_line = None
    section_exists = False

    for i, line in enumerate(file_obj):
        line = line.strip()
        if not line:
            continue
        if line.upper().startswith('~A'):
            # HARD CODED FOR VERSION 1.2 and 2.0; needs review for 3.0
            # We have finished looking at the metadata and need
            # to start reading numerical data.
            # Flush the header section that preceded ~A (if any).
            if not sect_title_line is None:
                sections[sect_title_line] = {
                    "section_type": "header",
                    "title": sect_title_line,
                    "lines": sect_lines,
                    "line_nos": sect_line_nos,
                }
            if not ignore_data:
                try:
                    # file_obj is positioned just past the ~A title line, so
                    # the iterative reader consumes only the numerical rows.
                    data = read_data_section_iterative(file_obj, regexp_subs, value_null_subs)
                except:
                    raise exceptions.LASDataError(
                        traceback.format_exc()[:-1] +
                        ' in data section beginning line {}'.format(i + 1))
                sections[line] = {
                    "section_type": "data",
                    "start_line": i,
                    "title": line,
                    "array": data,
                }
                logger.debug('Data section ["array"].shape = {}'.format(data.shape))
            # Data is assumed to run to EOF; nothing more to parse.
            break

        elif line.startswith('~'):
            if section_exists:
                # We have ended a section and need to start the next
                sections[sect_title_line] = {
                    "section_type": "header",
                    "title": sect_title_line,
                    "lines": sect_lines,
                    "line_nos": sect_line_nos,
                }
                sect_lines = []
                sect_line_nos = []
            else:
                # We are entering into a section for the first time
                section_exists = True
                pass
            sect_title_line = line  # either way... this is the case.

        else:
            # We are in the middle of a section.
            if not line.startswith("#"):  # ignore commented-out lines.. for now.
                sect_lines.append(line)
                sect_line_nos.append(i + 1)

    # Find the number of columns in the data section(s). This is only
    # useful if WRAP = NO, but we do it for all since we don't yet know
    # what the wrap setting is.

    for section in sections.values():
        if section["section_type"] == "data":
            section["ncols"] = None
            # Re-scan from the top to find the first line after the data
            # section's title line and count its whitespace-separated fields.
            file_obj.seek(0)
            for i, line in enumerate(file_obj):
                if i == section["start_line"] + 1:
                    for pattern, sub_str in regexp_subs:
                        line = re.sub(pattern, sub_str, line)
                    section["ncols"] = len(line.split())
                    break
    return sections
328
|
|
|
|
329
|
|
|
|
330
|
|
|
def read_data_section_iterative(file_obj, regexp_subs, value_null_subs):
    '''Read data section into memory.

    Arguments:
        file_obj (open file-like object): should be positioned in line-by-line
            reading mode, with the last line read being the title of the
            ~ASCII data section.
        regexp_subs (list): each item should be a tuple of the pattern and
            substitution string for a call to re.sub() on each line of the
            data section. See defaults.py READ_SUBS and NULL_SUBS for examples.
        value_null_subs (list): list of numerical values to be replaced by
            numpy.nan values.

    Returns:
        A 1-D numpy ndarray.

    '''
    def _tokens(source):
        # Yield each whitespace-separated field, converted to float64 where
        # possible; non-numeric fields are yielded as-is (strings).
        for raw_line in source:
            for pattern, sub_str in regexp_subs:
                raw_line = re.sub(pattern, sub_str, raw_line)
            for token in raw_line.split():
                try:
                    yield np.float64(token)
                except ValueError:
                    yield token

    array = np.array(list(_tokens(file_obj)))
    for null_value in value_null_subs:
        array[array == null_value] = np.nan
    return array
361
|
|
|
|
362
|
|
|
|
363
|
|
|
def get_substitutions(read_policy, null_policy):
    '''Parse read and null policy definitions into a list of regexp and value
    substitutions.

    Arguments:
        read_policy (str, list, or substitution): either (1) a string defined in
            defaults.READ_POLICIES; (2) a list of substitutions as defined by
            the keys of defaults.READ_SUBS; or (3) a list of actual substitutions
            similar to the values of defaults.READ_SUBS. You can mix (2) and (3)
            together if you want.
        null_policy (str, list, or sub): as for read_policy but for
            defaults.NULL_POLICIES and defaults.NULL_SUBS

    Returns:
        regexp_subs, value_null_subs, version_NULL - two lists and a bool.
        The first list is pairs of regexp patterns and substrs, and the second
        list is just a list of floats or integers. The bool is whether or not
        'NULL' was located as a substitution.

    '''
    regexp_subs = []
    numerical_subs = []
    version_NULL = False

    policy_specs = (
        ('read', read_policy, defaults.READ_POLICIES, defaults.READ_SUBS),
        ('null', null_policy, defaults.NULL_POLICIES, defaults.NULL_SUBS))
    for policy_typ, policy, policy_subs, subs in policy_specs:
        # A policy may be a named preset (a key of policy_subs) or an
        # explicit list; unhashable policies can never be preset names.
        try:
            is_policy = policy in policy_subs
        except TypeError:
            is_policy = False

        all_subs = []
        if is_policy:
            # Named preset: expand it into its component substitutions.
            logger.debug('using {} policy of "{}"'.format(policy_typ, policy))
            for sub in policy_subs[policy]:
                logger.debug('adding substitution {}'.format(sub))
                if sub in subs:
                    all_subs.extend(subs[sub])
                if sub == 'NULL':
                    logger.debug('located substition for LAS.version.NULL as True')
                    version_NULL = True
        else:
            # Explicit list: items may be sub names or literal substitutions.
            for entry in policy:
                if entry in subs:
                    all_subs.extend(subs[entry])
                    if entry == 'NULL':
                        logger.debug('located substition for LAS.version.NULL as True')
                        version_NULL = True
                else:
                    all_subs.append(entry)

        # Split the collected substitutions: iterables are (pattern, substr)
        # regexp pairs, non-iterables are plain numerical null values.
        for entry in all_subs:
            try:
                iter(entry)
            except TypeError:
                logger.debug('added numerical substitution: {}'.format(entry))
                numerical_subs.append(entry)
            else:
                logger.debug('added regexp substitution: pattern={} substr="{}"'.format(entry[0], entry[1]))
                regexp_subs.append(entry)

    numerical_subs = [n for n in numerical_subs if n is not None]

    return regexp_subs, numerical_subs, version_NULL
426
|
|
|
|
427
|
|
|
|
428
|
|
|
def parse_header_section(sectdict, version, ignore_header_errors=False,
                         mnemonic_case='preserve'):
    '''Parse a header section dict into a SectionItems containing HeaderItems.

    Arguments:
        sectdict (dict): object returned from
            :func:`lasio.reader.read_file_contents`
        version (float): either 1.2 or 2.0

    Keyword Arguments:
        ignore_header_errors (bool): if True, issue HeaderItem parse errors
            as :func:`logging.warning` calls instead of a
            :exc:`lasio.exceptions.LASHeaderError` exception.
        mnemonic_case (str): 'preserve': keep the case of HeaderItem mnemonics
            'upper': convert all HeaderItem mnemonics to uppercase
            'lower': convert all HeaderItem mnemonics to lowercase

    Returns:
        :class:`lasio.las_items.SectionItems`

    '''
    title = sectdict["title"]
    # lines and line_nos are parallel arrays produced by read_file_contents.
    assert len(sectdict["lines"]) == len(sectdict["line_nos"])
    parser = SectionParser(title, version=version)

    section = SectionItems()
    assert mnemonic_case in ('upper', 'lower', 'preserve')
    if not mnemonic_case == 'preserve':
        section.mnemonic_transforms = True

    for i in range(len(sectdict["lines"])):
        line = sectdict["lines"][i]
        j = sectdict["line_nos"][i]  # original file line number, for messages
        if not line:
            continue
        try:
            values = read_line(line)
        except:
            # Parse failure: either warn and skip the line, or abort,
            # depending on ignore_header_errors.
            message = 'line {} (section {}): "{}"'.format(
                # traceback.format_exc().splitlines()[-1].strip('\n'),
                j, title, line)
            if ignore_header_errors:
                logger.warning(message)
            else:
                raise exceptions.LASHeaderError(message)
        else:
            # Successful parse: normalise mnemonic case before appending.
            if mnemonic_case == 'upper':
                values['name'] = values['name'].upper()
            elif mnemonic_case == 'lower':
                values['name'] = values['name'].lower()
            section.append(parser(**values))
    return section
480
|
|
|
|
481
|
|
|
|
482
|
|
|
|
483
|
|
|
class SectionParser(object):

    '''Parse lines from header sections.

    Arguments:
        title (str): title line of section. Used to understand different
            order formatting across the special sections ~C, ~P, ~W, and ~V,
            depending on version 1.2 or 2.0.

    Keyword Arguments:
        version (float): version to parse according to. Default is 1.2.

    '''

    def __init__(self, title, version=1.2):
        # Map the section title to the parsing method and to the canonical
        # section name used as a key in defaults.ORDER_DEFINITIONS.
        if title.upper().startswith('~C'):
            self.func = self.curves
            self.section_name2 = "Curves"
        elif title.upper().startswith('~P'):
            self.func = self.params
            self.section_name2 = "Parameter"
        elif title.upper().startswith('~W'):
            self.func = self.metadata
            self.section_name2 = "Well"
        elif title.upper().startswith('~V'):
            self.func = self.metadata
            self.section_name2 = "Version"
        # NOTE(review): a title outside ~C/~P/~W/~V leaves self.func and
        # self.section_name2 unset, so the defs lookup below raises
        # AttributeError -- confirm callers only pass known section titles.

        self.version = version
        self.section_name = title

        defs = defaults.ORDER_DEFINITIONS
        section_orders = defs[self.version][self.section_name2]
        self.default_order = section_orders[0]
        # Per-mnemonic overrides of the default value/descr ordering.
        self.orders = {}
        for order, mnemonics in section_orders[1:]:
            for mnemonic in mnemonics:
                self.orders[mnemonic] = order

    def __call__(self, **keys):
        '''Return the correct object for this type of section.

        Refer to :meth:`lasio.reader.SectionParser.metadata`,
        :meth:`lasio.reader.SectionParser.params`, and
        :meth:`lasio.reader.SectionParser.curves` for the methods actually
        used by this routine.

        Keyword arguments should be the key:value pairs returned by
        :func:`lasio.reader.read_header_line`.

        '''
        item = self.func(**keys)
        return item

    def num(self, x, default=None):
        '''Attempt to parse a number.

        Arguments:
            x (str, int, float): potential number
            default (int, float, None): fall-back option

        Returns:
            int, float, or **default** - from most to least preferred types.

        '''
        if default is None:
            default = x

        # in case it is a string with a comma decimal mark.
        try:
            pattern, sub = defaults.READ_SUBS['comma-decimal-mark'][0]
            x = re.sub(pattern, sub, x)
        except Exception:
            pass

        # Use builtin int()/float(): the np.int and np.float aliases used
        # previously were deprecated in NumPy 1.20 and removed in 1.24,
        # which made this method silently fall through to `default`.
        try:
            return int(x)
        except (ValueError, TypeError):
            try:
                x = float(x)
            except (ValueError, TypeError):
                return default
        # Reject inf/nan -- they are not useful header values.
        if np.isfinite(x):
            return x
        else:
            return default

    def metadata(self, **keys):
        '''Return HeaderItem correctly formatted according to the order
        prescribed for LAS v 1.2 or 2.0 for the ~W section.

        Keyword arguments should be the key:value pairs returned by
        :func:`lasio.reader.read_header_line`.

        '''
        key_order = self.orders.get(keys['name'], self.default_order)
        if key_order == 'value:descr':
            return HeaderItem(
                keys['name'],             # mnemonic
                keys['unit'],             # unit
                self.num(keys['value']),  # value
                keys['descr'],            # descr
            )
        elif key_order == 'descr:value':
            return HeaderItem(
                keys['name'],             # mnemonic
                keys['unit'],             # unit
                keys['descr'],            # descr
                self.num(keys['value']),  # value
            )

    def curves(self, **keys):
        '''Return CurveItem.

        Keyword arguments should be the key:value pairs returned by
        :func:`lasio.reader.read_header_line`.

        '''
        item = CurveItem(
            keys['name'],   # mnemonic
            keys['unit'],   # unit
            keys['value'],  # value
            keys['descr'],  # descr
        )
        return item

    def params(self, **keys):
        '''Return HeaderItem for ~P section (the same between 1.2 and 2.0 specs)

        Keyword arguments should be the key:value pairs returned by
        :func:`lasio.reader.read_header_line`.

        '''
        return HeaderItem(
            keys['name'],             # mnemonic
            keys['unit'],             # unit
            self.num(keys['value']),  # value
            keys['descr'],            # descr
        )
623
|
|
|
|
624
|
|
|
|
625
|
|
|
def read_line(*args, **kwargs):
    '''Retained for backwards-compatibility.

    See :func:`lasio.reader.read_header_line`.

    '''
    # Thin alias: delegate everything to the current implementation.
    parsed = read_header_line(*args, **kwargs)
    return parsed
632
|
|
|
|
633
|
|
|
|
634
|
|
|
def read_header_line(line, pattern=None):
    '''Read a line from a LAS header section.

    The line is parsed with a regular expression -- see LAS file specs for
    more details, but it should basically be in the format::

        name.unit       value : descr

    Arguments:
        line (str): line from a LAS header section

    Returns:
        A dictionary with keys 'name', 'unit', 'value', and 'descr', each
        containing a string as value.

    '''
    fields = {'name': '', 'unit': '', 'value': '', 'descr': ''}
    if pattern is None:
        # The description is optional: only expect a ':'-delimited descr
        # group when the line actually contains a colon.
        name_unit = (r'\.?(?P<name>[^.]*)\.' +
                     r'(?P<unit>[^\s:]*)')
        if ':' in line:
            pattern = name_unit + r'(?P<value>[^:]*):' + r'(?P<descr>.*)'
        else:
            pattern = name_unit + r'(?P<value>[^:]*)'
    match = re.match(pattern, line)
    for key, raw in match.groupdict().items():
        fields[key] = raw.strip()
        if key == 'unit' and fields[key].endswith('.'):
            fields[key] = fields[key].strip('.')  # see issue #36
    return fields
669
|
|
|
|