Passed
Push — master ( 897815...7a88ef )
by Stefan
04:11
created

scia_densities.read_from_textfile()   B

Complexity

Conditions 7

Size

Total Lines 70
Code Lines 45

Duplication

Lines 70
Ratio 100 %

Code Coverage

Tests 41
CRAP Score 7.0155

Importance

Changes 0
Metric Value
cc 7
eloc 45
nop 2
dl 70
loc 70
ccs 41
cts 44
cp 0.9318
crap 7.0155
rs 7.4
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
# -*- coding: utf-8 -*-
2
# vim:fileencoding=utf-8
3
#
4
# Copyright (c) 2015-2018 Stefan Bender
5
#
6
# This file is part of sciapy.
7
# sciapy is free software: you can redistribute it or modify it
8
# under the terms of the GNU General Public License as published by
9
# the Free Software Foundation, version 2.
10
# See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html.
11 1
"""SCIAMACHY level 2 number density retrieval results interface
12
13
Interface classes for the level 2 retrieval results from text (ascii)
14
files and netcdf files for further processing.
15
"""
16
17 1
from __future__ import absolute_import, division, print_function
18
19 1
import os
20 1
import re
21 1
import sys
22 1
import datetime as dt
23
24 1
import numpy as np
25 1
try:
26 1
	from netCDF4 import Dataset as netcdf_file
27 1
	fmtargs = {"format": "NETCDF4"}
28
except ImportError:
29
	try:
30
		from scipy.io.netcdf import netcdf_file
31
		fmtargs = {"version": 1}
32
	except ImportError:
33
		from pupynere import netcdf_file
34
		fmtargs = {"version": 1}
35
36 1
__all__ = ["scia_densities", "_UTC"]
37
38 1
try:
39 1
	_UTC = dt.timezone.utc
40
except AttributeError:
41
	# python 2.7
42
	class UTC(dt.tzinfo):
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable dt does not seem to be defined.
Loading history...
43
		def utcoffset(self, d):
44
			return dt.timedelta(0)
45
		def tzname(self, d):
46
			return "UTC"
47
		def dst(self, d):
48
			return dt.timedelta(0)
49
	_UTC = UTC()
50
51 1
_meas_dtypes = [
52
	# initial output << v1.0
53
	[('gp_id', int),
54
		('alt_max', float), ('alt', float), ('alt_min', float),
55
		('lat_max', float), ('lat', float), ('lat_min', float),
56
		('density', float), ('dens_err_meas', float),
57
		('dens_err_tot', float), ('dens_tot', float)],
58
	# < 1.0 (NO_emiss-178-g729efb0)
59
	[('gp_id', int),
60
		('alt_max', float), ('alt', float), ('alt_min', float),
61
		('lat_max', float), ('lat', float), ('lat_min', float),
62
		('longitude', float),
63
		('density', float), ('dens_err_meas', float),
64
		('dens_err_tot', float), ('dens_tot', float)],
65
	# < 1.5 (NO_emiss-183-gcaa9349)
66
	[('gp_id', int),
67
		('alt_max', float), ('alt', float), ('alt_min', float),
68
		('lat_max', float), ('lat', float), ('lat_min', float),
69
		('longitude', float),
70
		('density', float), ('dens_err_meas', float),
71
		('dens_err_tot', float), ('dens_tot', float),
72
		('apriori', float)],
73
	# >= 1.5 (NO-v1.5)
74
	[('gp_id', int),
75
		('alt_max', float), ('alt', float), ('alt_min', float),
76
		('lat_max', float), ('lat', float), ('lat_min', float),
77
		('longitude', float),
78
		('density', float), ('dens_err_meas', float),
79
		('dens_err_tot', float), ('dens_tot', float),
80
		('apriori', float), ('akdiag', float)],
81
]
82
83
84 1 View Code Duplication
class scia_densities(object):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
85
	"""SCIAMACHY orbital retrieved number densities
86
87
	Class interface to orbit-wise SCIAMACHY retrieval results.
88
	The attributes are based on the text file layout and are
89
	tied to the NO retrieval for now.
90
91
	Parameters
92
	----------
93
	ref_date: str, optional
94
		The reference date on which to base the date calculations on.
95
		Default: "2000-01-01"
96
	ver: str, optional
97
		Explicit density version, used for exporting the data.
98
		Not used if set to `None`.
99
		Default: `None`
100
	data_ver: str, optional
101
		Level 2 data version to use, as "ver" used for exporting.
102
		Not used if set to `None`.
103
		Default: `None`
104
105
	Attributes
106
	----------
107
	version
108
		file version string
109
	data_version
110
		level 2 data version
111
	date0
112
		reference date
113
	nalt
114
		number of altitudes in the orbit
115
	nlat
116
		number of latitudes in the orbit
117
	nlon
118
		number of longitudes in the orbit, if longitudes are available
119
	orbit
120
		SCIAMACHY/Envisat orbit number
121
	date
122
		number of days of the orbit counting from the reference date
123
		date0
124
	alts_min
125
	alts
126
	alts_max
127
		the altitude bins: minimum, central, and maximum altitude
128
	lats_min
129
	lats
130
	lats_max
131
		the latitude bins: minimum, central, and maximum latitude
132
	lons:
133
		the central longitude of the bins, only used if available
134
135
	densities
136
		NO number densities in the bins, (nlat, nalt) array_like
137
	dens_err_meas
138
		NO number densities measurement uncertainty,
139
		(nlat, nalt) array_like
140
	dens_err_tot
141
		NO number densities total uncertainty, (nlat, nalt) array_like
142
	dens_tot
143
		total number densities calculated and interpolated NRLMSIS-00
144
		values, (nlat, nalt) array_like
145
146
	apriori
147
		prior NO number densities, (nlat, nalt) array_like if available,
148
		otherwise `None`
149
	akdiag
150
		diagonal element of the averaging kernel matrix at the retrieval
151
		grid point. (nlat, nalt) array_like if available otherwise `None`
152
153
	Methods
154
	-------
155
	read_from_textfile
156
	read_from_netcdf
157
	read_from_file
158
	write_to_textfile
159
	write_to_netcdf
160
161
	Note
162
	----
163
	The variables are empty when initialized, use one of the
164
	read_from_...() methods to fill with actual data.
165
	"""
166 1
	def __init__(self, author="unknown", ref_date="2000-01-01", ver=None, data_ver=None):
167 1
		self.author = author
168 1
		self.version = ver
169 1
		self.data_version = data_ver
170 1
		self.date0 = dt.datetime.strptime(ref_date, "%Y-%m-%d").replace(tzinfo=_UTC)
171 1
		self.nalt = 0
172 1
		self.nlat = 0
173 1
		self.nlon = 0
174 1
		self.orbit = -1
175 1
		self.date = -1
176 1
		self.alts_min = np.array([])
177 1
		self.alts = np.array([])
178 1
		self.alts_max = np.array([])
179 1
		self.lats_min = np.array([])
180 1
		self.lats = np.array([])
181 1
		self.lats_max = np.array([])
182 1
		self.lons = np.array([])
183 1
		self.akdiag = None
184 1
		self.apriori = None
185
186 1
	def read_from_textfile(self, filename):
187
		"""Read NO densities from ascii table file
188
189
		Parameters
190
		----------
191
		filename: str, file object or io.TextIOBase.buffer
192
			The filename or stream to read the data from. For example
193
			to read from stdin in python 3, pass `sys.stdin.buffer`.
194
		"""
195 1
		def _unsrt_unique(a):
196 1
			return a[np.sort(np.unique(a, return_index=True)[1])]
197
198 1
		if hasattr(filename, 'seek'):
199
			f = filename
200
		else:
201 1
			f = open(filename, 'rb')
202
			# example filename:000NO_orbit_41467_20100203_Dichten.txt
203 1
			fn_fields = os.path.basename(filename).split('_')
204 1
			self.orbit = int(fn_fields[2])
205 1
			self.date = (dt.datetime.strptime(fn_fields[3], "%Y%m%d")
206
						.replace(tzinfo=_UTC) - self.date0).days
207 1
			if self.data_version is None:
208
				# try some heuristics to find the level 2 data version
209 1
				_dir = os.path.dirname(filename)
210 1
				_m = re.search(".*[_-]v([0-9]+[.].*)", _dir)
211 1
				if _m:
212 1
					self.data_version = _m.group(1)
213
				else:
214 1
					self.data_version = "unknown"
215 1
		data = f.readline().split()
216 1
		mydtype = _meas_dtypes[len(data) - 13]
217 1
		marr = np.genfromtxt(f, dtype=mydtype)
218 1
		f.close()
219
220
		# unique altitudes
221 1
		self.alts_min = _unsrt_unique(marr['alt_min'])
222 1
		self.alts = _unsrt_unique(marr['alt'])
223 1
		self.alts_max = _unsrt_unique(marr['alt_max'])
224
225
		# unique latitudes
226 1
		self.lats_min = _unsrt_unique(marr['lat_min'])
227 1
		self.lats = _unsrt_unique(marr['lat'])
228 1
		self.lats_max = _unsrt_unique(marr['lat_max'])
229
230
		# unique longitudes if available
231 1
		try:
232 1
			self.lons = _unsrt_unique(marr['longitude'])
233
		except ValueError:
234
			pass
235
236 1
		self.nalt = len(self.alts)
237 1
		self.nlat = len(self.lats)
238 1
		self.nlon = len(self.lons)
239
240
		# reorder by latitude first, then altitude
241 1
		self.densities = marr['density'].flatten().reshape(self.nalt, self.nlat).transpose()
242 1
		self.dens_err_meas = marr['dens_err_meas'].flatten().reshape(self.nalt, self.nlat).transpose()
243 1
		self.dens_err_tot = marr['dens_err_tot'].flatten().reshape(self.nalt, self.nlat).transpose()
244 1
		self.dens_tot = marr['dens_tot'].flatten().reshape(self.nalt, self.nlat).transpose()
245
246
		# apriori if available
247 1
		try:
248 1
			self.apriori = marr['apriori'].flatten().reshape(self.nalt, self.nlat).transpose()
249 1
		except ValueError:
250 1
			pass
251
		# akdiag if available
252 1
		try:
253 1
			self.akdiag = marr['akdiag'].flatten().reshape(self.nalt, self.nlat).transpose()
254 1
		except ValueError:
255 1
			pass
256
257 1
	def write_to_textfile(self, filename):
258
		"""Write NO densities to ascii table files
259
260
		Parameters
261
		----------
262
		filename: str or file object or io.TextIOBase.buffer
263
			The filename or stream to write the data to. For writing to
264
			stdout in python 3, pass `sys.stdout.buffer`.
265
		"""
266 1
		if hasattr(filename, 'seek'):
267
			f = filename
268
		else:
269 1
			f = open(filename, 'w')
270
271 1
		header = "%5s %13s %12s %13s %13s %12s %13s %13s  %13s %12s %12s %12s" % ("GP_ID",
272
				"Max_Hoehe[km]", "Hoehe[km]", "Min_Hoehe[km]",
273
				"Max_Breite[°]", "Breite[°]", "Min_Breite[°]",
274
				"Laenge[°]",
275
				"Dichte[cm^-3]", "Fehler Mess[cm^-3]",
276
				"Fehler tot[cm^-3]", "Gesamtdichte[cm^-3]")
277 1
		if self.apriori is not None:
278 1
			header = header + " %12s" % ("apriori[cm^-3]",)
279 1
		if self.akdiag is not None:
280 1
			header = header + " %12s" % ("AKdiag",)
281 1
		print(header, file=f)
282
283 1
		oformat = "%5i  %+1.5E %+1.5E  %+1.5E  %+1.5E %+1.5E  %+1.5E  %+1.5E   %+1.5E       %+1.5E      %+1.5E        %+1.5E"
284 1
		oformata = "  %+1.5E"
285
286 1
		for i, a in enumerate(self.alts):
287 1
			for j, b in enumerate(self.lats):
288 1
				print(oformat % (i * self.nlat + j,
289
					self.alts_max[i], a, self.alts_min[i],
290
					self.lats_max[j], b, self.lats_min[j], self.lons[j],
291
					self.densities[j, i], self.dens_err_meas[j, i],
292
					self.dens_err_tot[j, i], self.dens_tot[j, i]),
293
					end="", file=f)
294 1
				if self.apriori is not None:
295 1
					print(" " + oformata % self.apriori[j, i], end="", file=f)
296 1
				if self.akdiag is not None:
297 1
					print(" " + oformata % self.akdiag[j, i], end="", file=f)
298 1
				print("", file=f)
299
300 1
	def write_to_netcdf(self, filename, close=True):
301
		"""Write NO densities to netcdf files
302
303
		This function has no stream, i.e. file object, support.
304
305
		Parameters
306
		----------
307
		filename: str
308
			The name of the file to write the data to.
309
		close: bool, optional
310
			Whether or not to close the file after writing.
311
			Setting to `False` enables appending further data
312
			to the same file.
313
			Default: True
314
315
		Returns
316
		-------
317
		Nothing if `close` is True. If `close` is False, returns either an
318
		`netCDF4.Dataset`,
319
		`scipy.io.netcdf.netcdf_file` or
320
		`pupynere.netcdf_file` instance depending on availability.
321
		"""
322 1
		alts_min_out = np.asarray(self.alts_min).reshape(self.nalt)
323 1
		alts_out = np.asarray(self.alts).reshape(self.nalt)
324 1
		alts_max_out = np.asarray(self.alts_max).reshape(self.nalt)
325
326 1
		lats_min_out = np.asarray(self.lats_min).reshape(self.nlat)
327 1
		lats_out = np.asarray(self.lats).reshape(self.nlat)
328 1
		lats_max_out = np.asarray(self.lats_max).reshape(self.nlat)
329
330 1
		ncf = netcdf_file(filename, 'w', **fmtargs)
331
332 1
		if self.version is not None:
333 1
			ncf.version = self.version
334 1
		if self.data_version is not None:
335 1
			ncf.L2_data_version = self.data_version
336
		#ncf.creation_time = dt.datetime.utcnow().replace(tzinfo=_UTC).strftime("%a %b %d %Y %H:%M:%S %z (%Z)")
337 1
		ncf.creation_time = dt.datetime.utcnow().strftime("%a %b %d %Y %H:%M:%S +00:00 (UTC)")
338 1
		ncf.author = self.author
339
340
		# create netcdf file
341 1
		ncf.createDimension('altitude', self.nalt)
342 1
		ncf.createDimension('latitude', self.nlat)
343 1
		ncf.createDimension('time', None)
344
345 1
		forbit = ncf.createVariable('orbit', np.dtype('int64').char, ('time',))
346 1
		ftime = ncf.createVariable('time', np.dtype('int64').char, ('time',))
347
348 1
		falts_min = ncf.createVariable('alt_min', np.dtype('float64').char, ('altitude',))
349 1
		falts = ncf.createVariable('altitude', np.dtype('float64').char, ('altitude',))
350 1
		falts_max = ncf.createVariable('alt_max', np.dtype('float64').char, ('altitude',))
351 1
		flats_min = ncf.createVariable('lat_min', np.dtype('float64').char, ('latitude',))
352 1
		flats = ncf.createVariable('latitude', np.dtype('float64').char, ('latitude',))
353 1
		flats_max = ncf.createVariable('lat_max', np.dtype('float64').char, ('latitude',))
354
355 1
		falts_min.units = 'km'
356 1
		falts_min.positive = 'up'
357 1
		falts.units = 'km'
358 1
		falts.positive = 'up'
359 1
		falts_max.units = 'km'
360 1
		falts_max.positive = 'up'
361 1
		flats_min.units = 'degrees_north'
362 1
		flats.units = 'degrees_north'
363 1
		flats_max.units = 'degrees_north'
364
365 1
		forbit.units = '1'
366 1
		forbit.long_name = 'SCIAMACHY/Envisat orbit number'
367 1
		ftime.units = 'days since {0}'.format(self.date0.isoformat(sep=' '))
368 1
		ftime.standard_name = 'time'
369
370 1
		fdens = ncf.createVariable('density', np.dtype('float64').char, ('time', 'latitude', 'altitude'))
371 1
		fdens.units = 'cm^{-3}'
372 1
		fdens.standard_name = 'number_concentration_of_nitrogen_monoxide_molecules_in_air'
373 1
		fdens_err_meas = ncf.createVariable('error_meas', np.dtype('float64').char, ('time', 'latitude', 'altitude'))
374 1
		fdens_err_meas.units = 'cm^{-3}'
375 1
		fdens_err_meas.long_name = 'NO number density measurement error'
376 1
		fdens_err_tot = ncf.createVariable('error_tot', np.dtype('float64').char, ('time', 'latitude', 'altitude'))
377 1
		fdens_err_tot.units = 'cm^{-3}'
378 1
		fdens_err_tot.long_name = 'NO number density total error'
379 1
		fdens_tot = ncf.createVariable('density_air', np.dtype('float64').char, ('time', 'latitude', 'altitude'))
380 1
		fdens_tot.units = 'cm^{-3}'
381 1
		fdens_tot.long_name = 'approximate overall number concentration of air molecules (NRLMSIS-00)'
382
383 1
		ftime[:] = self.date
384 1
		forbit[:] = self.orbit
385
386 1
		falts_min[:] = alts_min_out
387 1
		falts[:] = alts_out
388 1
		falts_max[:] = alts_max_out
389 1
		flats_min[:] = lats_min_out
390 1
		flats[:] = lats_out
391 1
		flats_max[:] = lats_max_out
392
		# reorder by latitude first, then altitude
393 1
		fdens[0, :] = self.densities
394
		# reorder by latitude first, then altitude
395 1
		fdens_err_meas[0, :] = self.dens_err_meas
396 1
		fdens_err_tot[0, :] = self.dens_err_tot
397 1
		fdens_tot[0, :] = self.dens_tot
398
399
		# longitudes if they are available
400 1
		if self.nlon > 0:
401 1
			lons_out = np.asarray(self.lons).reshape(self.nlon)
402 1
			flons = ncf.createVariable('longitude', np.dtype('float64').char, ('time', 'latitude',))
403 1
			flons.units = 'degrees_east'
404 1
			flons[0, :] = lons_out
405
406 1
		if self.apriori is not None:
407 1
			fapriori = ncf.createVariable('apriori',
408
					np.dtype('float64').char, ('time', 'latitude', 'altitude'))
409 1
			fapriori.units = 'cm^{-3}'
410 1
			fapriori.long_name = 'apriori NO number density'
411 1
			fapriori[0, :] = self.apriori
412
413 1
		if self.akdiag is not None:
414 1
			fakdiag = ncf.createVariable('akm_diagonal',
415
					np.dtype('float64').char, ('time', 'latitude', 'altitude'))
416 1
			fakdiag.units = '1'
417 1
			fakdiag.long_name = 'averaging kernel matrix diagonal element'
418 1
			fakdiag[0, :] = self.akdiag
419 1
		if close:
420 1
			ncf.close()
421
		else:
422 1
			return ncf
423
424 1
	def read_from_netcdf(self, filename, close=True):
425
		"""Read NO densities from netcdf files
426
427
		This function has no stream, i.e. file object support.
428
429
		Parameters
430
		----------
431
		filename: str
432
			The filename to read the data from.
433
		close: bool, optional
434
			Whether or not to close the file after reading.
435
			Setting to `False` enables reading further data
436
			from the same file.
437
			Default: True
438
439
		Returns
440
		-------
441
		Nothing if `close` is True. If `close` is False, returns either an
442
		`netCDF4.Dataset`,
443
		`scipy.io.netcdf.netcdf_file` or
444
		`pupynere.netcdf_file` instance depending on availability.
445
		"""
446 1
		ncf = netcdf_file(filename, 'r')
447
448 1
		try:
449 1
			self.author = ncf.author
450
		except AttributeError:
451
			pass
452 1
		try:
453 1
			self.version = ncf.version
454 1
		except AttributeError:
455 1
			pass
456 1
		try:
457 1
			self.data_version = ncf.L2_data_version
458
		except AttributeError:
459
			pass
460
461 1
		self.nalt = len(ncf.dimensions['altitude'])
462 1
		self.nlat = len(ncf.dimensions['latitude'])
463
464 1
		self.alts_min = ncf.variables['alt_min'][:]
465 1
		self.alts = ncf.variables['altitude'][:]
466 1
		self.alts_max = ncf.variables['alt_max'][:]
467 1
		self.lats_min = ncf.variables['lat_min'][:]
468 1
		self.lats = ncf.variables['latitude'][:]
469 1
		self.lats_max = ncf.variables['lat_max'][:]
470
471 1
		self.date = ncf.variables['time'][:]
472 1
		self.orbit = ncf.variables['orbit'][:]
473
474 1
		self.densities = ncf.variables['density'][:]
475 1
		self.dens_err_meas = ncf.variables['error_meas'][:]
476 1
		self.dens_err_tot = ncf.variables['error_tot'][:]
477 1
		self.dens_tot = ncf.variables['density_air'][:]
478
479
		# longitudes if they are available
480 1
		try:
481 1
			self.lons = ncf.variables['longitude'][:]
482 1
			self.nlon = self.lons.shape[1]
483
		except:
484
			pass
485
486
		# apriori
487 1
		try:
488 1
			self.apriori = ncf.variables['apriori'][:]
489 1
		except:
490 1
			pass
491
492
		# akm diagonal elements
493 1
		try:
494 1
			self.akdiag = ncf.variables['akm_diagonal'][:]
495 1
		except:
496 1
			pass
497
498 1
		if close:
499 1
			ncf.close()
500
		else:
501
			return ncf
502
503 1
	def read_from_file(self, filename):
504
		"""Wrapper to read NO desnities from files
505
506
		Simple wrapper to delegate reading the data from either netcdf
507
		or ascii files. Poor man's logic: simply try netcdf first, and
508
		if that fails, read as ascii.
509
510
		Parameters
511
		----------
512
		filename: str
513
			The filename to read the data from.
514
		"""
515 1
		try:
516
			# try netcdf first
517 1
			self.read_from_netcdf(filename)
518 1
		except (IOError, OSError):
519
			# fall back to text file as a last resort
520 1
			self.read_from_textfile(filename)
521
522
523 1 View Code Duplication
def main(*args):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
524
	argc = len(sys.argv)
525
	if argc < 2:
526
		print("Not enough arguments, Usage:\n"
527
			"{0} [input] output [< input]".format(sys.argv[0]))
528
		sys.exit(1)
529
	elif argc < 3:
530
		try:
531
			infile = sys.stdin.buffer  # Python 3
532
		except AttributeError:
533
			infile = sys.stdin
534
		outfile = sys.argv[1]
535
	else:
536
		infile = sys.argv[1]
537
		outfile = sys.argv[2]
538
	sdl = scia_densities()
539
	sdl.read_from_file(infile)
0 ignored issues
show
introduced by
The variable infile does not seem to be defined for all execution paths.
Loading history...
540
	sdl.write_to_netcdf(outfile)
0 ignored issues
show
introduced by
The variable outfile does not seem to be defined for all execution paths.
Loading history...
541
542
543 1
if __name__ == "__main__":
544
	sys.exit(main())
545