Passed
Push — master ( 478ca5...fa390c )
by Stefan
03:58
created

sciapy.level2.density   A

Complexity

Total Complexity 38

Size/Duplication

Total Lines 542
Duplicated Lines 83.39 %

Test Coverage

Coverage 60.69%

Importance

Changes 0
Metric Value
eloc 301
dl 452
loc 542
rs 9.36
c 0
b 0
f 0
ccs 159
cts 262
cp 0.6069
wmc 38

10 Methods

Rating   Name   Duplication   Size   Complexity  
A UTC.dst() 0 2 1
A UTC.utcoffset() 0 2 1
A UTC.tzname() 0 2 1
A scia_densities.__init__() 18 18 1
B scia_densities.read_from_textfile() 63 63 6
B scia_densities.write_to_textfile() 42 42 8
A scia_density.__init__() 15 15 1
C scia_densities.write_to_netcdf() 123 123 7
A scia_densities.read_from_file() 18 18 2
B scia_densities.read_from_netcdf() 65 65 5

2 Functions

Rating   Name   Duplication   Size   Complexity  
A _unique_values() 0 4 1
A main() 18 18 4

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
# -*- coding: utf-8 -*-
2
# vim:fileencoding=utf-8
3
#
4
# Copyright (c) 2015-2018 Stefan Bender
5
#
6
# This file is part of sciapy.
7
# sciapy is free software: you can redistribute it or modify it
8
# under the terms of the GNU General Public License as published by
9
# the Free Software Foundation, version 2.
10
# See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html.
11 1
"""SCIAMACHY level 2 number density retrieval results interface
12
13
Interface classes for the level 2 retrieval results from text (ascii)
14
files and netcdf files for further processing.
15
"""
16
17 1
from __future__ import absolute_import, division, print_function
18
19 1
import os
20 1
import sys
21 1
import datetime as dt
22
23 1
import numpy as np
24 1
try:
25 1
	from netCDF4 import Dataset as netcdf_file
26 1
	fmtargs = {"format": "NETCDF4"}
27
except ImportError:
28
	try:
29
		from scipy.io.netcdf import netcdf_file
30
		fmtargs = {"version": 1}
31
	except ImportError:
32
		from pupynere import netcdf_file
33
		fmtargs = {"version": 1}
34
35 1
__all__ = ["scia_density", "scia_densities", "_UTC"]
36
37 1
try:
38 1
	_UTC = dt.timezone.utc
39
except AttributeError:
40
	# python 2.7
41
	class UTC(dt.tzinfo):
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable dt does not seem to be defined.
Loading history...
42
		def utcoffset(self, d):
43
			return dt.timedelta(0)
44
		def tzname(self, d):
45
			return "UTC"
46
		def dst(self, d):
47
			return dt.timedelta(0)
48
	_UTC = UTC()
49
50 1
_meas_dtypes = [[('gp_id', int),
51
		('alt_max', float), ('alt', float), ('alt_min', float),
52
		('lat_max', float), ('lat', float), ('lat_min', float),
53
		('density', float), ('dens_err_meas', float),
54
		('dens_err_tot', float), ('dens_tot', float)],
55
	[('gp_id', int),
56
		('alt_max', float), ('alt', float), ('alt_min', float),
57
		('lat_max', float), ('lat', float), ('lat_min', float),
58
		('longitude', float),
59
		('density', float), ('dens_err_meas', float),
60
		('dens_err_tot', float), ('dens_tot', float)],
61
	[('gp_id', int),
62
		('alt_max', float), ('alt', float), ('alt_min', float),
63
		('lat_max', float), ('lat', float), ('lat_min', float),
64
		('longitude', float),
65
		('density', float), ('dens_err_meas', float),
66
		('dens_err_tot', float), ('dens_tot', float),
67
		('apriori', float)],
68
	[('gp_id', int),
69
		('alt_max', float), ('alt', float), ('alt_min', float),
70
		('lat_max', float), ('lat', float), ('lat_min', float),
71
		('longitude', float),
72
		('density', float), ('dens_err_meas', float),
73
		('dens_err_tot', float), ('dens_tot', float),
74
		('apriori', float), ('akdiag', float)]]
75
76
77 1
def _unique_values(vals):
78 1
	ldum = []
79 1
	[ldum.append(i) for i in vals if not ldum.count(i)]
80 1
	return np.asarray(ldum).flatten()
81
82
83 1 View Code Duplication
class scia_density(object):
	"""SCIAMACHY single scan retrieved number densities

	Plain data container for a single retrieval grid point.
	All attributes are initialized to zero.

	Attributes
	----------
	gp_id
		grid point id
	alt_min, alt, alt_max
		minimum, central, and maximum altitude
	lat_min, lat, lat_max
		minimum, central, and maximum latitude
	lon
		longitude
	density
		number density
	dens_err_meas, dens_err_tot
		measurement and total uncertainty of the number density
	dens_tot
		total number density
	akdiag
		averaging kernel diagonal element
	apriori
		prior number density
	"""
	def __init__(self):
		self.gp_id = 0
		self.alt_min = 0.
		self.alt = 0.
		self.alt_max = 0.
		self.lat_min = 0.
		self.lat = 0.
		self.lat_max = 0.
		self.lon = 0.
		self.density = 0.
		self.dens_err_meas = 0.
		self.dens_err_tot = 0.
		self.dens_tot = 0.
		self.akdiag = 0.
		self.apriori = 0.
100
101
102 1 View Code Duplication
class scia_densities(object):
	"""SCIAMACHY orbital retrieved number densities

	Class interface to orbit-wise SCIAMACHY retrieval results.
	The attributes are based on the text file layout and are
	tied to the NO retrieval for now.

	Parameters
	----------
	ref_date: str, optional
		The reference date on which to base the date calculations on.
		Default: "2000-01-01"
	ver: str, optional
		Explicit density version, used for exporting the data.
		Not used if set to `None`.
		Default: `None`
	data_ver: str, optional
		Level 2 data version to use, as "ver" used for exporting.
		Not used if set to `None`.
		Default: `None`

	Attributes
	----------
	version
		file version string
	data_version
		level 2 data version
	date0
		reference date
	nalt
		number of altitudes in the orbit
	nlat
		number of latitudes in the orbit
	nlon
		number of longitudes in the orbit, if longitudes are available
	orbit
		SCIAMACHY/Envisat orbit number
	date
		number of days of the orbit counting from the reference date
		date0
	alts_min
	alts
	alts_max
		the altitude bins: minimum, central, and maximum altitude
	lats_min
	lats
	lats_max
		the latitude bins: minimum, central, and maximum latitude
	lons
		the central longitude of the bins, only used if available

	densities
		NO number densities in the bins, (nlat, nalt) array_like
	dens_err_meas
		NO number densities measurement uncertainty,
		(nlat, nalt) array_like
	dens_err_tot
		NO number densities total uncertainty, (nlat, nalt) array_like
	dens_tot
		total number densities calculated and interpolated NRLMSIS-00
		values, (nlat, nalt) array_like

	apriori
		prior NO number densities, (nlat, nalt) array_like if available,
		otherwise `None`
	akdiag
		diagonal element of the averaging kernel matrix at the retrieval
		grid point. (nlat, nalt) array_like if available otherwise `None`

	Methods
	-------
	read_from_textfile
	read_from_netcdf
	read_from_file
	write_to_textfile
	write_to_netcdf

	Note
	----
	The variables are empty when initialized, use one of the
	read_from_...() methods to fill with actual data.
	"""
	def __init__(self, ref_date="2000-01-01", ver=None, data_ver=None):
		self.version = ver
		self.data_version = data_ver
		self.date0 = dt.datetime.strptime(ref_date, "%Y-%m-%d").replace(tzinfo=_UTC)
		self.nalt = 0
		self.nlat = 0
		self.nlon = 0
		self.orbit = -1
		self.date = -1
		self.alts_min = np.array([])
		self.alts = np.array([])
		self.alts_max = np.array([])
		self.lats_min = np.array([])
		self.lats = np.array([])
		self.lats_max = np.array([])
		self.lons = np.array([])
		# The data arrays start out empty as well, so that they exist
		# (as documented) before any of the read_from_...() methods ran.
		self.densities = np.array([])
		self.dens_err_meas = np.array([])
		self.dens_err_tot = np.array([])
		self.dens_tot = np.array([])
		self.akdiag = None
		self.apriori = None

	def read_from_textfile(self, filename):
		"""Read NO densities from ascii table file

		When given a file name, the orbit number, the date, and
		(if not set explicitly) the level 2 data version are derived
		from the file and directory names. This information is not
		available for streams and is left untouched in that case.

		Parameters
		----------
		filename: str, file object or io.TextIOBase.buffer
			The filename or stream to read the data from. For example
			to read from stdin in python 3, pass `sys.stdin.buffer`.
			Files opened by this method are closed again, streams
			passed in by the caller are left open.
		"""
		if hasattr(filename, 'seek'):
			f, own_file = filename, False
		else:
			f, own_file = open(filename, 'rb'), True
		try:
			if own_file:
				# example filename:000NO_orbit_41467_20100203_Dichten.txt
				fn_fields = os.path.basename(filename).split('_')
				self.orbit = int(fn_fields[2])
				self.date = (dt.datetime.strptime(fn_fields[3], "%Y%m%d")
							.replace(tzinfo=_UTC) - self.date0).days
				if self.data_version is None:
					# try some heuristics to find the level 2 data version
					self.data_version = os.path.dirname(filename).split('v')[-1]
			# The header line determines the column layout: the variant
			# without longitude has 13 whitespace separated header tokens
			# and each optional column adds one more.
			line = f.readline()
			data = line.split()
			mydtype = _meas_dtypes[len(data) - 13]
			# `genfromtxt` returns a 0-d array for a single data row,
			# `atleast_1d` keeps the column handling below uniform.
			marr = np.atleast_1d(np.genfromtxt(f, dtype=mydtype))
		finally:
			if own_file:
				f.close()

		fields = marr.dtype.names

		# unique altitudes
		self.alts_min = _unique_values(marr['alt_min'])
		self.alts = _unique_values(marr['alt'])
		self.alts_max = _unique_values(marr['alt_max'])

		# unique latitudes
		self.lats_min = _unique_values(marr['lat_min'])
		self.lats = _unique_values(marr['lat'])
		self.lats_max = _unique_values(marr['lat_max'])

		# unique longitudes if available
		if 'longitude' in fields:
			self.lons = _unique_values(marr['longitude'])

		self.nalt = len(self.alts)
		self.nlat = len(self.lats)
		self.nlon = len(self.lons)

		def _grid(name):
			# The table lists all latitudes for each altitude in turn,
			# reorder by latitude first, then altitude: (nlat, nalt).
			return marr[name].flatten().reshape(self.nalt, self.nlat).transpose()

		self.densities = _grid('density')
		self.dens_err_meas = _grid('dens_err_meas')
		self.dens_err_tot = _grid('dens_err_tot')
		self.dens_tot = _grid('dens_tot')

		# apriori if available
		if 'apriori' in fields:
			self.apriori = _grid('apriori')
		# akdiag if available
		if 'akdiag' in fields:
			self.akdiag = _grid('akdiag')

	def write_to_textfile(self, filename):
		"""Write NO densities to ascii table files

		The longitude column is always written, `lons` is indexed
		by latitude and needs to hold one value per latitude bin.

		Parameters
		----------
		filename: str or file object
			The filename or stream to write the data to. The table is
			written as text via `print()`, hence a stream needs to
			accept `str`, e.g. pass `sys.stdout` for writing to stdout.
			Files opened by this method are closed again, streams
			passed in by the caller are left open.
		"""
		if hasattr(filename, 'seek'):
			f, own_file = filename, False
		else:
			f, own_file = open(filename, 'w'), True

		try:
			header = "%5s %13s %12s %13s %13s %12s %13s %13s  %13s %12s %12s %12s" % ("GP_ID",
					"Max_Hoehe[km]", "Hoehe[km]", "Min_Hoehe[km]",
					"Max_Breite[°]", "Breite[°]", "Min_Breite[°]",
					"Laenge[°]",
					"Dichte[cm^-3]", "Fehler Mess[cm^-3]",
					"Fehler tot[cm^-3]", "Gesamtdichte[cm^-3]")
			if self.apriori is not None:
				header = header + " %12s" % ("apriori[cm^-3]",)
			if self.akdiag is not None:
				header = header + " %12s" % ("AKdiag",)
			print(header, file=f)

			oformat = "%5i  %+1.5E %+1.5E  %+1.5E  %+1.5E %+1.5E  %+1.5E  %+1.5E   %+1.5E       %+1.5E      %+1.5E        %+1.5E"
			oformata = "  %+1.5E"

			# altitude is the outer (slow) and latitude the inner (fast)
			# index, matching what `read_from_textfile()` expects
			for i, a in enumerate(self.alts):
				for j, b in enumerate(self.lats):
					row = oformat % (i * self.nlat + j,
						self.alts_max[i], a, self.alts_min[i],
						self.lats_max[j], b, self.lats_min[j], self.lons[j],
						self.densities[j, i], self.dens_err_meas[j, i],
						self.dens_err_tot[j, i], self.dens_tot[j, i])
					if self.apriori is not None:
						row += " " + oformata % self.apriori[j, i]
					if self.akdiag is not None:
						row += " " + oformata % self.akdiag[j, i]
					print(row, file=f)
		finally:
			if own_file:
				f.close()

	def write_to_netcdf(self, filename, close=True):
		"""Write NO densities to netcdf files

		This function has no stream, i.e. file object, support.

		Parameters
		----------
		filename: str
			The name of the file to write the data to.
		close: bool, optional
			Whether or not to close the file after writing.
			Setting to `False` enables appending further data
			to the same file.
			Default: True

		Returns
		-------
		Nothing if `close` is True. If `close` is False, returns either an
		`netCDF4.Dataset`,
		`scipy.io.netcdf.netcdf_file` or
		`pupynere.netcdf_file` instance depending on availability.
		"""
		# type codes understood by all supported netcdf backends
		i8 = np.dtype('int64').char
		f8 = np.dtype('float64').char
		grid_dims = ('time', 'latitude', 'altitude')

		alts_min_out = np.asarray(self.alts_min).reshape(self.nalt)
		alts_out = np.asarray(self.alts).reshape(self.nalt)
		alts_max_out = np.asarray(self.alts_max).reshape(self.nalt)

		lats_min_out = np.asarray(self.lats_min).reshape(self.nlat)
		lats_out = np.asarray(self.lats).reshape(self.nlat)
		lats_max_out = np.asarray(self.lats_max).reshape(self.nlat)

		ncf = netcdf_file(filename, 'w', **fmtargs)

		if self.version is not None:
			ncf.version = self.version
		if self.data_version is not None:
			ncf.L2_data_version = self.data_version
		# timezone aware replacement for the deprecated `utcnow()`,
		# the UTC offset and name are spelled out in the format string
		ncf.creation_time = dt.datetime.now(_UTC).strftime("%a %b %d %Y %H:%M:%S +00:00 (UTC)")
		ncf.author = "Firstname Lastname"

		# create netcdf file, "time" is the unlimited (record) dimension
		ncf.createDimension('altitude', self.nalt)
		ncf.createDimension('latitude', self.nlat)
		ncf.createDimension('time', None)

		forbit = ncf.createVariable('orbit', i8, ('time',))
		ftime = ncf.createVariable('time', i8, ('time',))

		falts_min = ncf.createVariable('alt_min', f8, ('altitude',))
		falts = ncf.createVariable('altitude', f8, ('altitude',))
		falts_max = ncf.createVariable('alt_max', f8, ('altitude',))
		flats_min = ncf.createVariable('lat_min', f8, ('latitude',))
		flats = ncf.createVariable('latitude', f8, ('latitude',))
		flats_max = ncf.createVariable('lat_max', f8, ('latitude',))

		for falt in (falts_min, falts, falts_max):
			falt.units = 'km'
			falt.positive = 'up'
		for flat in (flats_min, flats, flats_max):
			flat.units = 'degrees_north'

		forbit.units = '1'
		forbit.long_name = 'SCIAMACHY/Envisat orbit number'
		ftime.units = 'days since {0}'.format(self.date0.isoformat(sep=' '))
		ftime.standard_name = 'time'

		fdens = ncf.createVariable('density', f8, grid_dims)
		fdens.units = 'cm^{-3}'
		fdens.standard_name = 'number_concentration_of_nitrogen_monoxide_molecules_in_air'
		fdens_err_meas = ncf.createVariable('error_meas', f8, grid_dims)
		fdens_err_meas.units = 'cm^{-3}'
		fdens_err_meas.long_name = 'NO number density measurement error'
		fdens_err_tot = ncf.createVariable('error_tot', f8, grid_dims)
		fdens_err_tot.units = 'cm^{-3}'
		fdens_err_tot.long_name = 'NO number density total error'
		fdens_tot = ncf.createVariable('density_air', f8, grid_dims)
		fdens_tot.units = 'cm^{-3}'
		fdens_tot.long_name = 'approximate overall number concentration of air molecules (NRLMSIS-00)'

		ftime[:] = self.date
		forbit[:] = self.orbit

		falts_min[:] = alts_min_out
		falts[:] = alts_out
		falts_max[:] = alts_max_out
		flats_min[:] = lats_min_out
		flats[:] = lats_out
		flats_max[:] = lats_max_out

		# the data arrays are already ordered (latitude, altitude),
		# they fill the first (and only) time record
		fdens[0, :] = self.densities
		fdens_err_meas[0, :] = self.dens_err_meas
		fdens_err_tot[0, :] = self.dens_err_tot
		fdens_tot[0, :] = self.dens_tot

		# longitudes if they are available
		if self.nlon > 0:
			lons_out = np.asarray(self.lons).reshape(self.nlon)
			flons = ncf.createVariable('longitude', f8, ('time', 'latitude',))
			flons.units = 'degrees_east'
			flons[0, :] = lons_out

		if self.apriori is not None:
			fapriori = ncf.createVariable('apriori', f8, grid_dims)
			fapriori.units = 'cm^{-3}'
			fapriori.long_name = 'apriori NO number density'
			fapriori[0, :] = self.apriori

		if self.akdiag is not None:
			fakdiag = ncf.createVariable('akm_diagonal', f8, grid_dims)
			fakdiag.units = '1'
			fakdiag.long_name = 'averaging kernel matrix diagonal element'
			fakdiag[0, :] = self.akdiag

		if close:
			ncf.close()
			return None
		return ncf

	def read_from_netcdf(self, filename, close=True):
		"""Read NO densities from netcdf files

		This function has no stream, i.e. file object support.

		The variables are read completely, i.e. including the leading
		"time" dimension as written by `write_to_netcdf()`.

		Parameters
		----------
		filename: str
			The filename to read the data from.
		close: bool, optional
			Whether or not to close the file after reading.
			Setting to `False` enables reading further data
			from the same file.
			Default: True

		Returns
		-------
		Nothing if `close` is True. If `close` is False, returns either an
		`netCDF4.Dataset`,
		`scipy.io.netcdf.netcdf_file` or
		`pupynere.netcdf_file` instance depending on availability.
		"""
		ncf = netcdf_file(filename, 'r')

		def _dim_len(name):
			# scipy/pupynere store the plain length, netCDF4 stores
			# a `Dimension` object which supports `len()`
			dim = ncf.dimensions[name]
			return dim if isinstance(dim, int) else len(dim)

		try:
			variables = ncf.variables

			self.nalt = _dim_len('altitude')
			self.nlat = _dim_len('latitude')

			self.alts_min = variables['alt_min'][:]
			self.alts = variables['altitude'][:]
			self.alts_max = variables['alt_max'][:]
			self.lats_min = variables['lat_min'][:]
			self.lats = variables['latitude'][:]
			self.lats_max = variables['lat_max'][:]

			self.date = variables['time'][:]
			self.orbit = variables['orbit'][:]

			self.densities = variables['density'][:]
			self.dens_err_meas = variables['error_meas'][:]
			self.dens_err_tot = variables['error_tot'][:]
			self.dens_tot = variables['density_air'][:]

			# longitudes if they are available; `write_to_netcdf()`
			# stores them along ('time', 'latitude') without a separate
			# longitude dimension, so count the values instead
			if 'longitude' in variables:
				self.lons = variables['longitude'][:]
				self.nlon = int(np.size(self.lons))

			# apriori
			if 'apriori' in variables:
				self.apriori = variables['apriori'][:]

			# akm diagonal elements
			if 'akm_diagonal' in variables:
				self.akdiag = variables['akm_diagonal'][:]
		except Exception:
			# do not leak the file handle for incomplete files
			ncf.close()
			raise

		if close:
			ncf.close()
			return None
		return ncf

	def read_from_file(self, filename):
		"""Wrapper to read NO densities from files

		Simple wrapper to delegate reading the data from either netcdf
		or ascii files. Poor man's logic: simply try netcdf first, and
		if that fails, read as ascii. Streams are always read as ascii
		because the netcdf reader has no stream support.

		Parameters
		----------
		filename: str, or file object
			The filename (or stream, ascii only) to read the data from.
		"""
		if hasattr(filename, 'seek'):
			self.read_from_textfile(filename)
			return
		try:
			# try netcdf first
			self.read_from_netcdf(filename)
		except Exception:
			# fall back to text file as a last resort
			self.read_from_textfile(filename)
518
519
520 1 View Code Duplication
def main(*args):
	"""Convert a retrieval result file to netcdf

	Reads the densities from the input (netcdf or ascii table)
	and writes them to the netcdf output file.

	Parameters
	----------
	*args: str, optional
		Command line arguments without the program name:
		`[input] output`. Taken from `sys.argv` if left out.
		If only the output file is given, the ascii table
		is read from stdin.
	"""
	argv = list(args) if args else sys.argv[1:]
	if not argv:
		print("Not enough arguments, Usage:\n"
			"{0} [input] output [< input]".format(sys.argv[0]))
		sys.exit(1)
	if len(argv) == 1:
		# only the output was given, read the binary stdin stream
		# (Python 3) or plain stdin (Python 2)
		infile = getattr(sys.stdin, "buffer", sys.stdin)
		outfile = argv[0]
	else:
		infile, outfile = argv[0], argv[1]
	sdl = scia_densities()
	sdl.read_from_file(infile)
	sdl.write_to_netcdf(outfile)


if __name__ == "__main__":
	sys.exit(main())
542