Passed
Push — master (353449...a10708) by Stefan, created 05:50

sciapy.regress.load_data.load_solar_gm_table()   (rating: A)

Complexity
  Conditions: 5

Size
  Total Lines: 61
  Code Lines: 16

Duplication
  Lines: 61
  Ratio: 100 %

Code Coverage
  Tests: 9
  CRAP Score: 5.025

Importance
  Changes: 0

Metric   Value
cc       5
eloc     16
nop      5
dl       61
loc      61
ccs      9
cts      10
cp       0.9
crap     5.025
rs       9.1333
c        0
b        0
f        0
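For reference, the listed CRAP Score is what the commonly used CRAP formula gives for this complexity and coverage; this is an inference from the numbers above, assuming the standard definition:

cc = 5                              # cyclomatic complexity ("Conditions")
cp = 0.9                            # statement coverage (ccs / cts = 9 / 10)
crap = cc**2 * (1. - cp)**3 + cc    # 25 * 0.001 + 5
print(crap)                         # 5.025, matching the reported CRAP Score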

How to fix: Long Method

Small methods make your code easier to understand, especially when combined with a good name. And if a method is small, finding a good name for it is usually much easier.

For example, if you find yourself adding comments to a method's body, that is usually a good sign that the commented part should be extracted into a new method, with the comment serving as a starting point for its name (a minimal sketch of such an extraction follows).
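Purely as an illustration (the names below are invented for this example and do not come from sciapy), such an extraction might look like this:

# Before: one method doing two things, with a comment marking the second part.
def load_proxy(filename):
	lines = open(filename).read().splitlines()
	# parse the first column as decimal years
	return [float(line.split()[0]) for line in lines]

# After: the commented part becomes its own, descriptively named method.
def parse_decimal_years(lines):
	return [float(line.split()[0]) for line in lines]

def load_proxy(filename):
	lines = open(filename).read().splitlines()
	return parse_decimal_years(lines)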

Commonly applied refactorings include:

# -*- coding: utf-8 -*-
# vim:fileencoding=utf-8
#
# Copyright (c) 2017-2018 Stefan Bender
#
# This module is part of sciapy.
# sciapy is free software: you can redistribute it or modify
# it under the terms of the GNU General Public License as published
# by the Free Software Foundation, version 2.
# See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html.
"""SCIAMACHY data interface

Data loading and selection routines for SCIAMACHY data regression fits.
Includes reading the solar and geomagnetic index files used as proxies.
"""

import logging

import numpy as np
import pandas as pd
import xarray as xr

from astropy.time import Time

__all__ = ["load_solar_gm_table", "load_scia_dzm"]

_NPV_MAJOR, _NPV_MINOR, _NPV_PATCH = np.__version__.split('.')

_seasons = {
	"summerNH": [
		slice("2002-03-20", "2002-09-25"),
		slice("2003-03-20", "2003-10-13"),
		slice("2004-03-20", "2004-10-20"),
		slice("2005-03-20", "2005-10-20"),
		slice("2006-03-20", "2006-10-20"),
		slice("2007-03-21", "2007-10-20"),
		slice("2008-03-20", "2008-10-20"),
		slice("2009-03-20", "2009-10-20"),
		slice("2010-03-20", "2010-10-20"),
		slice("2011-03-20", "2011-08-31"),
		slice("2012-03-20", "2012-04-07"),
	],
	"summerSH": [
		slice("2002-09-13", "2003-03-26"),
		slice("2003-09-13", "2004-04-01"),
		slice("2004-09-14", "2005-04-02"),
		slice("2005-09-13", "2006-04-02"),
		slice("2006-09-13", "2007-04-02"),
		slice("2007-09-13", "2008-04-01"),
		slice("2008-09-14", "2009-04-02"),
		slice("2009-09-03", "2010-04-02"),
		slice("2010-09-13", "2011-04-02"),
		slice("2011-09-13", "2012-04-01"),
	]
}
_SPEs = [pd.date_range("2002-04-20", "2002-05-01"),
		pd.date_range("2002-05-21", "2002-06-02"),
		pd.date_range("2002-07-15", "2002-07-27"),
		pd.date_range("2002-08-23", "2002-09-03"),
		pd.date_range("2002-09-06", "2002-09-17"),
		pd.date_range("2002-11-08", "2002-11-20"),
		pd.date_range("2003-05-27", "2003-06-07"),
		pd.date_range("2003-10-25", "2003-11-15"),
		pd.date_range("2004-07-24", "2004-08-05"),
		pd.date_range("2004-11-06", "2004-11-18"),
		pd.date_range("2005-01-15", "2005-01-27"),
		pd.date_range("2005-05-13", "2005-05-25"),
		pd.date_range("2005-07-13", "2005-08-05"),
		pd.date_range("2005-08-21", "2005-09-02"),
		pd.date_range("2005-09-07", "2005-09-21"),
		pd.date_range("2006-12-05", "2006-12-23"),
		pd.date_range("2012-01-22", "2012-02-07"),
		pd.date_range("2012-03-06", "2012-03-27")]

def load_dailymeanAE(
		filename="data/indices/AE_Kyoto_1980-2016_daily2_shift12h.dat",
		name="AE", tfmt="jyear"):
	from pkg_resources import resource_filename
	AEfilename = resource_filename("sciapy", filename)
	return load_solar_gm_table(AEfilename, cols=[0, 1],
			names=["time", name], tfmt=tfmt)


def load_dailymeanLya(filename="data/indices/lisird_lya3_1980-2017.dat",
		name="Lya", tfmt="jyear"):
	from pkg_resources import resource_filename
	Lyafilename = resource_filename("sciapy", filename)
	return load_solar_gm_table(Lyafilename, cols=[0, 1],
			names=["time", name], tfmt=tfmt)

[View Code Duplication] This code seems to be duplicated in your project.
def load_solar_gm_table(filename, cols, names, sep="\t", tfmt="jyear"):
	"""Load proxy tables from ascii files

	This function wraps :func:`numpy.genfromtxt()` [#]_ with
	pre-defined settings to match the file format.
	It explicitly returns the times as UTC decimal years or julian epochs.

	.. [#] https://docs.scipy.org/doc/numpy/reference/generated/numpy.genfromtxt.html

	Parameters
	----------
	filename: str
		The file to read
	cols: sequence
		The columns in the file to use, passed to `numpy.genfromtxt`'s
		`usecols` keyword. Should be at least of length 2, indicating
		time values in the first column, e.g., `cols=[0, 1]`.
	names: sequence
		The column names, passed as `names` to `numpy.genfromtxt()`.
		Should be at least of length 2, naming the proxy values in the
		second column, e.g., `names=["time", "proxy"]`.
	sep: str, optional
		The column separator character, passed as `delimiter`
		to `numpy.genfromtxt()`.
		Default: `\t`
	tfmt: str, optional
		The astropy.time "Time Format" for the time units,
		for example, "jyear", "decimalyear", "jd", "mjd", etc.
		See:
		http://docs.astropy.org/en/stable/time/index.html#time-format
		Default: "jyear"

	Returns
	-------
	times: array_like
		The proxy times according to the `tfmt` keyword (UTC) as
		:class:`numpy.ndarray` from the first column of "cols".
	values: tuple of array_like
		The proxy values combined into a structured array [#]_
		according to "names[1:]" and "cols[1:]" passed above.

	See Also
	--------
	:class:`astropy.time.Time`
	.. [#] https://docs.scipy.org/doc/numpy/user/basics.rec.html
	"""
	if len(cols) < 2 and len(names) < 2:
		raise ValueError(
				"Both `cols` and `names` should be at least of length 2.")
	encoding = {}
	# set the encoding for numpy >= 1.14.0
	if int(_NPV_MAJOR) >= 1 and int(_NPV_MINOR) >= 14:
		encoding = dict(encoding=None)
	tab = np.genfromtxt(filename,
			delimiter=sep,
			dtype=None,
			names=names,
			usecols=cols,
			**encoding)
	times = Time(tab[names[0]], scale="utc")
	ts = getattr(times, tfmt)
	return ts, tab[names[1:]]

[View Code Duplication] This code seems to be duplicated in your project.
def _greedy_select(ds, size, varname="NO_DENS_std", scale=1.):
	logging.info("Greedy subsampling to size: %s", size)
	var = np.ma.masked_array((ds[varname] * scale)**2, mask=[False])
	var_p = np.ma.masked_array((ds[varname] * scale)**2, mask=[False])
	sigma2_i = scale**2
	sigma2_ip = scale**2
	idxs = np.arange(len(var))
	for _ in range(size):
		max_entr_idx = np.ma.argmax(np.log(1. + var * sigma2_i))
		min_entr_idx = np.ma.argmin(np.log(1. + var_p * sigma2_ip))
		sigma2_i += 1. / var[max_entr_idx]
		sigma2_ip += 1. / var_p[min_entr_idx]
		var.mask[max_entr_idx] = True
		var_p.mask[max_entr_idx] = True
	return ds.isel(time=idxs[var.mask])

[View Code Duplication] This code seems to be duplicated in your project.
def _greedy_idxs_post(x, xerr, size):
	logging.info("Greedy subsampling to size: %s", size)
	var = np.ma.masked_array(xerr**2, mask=[False])
	var_p = np.ma.masked_array(xerr**2, mask=[False])
	sigma2_i = 1.
	sigma2_ip = 1.
	idxs = np.arange(len(var))
	for _ in range(size):
		max_entr_idx = np.ma.argmax(np.log(1. + var * sigma2_i))
		min_entr_idx = np.ma.argmin(np.log(1. + var_p * sigma2_ip))
		sigma2_i += 1. / var[max_entr_idx]
		sigma2_ip += 1. / var_p[min_entr_idx]
		var.mask[max_entr_idx] = True
		var_p.mask[max_entr_idx] = True
	return idxs[var.mask]

[View Code Duplication] This code seems to be duplicated in your project.
def load_scia_dzm(filename, alt, lat, tfmt="jyear",
		scale=1, subsample_factor=1, subsample_method="greedy",
		akd_threshold=0.002, cnt_threshold=0,
		center=False, season=None, SPEs=False):
	"""Load SCIAMACHY daily zonal mean data

	Interface function for SCIAMACHY daily zonal mean data files version 6.x.
	Uses :mod:`xarray` [#]_ to load and select the data. Possible selections are by
	hemispheric summer (NH summer ~ SH winter and SH summer ~ NH winter) and
	exclusion of strong solar proton events (SPE).

	.. [#] https://xarray.pydata.org

	Parameters
	----------
	filename: str
		The input filename
	alt: float
		The altitude
	lat: float
		The latitude
	tfmt: string, optional
		The astropy.time "Time Format" for the time units,
		for example, "jyear", "decimalyear", "jd", "mjd", etc.
		See:
		http://docs.astropy.org/en/stable/time/index.html#time-format
		Default: "jyear"
	scale: float, optional
		Scale factor of the data (default: 1)
	subsample_factor: int, optional
		Factor to subsample the data by (see `subsample_method`)
		(default: 1 (no subsampling))
	subsample_method: "equal", "greedy", or "random", optional
		Method for subsampling the data (see `subsample_factor`).
		"equal" for equally spaced subsampling,
		"greedy" for selecting the data based on their uncertainty,
		and "random" for uniform random subsampling.
		(default: "greedy")
	center: bool, optional
		Center the data by subtracting the global mean.
		(default: False)
	season: "summerNH", "summerSH", or `None`, optional
		Select the named season or `None` for all data (default: None)
	SPEs: bool, optional
		Set to `True` to exclude strong SPE events (default: False)

	Returns
	-------
	(times, dens, errs, szas): tuple of (N,) array_like
		The measurement times according to the `tfmt` keyword,
		the number densities, their uncertainties,
		and the mean solar zenith angles.
	"""
	logging.info("Opening dataset: '%s'", filename)
	NO_ds = xr.open_mfdataset(filename, decode_times=False,
				chunks={"time": 400, "latitude": 9, "altitude": 11})
	logging.info("done.")
	# Decode time coordinate for selection
	try:
		# xarray <= 0.9.6
		NO_ds["time"] = xr.conventions.decode_cf_variable(NO_ds.time)
	except TypeError:
		# xarray >= 0.10.0
		NO_ds["time"] = xr.conventions.decode_cf_variable("time", NO_ds.time)

	NO_mean = 0.
	if center:
		NO_mean = NO_ds.NO_DENS.mean().values
		logging.info("Centering with global mean: %s", NO_mean)
	NO_tds = NO_ds.sel(latitude=lat, altitude=alt)

	# Exclude SPEs first if requested
	if SPEs:
		logging.info("Removing SPEs.")
		for spe in _SPEs:
			NO_tds = NO_tds.drop(
					NO_tds.sel(time=slice(spe[0], spe[-1])).time.values,
					dim="time")

	# Filter by season
	if season in _seasons.keys():
		logging.info("Restricting to season: %s", season)
		NO_tds = xr.concat([NO_tds.sel(time=s) for s in _seasons[season]],
					dim="time")
		NO_tds.load()
	else:
		logging.info("No season selected or unknown season, "
					"using all available data.")

	try:
		NO_counts = NO_tds.NO_DENS_cnt
	except AttributeError:
		NO_counts = NO_tds.counts

	# Select only useful data
	NO_tds = NO_tds.where(
				np.isfinite(NO_tds.NO_DENS) &
				(NO_tds.NO_DENS_std != 0) &
				(NO_tds.NO_AKDIAG > akd_threshold) &
				(NO_counts > cnt_threshold) &
				(NO_tds.NO_MASK == 0),
				drop=True)

	no_dens = scale * NO_tds.NO_DENS
	if center:
		no_dens -= scale * NO_mean
	no_errs = scale * NO_tds.NO_DENS_std / np.sqrt(NO_counts)
	logging.debug("no_dens.shape (ntime,): %s", no_dens.shape)

	no_sza = NO_tds.mean_SZA

	# Convert to astropy.Time for Julian epoch or decimal year
	no_t = Time(pd.to_datetime(NO_tds.time.values, utc=True).to_pydatetime())
	no_ys = getattr(no_t, tfmt)

	if subsample_factor > 1:
		new_data_size = no_dens.shape[0] // subsample_factor
		if subsample_method == "random":
			# random subsampling
			_idxs = np.random.choice(no_dens.shape[0],
					new_data_size, replace=False)
		elif subsample_method == "equal":
			# equally spaced subsampling
			_idxs = slice(0, no_dens.shape[0], subsample_factor)
		else:
			# "greedy" subsampling (default fall-back)
			_idxs = _greedy_idxs_post(no_dens, no_errs, new_data_size)
		return (no_ys[_idxs],
				no_dens.values[_idxs],
				no_errs.values[_idxs],
				no_sza.values[_idxs])

	return no_ys, no_dens.values, no_errs.values, no_sza.values
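For orientation, a minimal usage sketch of load_solar_gm_table; the file path and column names are placeholders, not taken from the report:

from sciapy.regress.load_data import load_solar_gm_table

# Returns the times (here as Julian epochs) and a structured array
# holding the proxy column(s).
times, proxies = load_solar_gm_table(
		"path/to/proxy_table.dat",          # placeholder path
		cols=[0, 1], names=["time", "Lya"], tfmt="jyear")
lya = proxies["Lya"]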
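Similarly, a sketch of calling load_scia_dzm; the filename, altitude, and latitude are placeholder values, while the keyword names follow the docstring above:

from sciapy.regress.load_data import load_scia_dzm

# Returns measurement times, number densities, their uncertainties,
# and the mean solar zenith angles, here restricted to NH summer
# with SPE periods removed.
ts, dens, errs, szas = load_scia_dzm(
		"path/to/scia_dzm_v6.nc",           # placeholder filename
		alt=70., lat=65.,                   # placeholder altitude / latitude
		tfmt="jyear", season="summerNH", SPEs=True)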