Passed
Push — master ( e18d2d...77f7c6 )
by Stefan
05:33
created

sciapy.regress.load_data.load_solar_gm_table()   A

Complexity

Conditions 3

Size

Total Lines 56
Code Lines 12

Duplication

Lines 56
Ratio 100 %

Code Coverage

Tests 6
CRAP Score 3.0261

Importance

Changes 0
Metric Value
cc 3
eloc 12
nop 5
dl 56
loc 56
ccs 6
cts 7
cp 0.8571
crap 3.0261
rs 9.8
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
# -*- coding: utf-8 -*-
2
# vim:fileencoding=utf-8
3
#
4
# Copyright (c) 2017-2018 Stefan Bender
5
#
6
# This module is part of sciapy.
7
# sciapy is free software: you can redistribute it or modify
8
# it under the terms of the GNU General Public License as published
9
# by the Free Software Foundation, version 2.
10
# See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html.
11 1
"""SCIAMACHY data interface
12
13
Data loading and selection routines for SCIAMACHY data regression fits.
14
Includes reading the solar and geomagnetic index files used as proxies.
15
"""
16
17 1
import logging
18
19 1
import numpy as np
20 1
import pandas as pd
21 1
import xarray as xr
22
23 1
from astropy.time import Time
24
25 1
__all__ = ["load_solar_gm_table", "load_scia_dzm"]
26
27 1
# Hemispheric "summer" time spans (inclusive date-string slices) used to
# restrict the SCIAMACHY data to the local summer of either hemisphere.
_seasons = {
	hemi: [slice(start, stop) for start, stop in spans]
	for hemi, spans in [
		("summerNH", [
			("2002-03-20", "2002-09-25"),
			("2003-03-20", "2003-10-13"),
			("2004-03-20", "2004-10-20"),
			("2005-03-20", "2005-10-20"),
			("2006-03-20", "2006-10-20"),
			("2007-03-21", "2007-10-20"),
			("2008-03-20", "2008-10-20"),
			("2009-03-20", "2009-10-20"),
			("2010-03-20", "2010-10-20"),
			("2011-03-20", "2011-08-31"),
			("2012-03-20", "2012-04-07"),
		]),
		("summerSH", [
			("2002-09-13", "2003-03-26"),
			("2003-09-13", "2004-04-01"),
			("2004-09-14", "2005-04-02"),
			("2005-09-13", "2006-04-02"),
			("2006-09-13", "2007-04-02"),
			("2007-09-13", "2008-04-01"),
			("2008-09-14", "2009-04-02"),
			("2009-09-03", "2010-04-02"),
			("2010-09-13", "2011-04-02"),
			("2011-09-13", "2012-04-01"),
		]),
	]
}
54
55 1
# Periods of strong solar proton events (SPEs); these daily date ranges can
# be excluded from the data via the `SPEs` keyword of `load_scia_dzm`.
_SPEs = [
	pd.date_range(start, stop)
	for start, stop in [
		("2002-04-20", "2002-05-01"),
		("2002-05-21", "2002-06-02"),
		("2002-07-15", "2002-07-27"),
		("2002-08-23", "2002-09-03"),
		("2002-09-06", "2002-09-17"),
		("2002-11-08", "2002-11-20"),
		("2003-05-27", "2003-06-07"),
		("2003-10-25", "2003-11-15"),
		("2004-07-24", "2004-08-05"),
		("2004-11-06", "2004-11-18"),
		("2005-01-15", "2005-01-27"),
		("2005-05-13", "2005-05-25"),
		("2005-07-13", "2005-08-05"),
		("2005-08-21", "2005-09-02"),
		("2005-09-07", "2005-09-21"),
		("2006-12-05", "2006-12-23"),
		("2012-01-22", "2012-02-07"),
		("2012-03-06", "2012-03-27"),
	]
]
73
74
75 1
def load_dailymeanAE(
		filename="data/indices/AE_Kyoto_1980-2016_daily2_shift12h.dat",
		name="AE", tfmt="jyear"):
	"""Load the packaged daily mean auroral electrojet (AE) index table.

	Resolves `filename` relative to the installed "sciapy" package and
	delegates the parsing to :func:`load_solar_gm_table`.
	"""
	from pkg_resources import resource_filename
	aefile = resource_filename("sciapy", filename)
	return load_solar_gm_table(
			aefile, cols=[0, 1], names=["time", name], tfmt=tfmt)
82
83
84 1
def load_dailymeanLya(filename="data/indices/lisird_lya3_1980-2017.dat",
		name="Lya", tfmt="jyear"):
	"""Load the packaged daily mean solar Lyman-alpha index table.

	Resolves `filename` relative to the installed "sciapy" package and
	delegates the parsing to :func:`load_solar_gm_table`.
	"""
	from pkg_resources import resource_filename
	lyafile = resource_filename("sciapy", filename)
	return load_solar_gm_table(
			lyafile, cols=[0, 1], names=["time", name], tfmt=tfmt)
90
91
92 1 View Code Duplication
def load_solar_gm_table(filename, cols, names, sep="\t", tfmt="jyear"):
	r"""Load proxy tables from ascii files

	This function wraps :func:`numpy.genfromtxt()` [#]_ with
	pre-defined settings to match the file format.
	It explicitly returns the times as UTC decimal years or julian epochs.

	.. [#] https://docs.scipy.org/doc/numpy/reference/generated/numpy.genfromtxt.html

	Parameters
	----------
	filename: str
		The file to read
	cols: sequence
		The columns in the file to use, passed to `numpy.genfromtxt`'s
		`usecols` keyword. Should be at least of length 2, indicating
		time values in the first column, e.g., `cols=[0, 1]`.
	names: sequence
		The column names, passed as `names` to `numpy.genfromtxt()`.
		Should be at least of length 2, naming the proxy values in the
		second column, e.g., `names=["time", "proxy"]`.
	sep: str, optional
		The column separator character, passed as `delimiter`.
		Default: `\t`
	tfmt: str, optional
		The astropy.time "Time Format" for the time units,
		for example, "jyear", "decimalyear", "jd", "mjd", etc.
		See:
		http://docs.astropy.org/en/stable/time/index.html#time-format
		Default: "jyear"

	Returns
	-------
	times: array_like
		The proxy times according to the `tfmt` keyword (UTC) as
		:class:`numpy.ndarray` from the first column of "cols".
	values: tuple of array_like
		The proxy values combined into a structured array [#]_
		according to "names[1:]" and "cols[1:]" passed above.

	Raises
	------
	ValueError
		If either `cols` or `names` has fewer than two entries.

	See Also
	--------
	:class:`astropy.time.Time`
	.. [#] https://docs.scipy.org/doc/numpy/user/basics.rec.html
	"""
	# A time column and at least one value column are required; reject the
	# call if *either* list is too short. (This used `and`, which let a
	# mismatched `cols`/`names` pair slip through to `genfromtxt`.)
	if len(cols) < 2 or len(names) < 2:
		raise ValueError(
				"Both `cols` and `names` should be at least of length 2.")
	tab = np.genfromtxt(filename,
			delimiter=sep,
			dtype=None,
			names=names,
			usecols=cols)
	# Interpret the first column as UTC times and convert them to the
	# requested astropy time format (e.g., "jyear", "jd").
	times = Time(tab[names[0]], scale="utc")
	ts = getattr(times, tfmt)
	return ts, tab[names[1:]]
148
149
150 1 View Code Duplication
def _greedy_select(ds, size, varname="NO_DENS_std", scale=1.):
	"""Greedily subsample `ds` along its "time" dimension to `size` points.

	Iteratively selects the point with the largest entropy contribution
	``log(1 + var * sigma2)`` based on the (scaled, squared) uncertainties
	in ``ds[varname]`` and returns the dataset reduced to those points.
	"""
	logging.info("Greedy subsampling to size: %s", size)
	# Squared (scaled) uncertainties; the mask marks the selected points.
	var = np.ma.masked_array((ds[varname] * scale)**2, mask=[False])
	var_p = np.ma.masked_array((ds[varname] * scale)**2, mask=[False])
	sigma2_i = scale**2
	sigma2_ip = scale**2
	idxs = np.arange(len(var))
	for _ in range(size):
		# Largest entropy gain among the not-yet-selected points.
		max_entr_idx = np.ma.argmax(np.log(1. + var * sigma2_i))
		min_entr_idx = np.ma.argmin(np.log(1. + var_p * sigma2_ip))
		sigma2_i += 1. / var[max_entr_idx]
		sigma2_ip += 1. / var_p[min_entr_idx]
		var.mask[max_entr_idx] = True
		# NOTE(review): masks `max_entr_idx` (not `min_entr_idx`), so the
		# `var_p`/`sigma2_ip`/`min_entr_idx` bookkeeping never influences
		# the returned selection; possibly a bug if a minimum-entropy
		# variant was intended — verify against the original intent.
		var_p.mask[max_entr_idx] = True
	# Keep only the selected (now masked) time indices.
	return ds.isel(time=idxs[var.mask])
165
166 1 View Code Duplication
def _greedy_idxs_post(x, xerr, size):
	"""Greedily pick `size` indices by decreasing entropy contribution.

	Iteratively selects the (not yet selected) index maximising
	``log(1 + xerr**2 * sigma2)`` and updates the precision estimate
	``sigma2 += 1 / xerr**2`` with the chosen point's variance.

	Parameters
	----------
	x: array_like
		Unused; kept for interface compatibility with existing callers.
	xerr: (N,) array_like
		The data uncertainties (standard deviations).
	size: int
		The number of indices to select.

	Returns
	-------
	idxs: (size,) numpy.ndarray
		The selected indices, in ascending order.
	"""
	logging.info("Greedy subsampling to size: %s", size)
	# Squared uncertainties; the mask marks the already-selected points.
	var = np.ma.masked_array(xerr**2, mask=[False])
	sigma2_i = 1.
	idxs = np.arange(len(var))
	for _ in range(size):
		# Largest entropy gain among the not-yet-selected points.
		max_entr_idx = np.ma.argmax(np.log(1. + var * sigma2_i))
		sigma2_i += 1. / var[max_entr_idx]
		var.mask[max_entr_idx] = True
	# The previous version also tracked a parallel minimum-entropy branch
	# (`var_p`, `sigma2_ip`, `min_entr_idx`) that never affected the
	# returned indices; that dead code has been removed.
	return idxs[var.mask]
181
182 1 View Code Duplication
def load_scia_dzm(filename, alt, lat, tfmt="jyear",
		scale=1, subsample_factor=1, subsample_method="greedy",
		akd_threshold=0.002, cnt_threshold=0,
		center=False, season=None, SPEs=False):
	"""Load SCIAMACHY daily zonal mean data

	Interface function for SCIAMACHY daily zonal mean data files version 6.x.
	Uses :mod:`xarray` [#]_ to load and select the data. Possible selections are by
	hemispheric summer (NH summer ~ SH winter and SH summer ~ NH winter) and
	exclusion of strong solar proton events (SPE).

	.. [#] https://xarray.pydata.org

	Parameters
	----------
	filename: str
		The input filename
	alt: float
		The altitude
	lat: float
		The latitude
	tfmt: string, optional
		The astropy.time "Time Format" for the time units,
		for example, "jyear", "decimalyear", "jd", "mjd", etc.
		See:
		http://docs.astropy.org/en/stable/time/index.html#time-format
		Default: "jyear"
	scale: float, optional
		Scale factor of the data (default: 1)
	subsample_factor: int, optional
		Factor to subsample the data by (see `subsample_method`)
		(default: 1 (no subsampling))
	subsample_method: "equal", "greedy", or "random", optional
		Method for subsampling the data (see `subsample_factor`).
		"equal" for equally spaced subsampling,
		"greedy" for selecting the data based on their uncertainty,
		and "random" for uniform random subsampling.
		(default: "greedy")
	akd_threshold: float, optional
		Exclude data whose averaging kernel diagonal elements are not
		greater than this value (default: 0.002).
	cnt_threshold: int, optional
		Exclude data with measurement counts not greater than this
		value (default: 0).
	center: bool, optional
		Center the data by subtracting the global mean.
		(default: False)
	season: "summerNH", "summerSH", or `None`, optional
		Select the named season or `None` for all data (default: None)
	SPEs: bool, optional
		Set to `True` to exclude strong SPE events (default: False)

	Returns
	-------
	(times, dens, errs, szas): tuple of (N,) array_like
		The measurement times according to the `tfmt` keyword,
		the number densities, their uncertainties, and the
		mean solar zenith angles.
	"""
	logging.info("Opening dataset: '%s'", filename)
	NO_ds = xr.open_mfdataset(filename, decode_times=False,
				chunks={"time": 400, "latitude": 9, "altitude": 11})
	logging.info("done.")
	# Decode time coordinate for selection; the call signature of
	# decode_cf_variable changed between xarray versions.
	try:
		# xarray <= 0.9.6
		NO_ds["time"] = xr.conventions.decode_cf_variable(NO_ds.time)
	except TypeError:
		# xarray => 0.10.0
		NO_ds["time"] = xr.conventions.decode_cf_variable("time", NO_ds.time)

	# Global mean for optional centering (computed before any selection).
	NO_mean = 0.
	if center:
		NO_mean = NO_ds.NO_DENS.mean().values
		logging.info("Centering with global mean: %s", NO_mean)
	NO_tds = NO_ds.sel(latitude=lat, altitude=alt)

	# Exclude SPEs first if requested
	if SPEs:
		logging.info("Removing SPEs.")
		for spe in _SPEs:
			NO_tds = NO_tds.drop(
					NO_tds.sel(time=slice(spe[0], spe[-1])).time.values,
					dim="time")

	# Filter by season
	if season in _seasons.keys():
		logging.info("Restricting to season: %s", season)
		NO_tds = xr.concat([NO_tds.sel(time=s) for s in _seasons[season]],
					dim="time")
		NO_tds.load()
	else:
		logging.info("No season selected or unknown season, "
					"using all available data.")

	# The counts variable was renamed between data versions.
	try:
		NO_counts = NO_tds.NO_DENS_cnt
	except AttributeError:
		NO_counts = NO_tds.counts

	# Select only useful data
	NO_tds = NO_tds.where(
				np.isfinite(NO_tds.NO_DENS) &
				(NO_tds.NO_DENS_std != 0) &
				(NO_tds.NO_AKDIAG > akd_threshold) &
				(NO_counts > cnt_threshold) &
				(NO_tds.NO_MASK == 0),
				drop=True)

	no_dens = scale * NO_tds.NO_DENS
	if center:
		no_dens -= scale * NO_mean
	# Standard error of the (daily zonal) mean.
	no_errs = scale * NO_tds.NO_DENS_std / np.sqrt(NO_counts)
	logging.debug("no_dens.shape (ntime,): %s", no_dens.shape)

	no_sza = NO_tds.mean_SZA

	# Convert to astropy.Time for Julian epoch or decimal year
	no_t = Time(pd.to_datetime(NO_tds.time.values, utc=True).to_pydatetime())
	no_ys = getattr(no_t, tfmt)

	if subsample_factor > 1:
		new_data_size = no_dens.shape[0] // subsample_factor
		if subsample_method == "random":
			# random subsampling
			_idxs = np.random.choice(no_dens.shape[0],
					new_data_size, replace=False)
		elif subsample_method == "equal":
			# equally spaced subsampling
			_idxs = slice(0, no_dens.shape[0], subsample_factor)
		else:
			# "greedy" subsampling (default fall-back)
			_idxs = _greedy_idxs_post(no_dens, no_errs, new_data_size)
		return (no_ys[_idxs],
				no_dens.values[_idxs],
				no_errs.values[_idxs],
				no_sza.values[_idxs])

	return no_ys, no_dens.values, no_errs.values, no_sza.values
314