Passed
Push — master (b988a0...edd9bf) by Stefan
created 05:54

sciapy.regress.load_data  (rating: A)

Complexity
    Total Complexity: 18

Size/Duplication
    Total Lines: 304
    Duplicated Lines: 68.75%

Test Coverage
    Coverage: 61.46%

Importance
    Changes: 0
Metric  Value
eloc    161      (executable lines of code)
dl      209      (duplicated lines)
loc     304      (lines of code)
ccs     59       (covered statements)
cts     96       (total statements)
cp      0.6146   (coverage)
rs      10
c       0
b       0
f       0
wmc     18       (total complexity)

6 Functions

Rating  Name                   Duplication (lines)  Size (lines)  Complexity
A       load_dailymeanLya()    0                    6             1
A       load_dailymeanAE()     0                    7             1
A       _greedy_idxs_post()    15                   15            2
A       load_solar_gm_table()  47                   47            1
A       _greedy_select()       15                   15            2
C       load_scia_dzm()        132                  132           11

How to fix: Duplicated Code

Duplicate code is one of the most pungent code smells. A common rule of thumb is to restructure code once it is duplicated in three or more places.

The most common fix is to extract the duplicated logic into a single function, class, or module that all call sites share.
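In this module, for instance, `_greedy_select()` and `_greedy_idxs_post()` repeat the same greedy selection loop almost line for line. A possible restructuring is sketched below; it is an illustration only, not the module's current code, and the helper name `_greedy_mask` is invented here:

import numpy as np


def _greedy_mask(variances, size, sigma2_init=1.):
	# Hypothetical shared helper: the greedy entropy-based selection loop
	# that _greedy_select() and _greedy_idxs_post() currently repeat.
	var = np.ma.masked_array(np.asarray(variances, dtype=float),
			mask=np.zeros(len(variances), dtype=bool))
	var_p = np.ma.masked_array(np.asarray(variances, dtype=float),
			mask=np.zeros(len(variances), dtype=bool))
	sigma2_i = sigma2_init
	sigma2_ip = sigma2_init
	for _ in range(size):
		max_entr_idx = np.ma.argmax(np.log(1. + var * sigma2_i))
		min_entr_idx = np.ma.argmin(np.log(1. + var_p * sigma2_ip))
		sigma2_i += 1. / var[max_entr_idx]
		sigma2_ip += 1. / var_p[min_entr_idx]
		var.mask[max_entr_idx] = True
		var_p.mask[max_entr_idx] = True
	return var.mask


def _greedy_idxs_post(x, xerr, size):
	# Same interface as in the module, now delegating the shared loop.
	mask = _greedy_mask(np.asarray(xerr)**2, size)
	return np.arange(len(mask))[mask]


def _greedy_select(ds, size, varname="NO_DENS_std", scale=1.):
	# Same interface as in the module, scaled variances as before.
	mask = _greedy_mask((np.asarray(ds[varname]) * scale)**2, size,
			sigma2_init=scale**2)
	return ds.isel(time=np.arange(len(mask))[mask])

`load_solar_gm_table()` and `load_scia_dzm()` are flagged because similar blocks appear elsewhere in the project; the remedy is the same there: keep one copy in a shared module and import it from every call site. The module source, as analysed in this report, follows.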

# -*- coding: utf-8 -*-
# vim:fileencoding=utf-8
#
# Copyright (c) 2017-2018 Stefan Bender
#
# This module is part of sciapy.
# sciapy is free software: you can redistribute it or modify
# it under the terms of the GNU General Public License as published
# by the Free Software Foundation, version 2.
# See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html.
"""SCIAMACHY data interface

Data loading and selection routines for SCIAMACHY data regression fits.
Includes reading the solar and geomagnetic index files used as proxies.
"""

import logging

import numpy as np
import pandas as pd
import xarray as xr

from astropy.time import Time

__all__ = ["load_solar_gm_table", "load_scia_dzm"]

_seasons = {
	"summerNH": [
		slice("2002-03-20", "2002-09-25"),
		slice("2003-03-20", "2003-10-13"),
		slice("2004-03-20", "2004-10-20"),
		slice("2005-03-20", "2005-10-20"),
		slice("2006-03-20", "2006-10-20"),
		slice("2007-03-21", "2007-10-20"),
		slice("2008-03-20", "2008-10-20"),
		slice("2009-03-20", "2009-10-20"),
		slice("2010-03-20", "2010-10-20"),
		slice("2011-03-20", "2011-08-31"),
		slice("2012-03-20", "2012-04-07"),
	],
	"summerSH": [
		slice("2002-09-13", "2003-03-26"),
		slice("2003-09-13", "2004-04-01"),
		slice("2004-09-14", "2005-04-02"),
		slice("2005-09-13", "2006-04-02"),
		slice("2006-09-13", "2007-04-02"),
		slice("2007-09-13", "2008-04-01"),
		slice("2008-09-14", "2009-04-02"),
		slice("2009-09-03", "2010-04-02"),
		slice("2010-09-13", "2011-04-02"),
		slice("2011-09-13", "2012-04-01"),
	]
}

_SPEs = [pd.date_range("2002-04-20", "2002-05-01"),
		pd.date_range("2002-05-21", "2002-06-02"),
		pd.date_range("2002-07-15", "2002-07-27"),
		pd.date_range("2002-08-23", "2002-09-03"),
		pd.date_range("2002-09-06", "2002-09-17"),
		pd.date_range("2002-11-08", "2002-11-20"),
		pd.date_range("2003-05-27", "2003-06-07"),
		pd.date_range("2003-10-25", "2003-11-15"),
		pd.date_range("2004-07-24", "2004-08-05"),
		pd.date_range("2004-11-06", "2004-11-18"),
		pd.date_range("2005-01-15", "2005-01-27"),
		pd.date_range("2005-05-13", "2005-05-25"),
		pd.date_range("2005-07-13", "2005-08-05"),
		pd.date_range("2005-08-21", "2005-09-02"),
		pd.date_range("2005-09-07", "2005-09-21"),
		pd.date_range("2006-12-05", "2006-12-23"),
		pd.date_range("2012-01-22", "2012-02-07"),
		pd.date_range("2012-03-06", "2012-03-27")]


def load_dailymeanAE(
		filename="data/indices/AE_Kyoto_1980-2016_daily2_shift12h.dat",
		name="AE", tfmt="jyear"):
	from pkg_resources import resource_filename
	AEfilename = resource_filename("sciapy", filename)
	return load_solar_gm_table(AEfilename, cols=[0, 1],
			names=["time", name], tfmt=tfmt)


def load_dailymeanLya(filename="data/indices/lisird_lya3_1980-2017.dat",
		name="Lya", tfmt="jyear"):
	from pkg_resources import resource_filename
	Lyafilename = resource_filename("sciapy", filename)
	return load_solar_gm_table(Lyafilename, cols=[0, 1],
			names=["time", name], tfmt=tfmt)
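# --- Illustrative usage (not part of the module source) ----------------------
# The two loaders above resolve the packaged index files and return the proxy
# times and values via load_solar_gm_table(), e.g.:
#
#     ae_t, ae_tab = load_dailymeanAE(tfmt="jyear")
#     lya_t, lya_tab = load_dailymeanLya(tfmt="jyear")
# ------------------------------------------------------------------------------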
def load_solar_gm_table(filename, cols, names, sep="\t", tfmt="jyear"):
	"""Load proxy tables from ascii files

	This function wraps :func:`pandas.read_table()` [#]_ with
	pre-defined settings to match the file format.
	It explicitly returns the times as UTC decimal years or julian epochs.

	.. [#] https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_table.html

	Parameters
	----------
	filename: str
		The file to read
	cols: array-like or callable
		The columns in the file to use, passed to `pandas.read_table`'s
		`usecols` keyword.
	names: array-like
		The column names, passed as `names` to `pandas.read_table()`.
	sep: str, optional
		The column separator character, passed as `sep`.
		Default: `\t`
	tfmt: str, optional
		The astropy.time "Time Format" for the time units,
		for example, "jyear", "decimalyear", "jd", "mjd", etc.
		See:
		http://docs.astropy.org/en/stable/time/index.html#time-format
		Default: "jyear"

	Returns
	-------
	(times, table): tuple
		The proxy times according to the `tfmt` keyword (UTC),
		the proxy values as a :class:`pandas.DataFrame` returned by
		:func:`pandas.read_table()`.

	See Also
	--------
	pandas.read_table
	:class:`pandas.DataFrame`
	:class:`astropy.time.Time`
	"""
	tab = pd.read_table(filename, index_col=0, parse_dates=[0],
			comment="#",
			sep=sep, usecols=cols, names=names)
	times = Time(tab.tz_localize("UTC").index.to_pydatetime())
	ts = getattr(times, tfmt)
	return ts, tab
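# --- Illustrative usage (not part of the module source) ----------------------
# load_solar_gm_table() reads a two-column, tab-separated proxy file
# ("time<TAB>value"); the file name below is a made-up placeholder:
#
#     times, tab = load_solar_gm_table("proxy.dat", cols=[0, 1],
#             names=["time", "proxy"], tfmt="jyear")
#
# `times` holds the UTC times in the requested format (here Julian epoch
# years), `tab` is the pandas.DataFrame with the proxy values.
# ------------------------------------------------------------------------------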
def _greedy_select(ds, size, varname="NO_DENS_std", scale=1.):
	# Greedy entropy-based subsampling: repeatedly select the point that
	# maximizes log(1 + var * sigma2) until `size` points are chosen,
	# then return that subset of `ds` along "time".
	logging.info("Greedy subsampling to size: %s", size)
	var = np.ma.masked_array((ds[varname] * scale)**2, mask=[False])
	var_p = np.ma.masked_array((ds[varname] * scale)**2, mask=[False])
	sigma2_i = scale**2
	sigma2_ip = scale**2
	idxs = np.arange(len(var))
	for _ in range(size):
		max_entr_idx = np.ma.argmax(np.log(1. + var * sigma2_i))
		min_entr_idx = np.ma.argmin(np.log(1. + var_p * sigma2_ip))
		sigma2_i += 1. / var[max_entr_idx]
		sigma2_ip += 1. / var_p[min_entr_idx]
		var.mask[max_entr_idx] = True
		var_p.mask[max_entr_idx] = True
	return ds.isel(time=idxs[var.mask])

def _greedy_idxs_post(x, xerr, size):
	# Same greedy selection as _greedy_select(), but operates on the
	# uncertainties `xerr` directly and returns the selected indices.
	logging.info("Greedy subsampling to size: %s", size)
	var = np.ma.masked_array(xerr**2, mask=[False])
	var_p = np.ma.masked_array(xerr**2, mask=[False])
	sigma2_i = 1.
	sigma2_ip = 1.
	idxs = np.arange(len(var))
	for _ in range(size):
		max_entr_idx = np.ma.argmax(np.log(1. + var * sigma2_i))
		min_entr_idx = np.ma.argmin(np.log(1. + var_p * sigma2_ip))
		sigma2_i += 1. / var[max_entr_idx]
		sigma2_ip += 1. / var_p[min_entr_idx]
		var.mask[max_entr_idx] = True
		var_p.mask[max_entr_idx] = True
	return idxs[var.mask]
def load_scia_dzm(filename, alt, lat, tfmt="jyear",
		scale=1, subsample_factor=1, subsample_method="greedy",
		akd_threshold=0.002, cnt_threshold=0,
		center=False, season=None, SPEs=False):
	"""Load SCIAMACHY daily zonal mean data

	Interface function for SCIAMACHY daily zonal mean data files version 6.x.
	Uses :mod:`xarray` [#]_ to load and select the data. Possible selections are by
	hemispheric summer (NH summer ~ SH winter and SH summer ~ NH winter) and
	exclusion of strong solar proton events (SPE).

	.. [#] https://xarray.pydata.org

	Parameters
	----------
	filename: str
		The input filename
	alt: float
		The altitude
	lat: float
		The latitude
	tfmt: string, optional
		The astropy.time "Time Format" for the time units,
		for example, "jyear", "decimalyear", "jd", "mjd", etc.
		See:
		http://docs.astropy.org/en/stable/time/index.html#time-format
		Default: "jyear"
	scale: float, optional
		Scale factor of the data (default: 1)
	subsample_factor: int, optional
		Factor to subsample the data by (see `subsample_method`)
		(default: 1 (no subsampling))
	subsample_method: "equal", "greedy", or "random", optional
		Method for subsampling the data (see `subsample_factor`).
		"equal" for equally spaced subsampling,
		"greedy" for selecting the data based on their uncertainty,
		and "random" for uniform random subsampling.
		(default: "greedy")
	akd_threshold: float, optional
		Exclude data with an averaging kernel diagonal element at or
		below this value (default: 0.002)
	cnt_threshold: int, optional
		Exclude data with at most this many measurement counts
		(default: 0)
	center: bool, optional
		Center the data by subtracting the global mean.
		(default: False)
	season: "summerNH", "summerSH", or `None`, optional
		Select the named season or `None` for all data (default: None)
	SPEs: bool, optional
		Set to `True` to exclude strong SPE events (default: False)

	Returns
	-------
	(times, dens, errs, szas): tuple of (N,) array_like
		The measurement times according to the `tfmt` keyword,
		the number densities, their uncertainties,
		and the mean solar zenith angles.
	"""
	logging.info("Opening dataset: '%s'", filename)
	NO_ds = xr.open_mfdataset(filename, decode_times=False,
				chunks={"time": 400, "latitude": 9, "altitude": 11})
	logging.info("done.")
	# Decode time coordinate for selection
	try:
		# xarray <= 0.9.6
		NO_ds["time"] = xr.conventions.decode_cf_variable(NO_ds.time)
	except TypeError:
		# xarray >= 0.10.0
		NO_ds["time"] = xr.conventions.decode_cf_variable("time", NO_ds.time)

	NO_mean = 0.
	if center:
		NO_mean = NO_ds.NO_DENS.mean().values
		logging.info("Centering with global mean: %s", NO_mean)
	NO_tds = NO_ds.sel(latitude=lat, altitude=alt)

	# Exclude SPEs first if requested
	if SPEs:
		logging.info("Removing SPEs.")
		for spe in _SPEs:
			NO_tds = NO_tds.drop(
					NO_tds.sel(time=slice(spe[0], spe[-1])).time.values,
					dim="time")

	# Filter by season
	if season in _seasons.keys():
		logging.info("Restricting to season: %s", season)
		NO_tds = xr.concat([NO_tds.sel(time=s) for s in _seasons[season]],
					dim="time")
		NO_tds.load()
	else:
		logging.info("No season selected or unknown season, "
					"using all available data.")

	try:
		NO_counts = NO_tds.NO_DENS_cnt
	except AttributeError:
		NO_counts = NO_tds.counts

	# Select only useful data
	NO_tds = NO_tds.where(
				np.isfinite(NO_tds.NO_DENS) &
				(NO_tds.NO_DENS_std != 0) &
				(NO_tds.NO_AKDIAG > akd_threshold) &
				(NO_counts > cnt_threshold) &
				(NO_tds.NO_MASK == 0),
				drop=True)

	no_dens = scale * NO_tds.NO_DENS
	if center:
		no_dens -= scale * NO_mean
	no_errs = scale * NO_tds.NO_DENS_std / np.sqrt(NO_counts)
	logging.debug("no_dens.shape (ntime,): %s", no_dens.shape)

	no_sza = NO_tds.mean_SZA

	# Convert to astropy.Time for Julian epoch or decimal year
	no_t = Time(pd.to_datetime(NO_tds.time.values, utc=True).to_pydatetime())
	no_ys = getattr(no_t, tfmt)

	if subsample_factor > 1:
		new_data_size = no_dens.shape[0] // subsample_factor
		if subsample_method == "random":
			# random subsampling
			_idxs = np.random.choice(no_dens.shape[0],
					new_data_size, replace=False)
		elif subsample_method == "equal":
			# equally spaced subsampling
			_idxs = slice(0, no_dens.shape[0], subsample_factor)
		else:
			# "greedy" subsampling (default fall-back)
			_idxs = _greedy_idxs_post(no_dens, no_errs, new_data_size)
		return (no_ys[_idxs],
				no_dens.values[_idxs],
				no_errs.values[_idxs],
				no_sza.values[_idxs])

	return no_ys, no_dens.values, no_errs.values, no_sza.values
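For reference, a minimal sketch of calling the main entry point; the file name, altitude, and latitude below are placeholders, not values taken from this report:

# Hypothetical call: select one altitude/latitude bin, restrict to NH summer,
# drop strong SPE periods, and greedily subsample by a factor of 2.
times, dens, errs, szas = load_scia_dzm(
		"scia_dzm_v6.nc", alt=70, lat=65,
		tfmt="jyear", season="summerNH", SPEs=True,
		subsample_factor=2, subsample_method="greedy")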