Passed
Push — master ( 0ac1d4...1ad9b4 )
by Stefan
03:45
created

sciapy.level2.binning.bin_lat_timeavg()   B

Complexity

Conditions 8

Size

Total Lines 81
Code Lines 31

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 16
CRAP Score 8.512

Importance

Changes 0
Metric Value
cc 8
eloc 31
nop 9
dl 0
loc 81
ccs 16
cts 20
cp 0.8
crap 8.512
rs 7.2693
c 0
b 0
f 0

How to fix   Long Method    Many Parameters   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.

There are several approaches to avoid long parameter lists:

1
# -*- coding: utf-8 -*-
2
# vim:fileencoding=utf-8
3
#
4
# Copyright (c) 2017 Stefan Bender
5
#
6
# This file is part of sciapy.
7
# sciapy is free software: you can redistribute it or modify it
8
# under the terms of the GNU General Public License as published by
9
# the Free Software Foundation, version 2.
10
# See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html.
11 1
"""SCIAMACHY level 2 binning
12
"""
13
14 1
__all__ = ["bin_lat_timeavg"]
15
16 1
import logging
17
18 1
import numpy as np
19 1
import xarray as xr
20
21 1
def _bin_stats(ds,
22
		binvar="latitude", tvar="time",
23
		area_weighted=True, stacked="stacked_time_latitude",
24
		set_attrs=True):
25
	"""Helper function for (weighted) bin statistics
26
	"""
27 1
	if not hasattr(ds, stacked):
28 1
		stacked = "stacked_{0}_{1}".format(tvar, binvar)
29 1
		ds = ds.stack(**{stacked: (tvar, binvar)})
30 1
	_weights = (np.cos(np.radians(ds[binvar])) if area_weighted
31
				else xr.ones_like(ds[binvar]))
32
	# normalize weights (sum(weights) = 1)
33 1
	_weights /= _weights.sum(dim=stacked)
34 1
	_ssqw = (_weights**2).sum(dim=stacked)
35 1
	mean_ds = (ds * _weights).sum(dim=stacked) if area_weighted \
36
			else ds.mean(dim=stacked)
37 1
	mean_ds["wsqsum"] = _ssqw
38 1
	mean_ds["wsqsum"].attrs = {
39
			"long_name": "sum of squared weights",
40
			"units": "1"}
41
	# unbiased standard deviations
42 1
	var_ds = ((_weights * (ds - mean_ds)**2).sum(dim=stacked) /
43
			(1. - _ssqw)) if area_weighted else ds.var(dim=stacked, ddof=1)
44 1
	sdev_ds = np.sqrt(var_ds)
45 1
	cnts_ds = ds.count(dim=stacked)
46 1
	if set_attrs:
47 1
		for var in ds.data_vars:
48 1
			sdev_ds[var].attrs = ds[var].attrs
49 1
			cnts_ds[var].attrs = ds[var].attrs
50 1
			cnts_ds[var].attrs.update({"units": "1"})
51 1
			for key in ["long_name", "standard_name"]:
52 1
				try:
53 1
					sdev_ds[var].attrs.update({
54
						key: ds[var].attrs[key] + " standard deviation"})
55
					cnts_ds[var].attrs.update({
56
						key: ds[var].attrs[key] + " counts"})
57 1
				except KeyError:
58 1
					pass
59 1
	sdev_ds = sdev_ds.rename({v: v + "_std" for v in sdev_ds.data_vars})
60 1
	cnts_ds = cnts_ds.rename({v: v + "_cnt" for v in cnts_ds.data_vars})
61 1
	return xr.merge([mean_ds, sdev_ds, cnts_ds])
62
63 1
def bin_lat_timeavg(ds, binvar="latitude", tvar="time",
64
		bins=np.r_[-90:91:5], labels=None, area_weighted=True,
65
		keep_attrs=True,
66
		load=True, save_progress=False):
67
	"""Latitudinally bin and time average xarray dataset(s)
68
69
	Time-averages the variables in an :class:`xarray.Dataset` in the given
70
	latitude bins. This should be applied to daily-binned datasets from
71
	a groupby object (via .apply()) to yield daily zonal means.
72
73
	The type of latitudes is selected by passing the appropriate
74
	`binvar` and must be a variable in the data set.
75
	Area weighting (cos(latitude)) is also supported.
76
77
	Parameters
78
	----------
79
	ds : xarray.Dataset or xarray.DatasetGroupBy instance
80
		The dataset (or GroupBy) instance to bin latitudinally.
81
	binvar : str
82
		The name of the variable used for binning, default: "latitude".
83
	tvar : str
84
		The name of the time variable of the GroupBy object,
85
		default: "time".
86
	bins : numpy.ndarray
87
		The (latitudinal) bin edges, default: `np.r_[-90:91:5]`.
88
	labels : list or None
89
		The bin labels, if set to `None` (the default), the labels are
90
		set to the central bin values.
91
	area_weighted : bool
92
		Use area weighted averages, default: `True`.
93
	keep_attrs : bool
94
		Keep the global and variable attributes from the data set,
95
		default: `True`.
96
	load : bool
97
		Loads the data into memory before binning, speeds it up considerably
98
		provided that the it fits into memory, default: `True`
99
	save_progress : bool
100
		Saves the individual binned files to netcdf, to enable recovering from
101
		interrupted runs, default: `False`
102
103
	Returns
104
	-------
105
	ds : xarray.Dataset
106
		The binned and time-averaged dataset together with the (unbiased)
107
		standard deviations of the variables as `<variable>_std` and the
108
		number of averaged values as `<variable>_cnt`.
109
	"""
110 1
	if load:
111
		# load the chunk into memory to speed up binning
112 1
		ds.load()
113
	# adjust the time variable
114 1
	if np.issubdtype(ds[tvar].values[0], np.floating):
115
		# convert floats to datetime first (probably MLT states)
116 1
		date = (
117
			xr.conventions.decode_cf(ds[[tvar]])[tvar]
118
			.values[0]
119
			.astype("datetime64[D]")
120
		)
121
	else:
122
		date = ds[tvar].values[0].astype('datetime64[D]')
123 1
	if not hasattr(ds, binvar):
124
		# nothing to bin
125
		logging.warn("skipping %s", date)
126
		return ds
127 1
	logging.info("processing %s", date)
128 1
	if labels is None:
129 1
		labels = 0.5 * (bins[1:] + bins[:-1])
130
	# stack and bin and delegate to the statistics helper function
131 1
	ds_out = (ds.stack(stacked_time_latitude=("time", "latitude"))
132
				.groupby_bins(binvar, bins, labels=labels)
133
				.apply(_bin_stats,
134
					binvar=binvar, tvar=tvar,
135
					area_weighted=area_weighted,
136
					set_attrs=keep_attrs))
137 1
	if keep_attrs:
138 1
		ds_out.attrs = ds.attrs
139 1
		for var in ds.data_vars:
140 1
			ds_out[var].attrs = ds[var].attrs
141 1
	if save_progress:
142
		ds_out.to_netcdf("tmp_binavg-{0}.nc".format(date))
143
	return ds_out
144