Passed
Push — master ( 0ac1d4...1ad9b4 )
by Stefan
03:45
created

sciapy.level2.binning   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 144
Duplicated Lines 0 %

Test Coverage

Coverage 90.38%

Importance

Changes 0
Metric Value
eloc 73
dl 0
loc 144
ccs 47
cts 52
cp 0.9038
rs 10
c 0
b 0
f 0
wmc 17

2 Functions

Rating   Name   Duplication   Size   Complexity  
C _bin_stats() 0 41 9
B bin_lat_timeavg() 0 81 8
1
# -*- coding: utf-8 -*-
2
# vim:fileencoding=utf-8
3
#
4
# Copyright (c) 2017 Stefan Bender
5
#
6
# This file is part of sciapy.
7
# sciapy is free software: you can redistribute it or modify it
8
# under the terms of the GNU General Public License as published by
9
# the Free Software Foundation, version 2.
10
# See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html.
11 1
"""SCIAMACHY level 2 binning
12
"""
13
14 1
__all__ = ["bin_lat_timeavg"]
15
16 1
import logging
17
18 1
import numpy as np
19 1
import xarray as xr
20
21 1
def _bin_stats(ds,
		binvar="latitude", tvar="time",
		area_weighted=True, stacked="stacked_time_latitude",
		set_attrs=True):
	"""Helper function for (weighted) bin statistics

	Computes the (optionally cos(latitude)-weighted) mean, the
	unbiased standard deviation, and the number of points of all
	data variables along the stacked time/bin dimension.

	Parameters
	----------
	ds : xarray.Dataset
		The (grouped) dataset to average.
	binvar : str
		Name of the binning variable, default: "latitude".
	tvar : str
		Name of the time variable, default: "time".
	area_weighted : bool
		Weight the statistics by cos(binvar) (i.e. proportional to
		surface area when binning by latitude), default: `True`.
	stacked : str
		Name of the stacked (tvar, binvar) dimension to average over,
		default: "stacked_time_latitude".
	set_attrs : bool
		Copy the variable attributes to the derived standard-deviation
		and count variables, default: `True`.

	Returns
	-------
	xarray.Dataset
		The merged dataset containing the means, the standard
		deviations as `<variable>_std`, and the counts as
		`<variable>_cnt`.
	"""
	# Stack the time and bin variables into one dimension if the
	# dataset does not already carry the stacked dimension.
	if not hasattr(ds, stacked):
		stacked = "stacked_{0}_{1}".format(tvar, binvar)
		ds = ds.stack(**{stacked: (tvar, binvar)})
	# cos(latitude) is proportional to the zonal surface area;
	# otherwise all points receive equal weight.
	_weights = (np.cos(np.radians(ds[binvar])) if area_weighted
				else xr.ones_like(ds[binvar]))
	# normalize weights (sum(weights) = 1)
	_weights /= _weights.sum(dim=stacked)
	# Sum of squared normalized weights, needed below for the
	# unbiased weighted variance estimate.
	_ssqw = (_weights**2).sum(dim=stacked)
	mean_ds = (ds * _weights).sum(dim=stacked) if area_weighted \
			else ds.mean(dim=stacked)
	# Keep the sum of squared weights in the output for later
	# uncertainty/error propagation.
	mean_ds["wsqsum"] = _ssqw
	mean_ds["wsqsum"].attrs = {
			"long_name": "sum of squared weights",
			"units": "1"}
	# unbiased standard deviations
	# (the weighted case uses the 1 / (1 - sum(w_i^2)) correction,
	# the unweighted case the usual ddof=1 estimator)
	var_ds = ((_weights * (ds - mean_ds)**2).sum(dim=stacked) /
			(1. - _ssqw)) if area_weighted else ds.var(dim=stacked, ddof=1)
	sdev_ds = np.sqrt(var_ds)
	# NOTE(review): `count` skips NaNs, but the weights above are
	# normalized over all entries including NaNs — verify the intended
	# behaviour for datasets containing missing values.
	cnts_ds = ds.count(dim=stacked)
	if set_attrs:
		# Copy the source attributes and adapt names/units so the
		# derived statistics stay self-describing.
		for var in ds.data_vars:
			sdev_ds[var].attrs = ds[var].attrs
			cnts_ds[var].attrs = ds[var].attrs
			cnts_ds[var].attrs.update({"units": "1"})
			for key in ["long_name", "standard_name"]:
				try:
					sdev_ds[var].attrs.update({
						key: ds[var].attrs[key] + " standard deviation"})
					cnts_ds[var].attrs.update({
						key: ds[var].attrs[key] + " counts"})
				except KeyError:
					# attribute not present on this variable, skip
					pass
	# Suffix the derived variables to distinguish them from the means.
	sdev_ds = sdev_ds.rename({v: v + "_std" for v in sdev_ds.data_vars})
	cnts_ds = cnts_ds.rename({v: v + "_cnt" for v in cnts_ds.data_vars})
	return xr.merge([mean_ds, sdev_ds, cnts_ds])
62
63 1
def bin_lat_timeavg(ds, binvar="latitude", tvar="time",
		bins=np.r_[-90:91:5], labels=None, area_weighted=True,
		keep_attrs=True,
		load=True, save_progress=False):
	"""Latitudinally bin and time average xarray dataset(s)

	Time-averages the variables in an :class:`xarray.Dataset` in the given
	latitude bins. This should be applied to daily-binned datasets from
	a groupby object (via .apply()) to yield daily zonal means.

	The type of latitudes is selected by passing the appropriate
	`binvar` and must be a variable in the data set.
	Area weighting (cos(latitude)) is also supported.

	Parameters
	----------
	ds : xarray.Dataset or xarray.DatasetGroupBy instance
		The dataset (or GroupBy) instance to bin latitudinally.
	binvar : str
		The name of the variable used for binning, default: "latitude".
	tvar : str
		The name of the time variable of the GroupBy object,
		default: "time".
	bins : numpy.ndarray
		The (latitudinal) bin edges, default: `np.r_[-90:91:5]`.
	labels : list or None
		The bin labels, if set to `None` (the default), the labels are
		set to the central bin values.
	area_weighted : bool
		Use area weighted averages, default: `True`.
	keep_attrs : bool
		Keep the global and variable attributes from the data set,
		default: `True`.
	load : bool
		Loads the data into memory before binning, speeds it up considerably
		provided that it fits into memory, default: `True`
	save_progress : bool
		Saves the individual binned files to netcdf, to enable recovering from
		interrupted runs, default: `False`

	Returns
	-------
	ds : xarray.Dataset
		The binned and time-averaged dataset together with the (unbiased)
		standard deviations of the variables as `<variable>_std` and the
		number of averaged values as `<variable>_cnt`.
	"""
	if load:
		# load the chunk into memory to speed up binning
		ds.load()
	# adjust the time variable
	if np.issubdtype(ds[tvar].values[0], np.floating):
		# convert floats to datetime first (probably MLT states)
		date = (
			xr.conventions.decode_cf(ds[[tvar]])[tvar]
			.values[0]
			.astype("datetime64[D]")
		)
	else:
		date = ds[tvar].values[0].astype('datetime64[D]')
	if not hasattr(ds, binvar):
		# nothing to bin
		# fix: `logging.warn` is deprecated, use `logging.warning`
		logging.warning("skipping %s", date)
		return ds
	logging.info("processing %s", date)
	if labels is None:
		# central bin values as labels
		labels = 0.5 * (bins[1:] + bins[:-1])
	# stack and bin and delegate to the statistics helper function
	# fix: stack along the configured (tvar, binvar) variables instead of
	# the hard-coded ("time", "latitude"); the stacked dimension keeps its
	# default name so that `_bin_stats` finds it.
	ds_out = (ds.stack(stacked_time_latitude=(tvar, binvar))
				.groupby_bins(binvar, bins, labels=labels)
				.apply(_bin_stats,
					binvar=binvar, tvar=tvar,
					area_weighted=area_weighted,
					set_attrs=keep_attrs))
	if keep_attrs:
		# restore the global and per-variable attributes that the
		# groupby/apply round trip drops
		ds_out.attrs = ds.attrs
		for var in ds.data_vars:
			ds_out[var].attrs = ds[var].attrs
	if save_progress:
		# checkpoint the daily result for restartable processing
		ds_out.to_netcdf("tmp_binavg-{0}.nc".format(date))
	return ds_out
144