Passed
Push — master ( d1ac5b...af9f8c )
by Stefan
03:57
created

sciapy.level2.binning   A

Complexity

Total Complexity 18

Size/Duplication

Total Lines 149
Duplicated Lines 0 %

Test Coverage

Coverage 90.91%

Importance

Changes 0
Metric Value
eloc 77
dl 0
loc 149
ccs 50
cts 55
cp 0.9091
rs 10
c 0
b 0
f 0
wmc 18

2 Functions

Rating   Name   Duplication   Size   Complexity  
C bin_lat_timeavg() 0 86 9
C _bin_stats() 0 41 9
1
# -*- coding: utf-8 -*-
2
# vim:fileencoding=utf-8
3
#
4
# Copyright (c) 2017 Stefan Bender
5
#
6
# This file is part of sciapy.
7
# sciapy is free software: you can redistribute it or modify it
8
# under the terms of the GNU General Public License as published by
9
# the Free Software Foundation, version 2.
10
# See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html.
11 1
"""SCIAMACHY level 2 binning
12
"""
13
14 1
__all__ = ["bin_lat_timeavg"]
15
16 1
import logging
17
18 1
import numpy as np
19 1
import xarray as xr
20
21 1
def _bin_stats(ds,
		binvar="latitude", tvar="time",
		area_weighted=True, stacked="stacked_time_latitude",
		set_attrs=True):
	"""Helper function for (weighted) bin statistics

	Computes the (optionally cos(latitude)-area-weighted) mean,
	unbiased standard deviation, and counts of all data variables
	over the stacked (time, latitude) dimension.
	"""
	# Stack on demand if the expected stacked dimension is absent.
	if not hasattr(ds, stacked):
		stacked = "stacked_{0}_{1}".format(tvar, binvar)
		ds = ds.stack(**{stacked: (tvar, binvar)})
	if area_weighted:
		weights = np.cos(np.radians(ds[binvar]))
	else:
		weights = xr.ones_like(ds[binvar])
	# normalize weights (sum(weights) = 1)
	weights = weights / weights.sum(dim=stacked)
	sum_sq_weights = (weights**2).sum(dim=stacked)
	if area_weighted:
		mean_ds = (ds * weights).sum(dim=stacked)
	else:
		mean_ds = ds.mean(dim=stacked)
	mean_ds["wsqsum"] = sum_sq_weights
	mean_ds["wsqsum"].attrs = {
			"long_name": "sum of squared weights",
			"units": "1"}
	# unbiased standard deviations
	if area_weighted:
		var_ds = ((weights * (ds - mean_ds)**2).sum(dim=stacked) /
				(1. - sum_sq_weights))
	else:
		var_ds = ds.var(dim=stacked, ddof=1)
	sdev_ds = np.sqrt(var_ds)
	cnts_ds = ds.count(dim=stacked)
	if set_attrs:
		# Propagate the variable attributes and adjust the names/units
		# to describe the derived quantities.
		for var in ds.data_vars:
			src_attrs = ds[var].attrs
			sdev_ds[var].attrs = src_attrs
			cnts_ds[var].attrs = src_attrs
			cnts_ds[var].attrs.update({"units": "1"})
			for key in ("long_name", "standard_name"):
				if key in src_attrs:
					sdev_ds[var].attrs.update({
						key: src_attrs[key] + " standard deviation"})
					cnts_ds[var].attrs.update({
						key: src_attrs[key] + " counts"})
	sdev_ds = sdev_ds.rename({v: v + "_std" for v in sdev_ds.data_vars})
	cnts_ds = cnts_ds.rename({v: v + "_cnt" for v in cnts_ds.data_vars})
	return xr.merge([mean_ds, sdev_ds, cnts_ds])
62
63 1
def bin_lat_timeavg(ds, binvar="latitude", tvar="time",
		bins=np.r_[-90:91:5], labels=None, area_weighted=True,
		keep_attrs=True,
		load=True, save_progress=False):
	"""Latitudinally bin and time average xarray dataset(s)

	Time-averages the variables in an :class:`xarray.Dataset` in the given
	latitude bins. This should be applied to daily-binned datasets from
	a groupby object (via .apply()) to yield daily zonal means.

	The type of latitudes is selected by passing the appropriate
	`binvar` and must be a variable in the data set.
	Area weighting (cos(latitude)) is also supported.

	Parameters
	----------
	ds : xarray.Dataset or xarray.DatasetGroupBy instance
		The dataset (or GroupBy) instance to bin latitudinally.
	binvar : str
		The name of the variable used for binning, default: "latitude".
	tvar : str
		The name of the time variable of the GroupBy object,
		default: "time".
	bins : numpy.ndarray
		The (latitudinal) bin edges, default: `np.r_[-90:91:5]`.
	labels : list or None
		The bin labels, if set to `None` (the default), the labels are
		set to the central bin values.
	area_weighted : bool
		Use area weighted averages, default: `True`.
	keep_attrs : bool
		Keep the global and variable attributes from the data set,
		default: `True`.
	load : bool
		Loads the data into memory before binning, speeds it up considerably
		provided that the it fits into memory, default: `True`
	save_progress : bool
		Saves the individual binned files to netcdf, to enable recovering from
		interrupted runs, default: `False`

	Returns
	-------
	ds : xarray.Dataset
		The binned and time-averaged dataset together with the (unbiased)
		standard deviations of the variables as `<variable>_std` and the
		number of averaged values as `<variable>_cnt`.
	"""
	if load:
		# load the chunk into memory to speed up binning
		ds.load()
	# adjust the time variable
	if np.issubdtype(ds[tvar].values[0], np.floating):
		# convert floats to datetime first (probably MLT states)
		try:
			# xarray <= 0.9.6: decode_cf_variable(var)
			date = (xr.conventions.decode_cf_variable(ds[tvar])
				.values[0]
				.astype("datetime64[D]"))
		except TypeError:
			# xarray >= 0.10.0: decode_cf_variable(name, var)
			date = (xr.conventions.decode_cf_variable(tvar, ds[tvar])
				.values[0]
				.astype("datetime64[D]"))
	else:
		date = ds[tvar].values[0].astype('datetime64[D]')
	if not hasattr(ds, binvar):
		# nothing to bin
		# NOTE: logging.warn() is deprecated, use logging.warning()
		logging.warning("skipping %s", date)
		return ds
	logging.info("processing %s", date)
	if labels is None:
		# default labels: central bin values
		labels = 0.5 * (bins[1:] + bins[:-1])
	# Stack the actual (tvar, binvar) dimensions instead of hard-coding
	# ("time", "latitude") so that non-default variable names work too.
	# The stacked dimension name is passed through to `_bin_stats` so it
	# does not have to guess (or re-stack) it.
	stacked = "stacked_{0}_{1}".format(tvar, binvar)
	# stack and bin and delegate to the statistics helper function
	ds_out = (ds.stack(**{stacked: (tvar, binvar)})
				.groupby_bins(binvar, bins, labels=labels)
				.apply(_bin_stats,
					binvar=binvar, tvar=tvar,
					area_weighted=area_weighted,
					stacked=stacked,
					set_attrs=keep_attrs))
	if keep_attrs:
		# copy the global and variable attributes over to the result
		ds_out.attrs = ds.attrs
		for var in ds.data_vars:
			ds_out[var].attrs = ds[var].attrs
	if save_progress:
		# checkpoint this chunk to allow recovery of interrupted runs
		ds_out.to_netcdf("tmp_binavg-{0}.nc".format(date))
	return ds_out
149