1
|
|
|
# -*- coding: utf-8 -*- |
2
|
|
|
# vim:fileencoding=utf-8 |
3
|
|
|
# |
4
|
|
|
# Copyright (c) 2017 Stefan Bender |
5
|
|
|
# |
6
|
|
|
# This file is part of sciapy. |
7
|
|
|
# sciapy is free software: you can redistribute it or modify it |
8
|
|
|
# under the terms of the GNU General Public License as published by |
9
|
|
|
# the Free Software Foundation, version 2. |
10
|
|
|
# See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html. |
11
|
1 |
|
"""SCIAMACHY level 2 binning |
12
|
|
|
""" |
13
|
|
|
|
14
|
1 |
|
__all__ = ["bin_lat_timeavg"] |
15
|
|
|
|
16
|
1 |
|
import logging |
17
|
|
|
|
18
|
1 |
|
import numpy as np |
19
|
1 |
|
import xarray as xr |
20
|
|
|
|
21
|
1 |
|
def _bin_stats(ds, |
22
|
|
|
binvar="latitude", tvar="time", |
23
|
|
|
area_weighted=True, stacked="stacked_time_latitude", |
24
|
|
|
set_attrs=True): |
25
|
|
|
"""Helper function for (weighted) bin statistics |
26
|
|
|
""" |
27
|
1 |
|
if not hasattr(ds, stacked): |
28
|
1 |
|
stacked = "stacked_{0}_{1}".format(tvar, binvar) |
29
|
1 |
|
ds = ds.stack(**{stacked: (tvar, binvar)}) |
30
|
1 |
|
_weights = (np.cos(np.radians(ds[binvar])) if area_weighted |
31
|
|
|
else xr.ones_like(ds[binvar])) |
32
|
|
|
# normalize weights (sum(weights) = 1) |
33
|
1 |
|
_weights /= _weights.sum(dim=stacked) |
34
|
1 |
|
_ssqw = (_weights**2).sum(dim=stacked) |
35
|
1 |
|
mean_ds = (ds * _weights).sum(dim=stacked) if area_weighted \ |
36
|
|
|
else ds.mean(dim=stacked) |
37
|
1 |
|
mean_ds["wsqsum"] = _ssqw |
38
|
1 |
|
mean_ds["wsqsum"].attrs = { |
39
|
|
|
"long_name": "sum of squared weights", |
40
|
|
|
"units": "1"} |
41
|
|
|
# unbiased standard deviations |
42
|
1 |
|
var_ds = ((_weights * (ds - mean_ds)**2).sum(dim=stacked) / |
43
|
|
|
(1. - _ssqw)) if area_weighted else ds.var(dim=stacked, ddof=1) |
44
|
1 |
|
sdev_ds = np.sqrt(var_ds) |
45
|
1 |
|
cnts_ds = ds.count(dim=stacked) |
46
|
1 |
|
if set_attrs: |
47
|
1 |
|
for var in ds.data_vars: |
48
|
1 |
|
sdev_ds[var].attrs = ds[var].attrs |
49
|
1 |
|
cnts_ds[var].attrs = ds[var].attrs |
50
|
1 |
|
cnts_ds[var].attrs.update({"units": "1"}) |
51
|
1 |
|
for key in ["long_name", "standard_name"]: |
52
|
1 |
|
try: |
53
|
1 |
|
sdev_ds[var].attrs.update({ |
54
|
|
|
key: ds[var].attrs[key] + " standard deviation"}) |
55
|
|
|
cnts_ds[var].attrs.update({ |
56
|
|
|
key: ds[var].attrs[key] + " counts"}) |
57
|
1 |
|
except KeyError: |
58
|
1 |
|
pass |
59
|
1 |
|
sdev_ds = sdev_ds.rename({v: v + "_std" for v in sdev_ds.data_vars}) |
60
|
1 |
|
cnts_ds = cnts_ds.rename({v: v + "_cnt" for v in cnts_ds.data_vars}) |
61
|
1 |
|
return xr.merge([mean_ds, sdev_ds, cnts_ds]) |
62
|
|
|
|
63
|
1 |
|
def bin_lat_timeavg(ds, binvar="latitude", tvar="time", |
64
|
|
|
bins=np.r_[-90:91:5], labels=None, area_weighted=True, |
65
|
|
|
keep_attrs=True, |
66
|
|
|
load=True, save_progress=False): |
67
|
|
|
"""Latitudinally bin and time average xarray dataset(s) |
68
|
|
|
|
69
|
|
|
Time-averages the variables in an :class:`xarray.Dataset` in the given |
70
|
|
|
latitude bins. This should be applied to daily-binned datasets from |
71
|
|
|
a groupby object (via .apply()) to yield daily zonal means. |
72
|
|
|
|
73
|
|
|
The type of latitudes is selected by passing the appropriate |
74
|
|
|
`binvar` and must be a variable in the data set. |
75
|
|
|
Area weighting (cos(latitude)) is also supported. |
76
|
|
|
|
77
|
|
|
Parameters |
78
|
|
|
---------- |
79
|
|
|
ds : xarray.Dataset or xarray.DatasetGroupBy instance |
80
|
|
|
The dataset (or GroupBy) instance to bin latitudinally. |
81
|
|
|
binvar : str |
82
|
|
|
The name of the variable used for binning, default: "latitude". |
83
|
|
|
tvar : str |
84
|
|
|
The name of the time variable of the GroupBy object, |
85
|
|
|
default: "time". |
86
|
|
|
bins : numpy.ndarray |
87
|
|
|
The (latitudinal) bin edges, default: `np.r_[-90:91:5]`. |
88
|
|
|
labels : list or None |
89
|
|
|
The bin labels, if set to `None` (the default), the labels are |
90
|
|
|
set to the central bin values. |
91
|
|
|
area_weighted : bool |
92
|
|
|
Use area weighted averages, default: `True`. |
93
|
|
|
keep_attrs : bool |
94
|
|
|
Keep the global and variable attributes from the data set, |
95
|
|
|
default: `True`. |
96
|
|
|
load : bool |
97
|
|
|
Loads the data into memory before binning, speeds it up considerably |
98
|
|
|
provided that the it fits into memory, default: `True` |
99
|
|
|
save_progress : bool |
100
|
|
|
Saves the individual binned files to netcdf, to enable recovering from |
101
|
|
|
interrupted runs, default: `False` |
102
|
|
|
|
103
|
|
|
Returns |
104
|
|
|
------- |
105
|
|
|
ds : xarray.Dataset |
106
|
|
|
The binned and time-averaged dataset together with the (unbiased) |
107
|
|
|
standard deviations of the variables as `<variable>_std` and the |
108
|
|
|
number of averaged values as `<variable>_cnt`. |
109
|
|
|
""" |
110
|
1 |
|
if load: |
111
|
|
|
# load the chunk into memory to speed up binning |
112
|
1 |
|
ds.load() |
113
|
|
|
# adjust the time variable |
114
|
1 |
|
if np.issubdtype(ds[tvar].values[0], np.floating): |
115
|
|
|
# convert floats to datetime first (probably MLT states) |
116
|
1 |
|
try: |
117
|
|
|
# xarray <= 0.9.6 |
118
|
1 |
|
date = (xr.conventions.decode_cf_variable(ds[tvar]) |
119
|
|
|
.values[0] |
120
|
|
|
.astype("datetime64[D]")) |
121
|
1 |
|
except TypeError: |
122
|
|
|
# xarray => 0.10.0 |
123
|
1 |
|
date = (xr.conventions.decode_cf_variable(tvar, ds[tvar]) |
124
|
|
|
.values[0] |
125
|
|
|
.astype("datetime64[D]")) |
126
|
|
|
else: |
127
|
|
|
date = ds[tvar].values[0].astype('datetime64[D]') |
128
|
1 |
|
if not hasattr(ds, binvar): |
129
|
|
|
# nothing to bin |
130
|
|
|
logging.warn("skipping %s", date) |
131
|
|
|
return ds |
132
|
1 |
|
logging.info("processing %s", date) |
133
|
1 |
|
if labels is None: |
134
|
1 |
|
labels = 0.5 * (bins[1:] + bins[:-1]) |
135
|
|
|
# stack and bin and delegate to the statistics helper function |
136
|
1 |
|
ds_out = (ds.stack(stacked_time_latitude=("time", "latitude")) |
137
|
|
|
.groupby_bins(binvar, bins, labels=labels) |
138
|
|
|
.apply(_bin_stats, |
139
|
|
|
binvar=binvar, tvar=tvar, |
140
|
|
|
area_weighted=area_weighted, |
141
|
|
|
set_attrs=keep_attrs)) |
142
|
1 |
|
if keep_attrs: |
143
|
1 |
|
ds_out.attrs = ds.attrs |
144
|
1 |
|
for var in ds.data_vars: |
145
|
1 |
|
ds_out[var].attrs = ds[var].attrs |
146
|
1 |
|
if save_progress: |
147
|
|
|
ds_out.to_netcdf("tmp_binavg-{0}.nc".format(date)) |
148
|
|
|
return ds_out |
149
|
|
|
|