| 1 |  |  | # -*- coding: utf-8 -*- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | # vim:fileencoding=utf-8 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | # | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | # Copyright (c) 2017-2018 Stefan Bender | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | # | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | # This module is part of sciapy. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | # sciapy is free software: you can redistribute it or modify | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | # it under the terms of the GNU General Public License as published | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | # by the Free Software Foundation, version 2. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | # See accompanying LICENSE file or http://www.gnu.org/licenses/gpl-2.0.html. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 | 1 |  | """SCIAMACHY data interface | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  | Data loading and selection routines for SCIAMACHY data regression fits. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  | Includes reading the solar and geomagnetic index files used as proxies. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  | """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 | 1 |  | import logging | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 | 1 |  | import numpy as np | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 | 1 |  | import pandas as pd | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 | 1 |  | import xarray as xr | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 | 1 |  | from astropy.time import Time | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 | 1 |  | __all__ = ["load_solar_gm_table", "load_scia_dzm"] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 | 1 |  | _NPV_MAJOR, _NPV_MINOR, _NPV_PATCH = np.__version__.split('.') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 | 1 |  | _seasons = { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  | 	"summerNH": [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  | 		slice("2002-03-20", "2002-09-25"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  | 		slice("2003-03-20", "2003-10-13"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  | 		slice("2004-03-20", "2004-10-20"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  | 		slice("2005-03-20", "2005-10-20"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  | 		slice("2006-03-20", "2006-10-20"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  | 		slice("2007-03-21", "2007-10-20"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  | 		slice("2008-03-20", "2008-10-20"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  | 		slice("2009-03-20", "2009-10-20"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  | 		slice("2010-03-20", "2010-10-20"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  | 		slice("2011-03-20", "2011-08-31"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  | 		slice("2012-03-20", "2012-04-07"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  | 	], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  | 	"summerSH": [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  | 		slice("2002-09-13", "2003-03-26"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  | 		slice("2003-09-13", "2004-04-01"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  | 		slice("2004-09-14", "2005-04-02"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  | 		slice("2005-09-13", "2006-04-02"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  | 		slice("2006-09-13", "2007-04-02"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  | 		slice("2007-09-13", "2008-04-01"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  | 		slice("2008-09-14", "2009-04-02"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  | 		slice("2009-09-03", "2010-04-02"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  | 		slice("2010-09-13", "2011-04-02"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  | 		slice("2011-09-13", "2012-04-01"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  | 	] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  | } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 | 1 |  | _SPEs = [pd.date_range("2002-04-20", "2002-05-01"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  | 		pd.date_range("2002-05-21", "2002-06-02"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  | 		pd.date_range("2002-07-15", "2002-07-27"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  | 		pd.date_range("2002-08-23", "2002-09-03"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  | 		pd.date_range("2002-09-06", "2002-09-17"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  | 		pd.date_range("2002-11-08", "2002-11-20"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  | 		pd.date_range("2003-05-27", "2003-06-07"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  | 		pd.date_range("2003-10-25", "2003-11-15"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  | 		pd.date_range("2004-07-24", "2004-08-05"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  | 		pd.date_range("2004-11-06", "2004-11-18"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  | 		pd.date_range("2005-01-15", "2005-01-27"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  | 		pd.date_range("2005-05-13", "2005-05-25"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  | 		pd.date_range("2005-07-13", "2005-08-05"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  | 		pd.date_range("2005-08-21", "2005-09-02"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  | 		pd.date_range("2005-09-07", "2005-09-21"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  | 		pd.date_range("2006-12-05", "2006-12-23"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  | 		pd.date_range("2012-01-22", "2012-02-07"), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  | 		pd.date_range("2012-03-06", "2012-03-27")] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 76 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 77 | 1 |  | def load_dailymeanAE( | 
            
                                                                        
                            
            
                                    
            
            
                | 78 |  |  | 		filename="data/indices/AE_Kyoto_1980-2018_daily2_shift12h.dat", | 
            
                                                                        
                            
            
                                    
            
            
                | 79 |  |  | 		name="AE", tfmt="jyear"): | 
            
                                                                        
                            
            
                                    
            
            
                | 80 | 1 |  | 	from pkg_resources import resource_filename | 
            
                                                                        
                            
            
                                    
            
            
                | 81 | 1 |  | 	AEfilename = resource_filename("sciapy", filename) | 
            
                                                                        
                            
            
                                    
            
            
                | 82 | 1 |  | 	return load_solar_gm_table(AEfilename, cols=[0, 1], | 
            
                                                                        
                            
            
                                    
            
            
                | 83 |  |  | 			names=["time", name], tfmt=tfmt) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 | 1 |  | def load_dailymeanLya(filename="data/indices/lisird_lya3_1980-2021.dat", | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  | 		name="Lya", tfmt="jyear"): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 | 1 |  | 	from pkg_resources import resource_filename | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 | 1 |  | 	Lyafilename = resource_filename("sciapy", filename) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 | 1 |  | 	return load_solar_gm_table(Lyafilename, cols=[0, 1], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  | 			names=["time", name], tfmt=tfmt) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 | 1 |  | def load_solar_gm_table(filename, cols, names, sep="\t", tfmt="jyear"): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  | 	"""Load proxy tables from ascii files | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  | 	This function wraps :func:`numpy.genfromtxt()` [#]_ with | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  | 	pre-defined settings to match the file format. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  | 	It explicitly returns the times as UTC decimal years or julian epochs. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  | 	.. [#] https://docs.scipy.org/doc/numpy/reference/generated/numpy.genfromtxt.html | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  | 	Parameters | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  | 	---------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  | 	filename: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  | 		The file to read | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  | 	cols: sequence | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  | 		The columns in the file to use, passed to `numpy.genfromtxt`'s | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  | 		`usecols` keyword. Should be at least of length 2, indicating | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  | 		time values in the first column, e.g., `cols=[0, 1]`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  | 	names: sequence | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  | 		The column names, passed as `names` to `numpy.genfromtxt()`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  | 		Should be at least of length 2, naming the proxy values in the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  | 		second column, e.g., `names=["time", "proxy"]`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  | 	sep: str, optional | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  | 		The column separator character, passed as `sep`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  | 		Default: `\t` | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  | 	tfmt: str, optional | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  | 		The astropy.time "Time Format" for the time units, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  | 		for example, "jyear", "decimalyear", "jd", "mjd", etc. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  | 		See: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  | 		http://docs.astropy.org/en/stable/time/index.html#time-format | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  | 		Default: "jyear" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  | 	Returns | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  | 	------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  | 	times: array_like | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 |  |  | 		The proxy times according to the `tfmt` keyword (UTC) as | 
            
                                                                                                            
                            
            
                                    
            
            
                | 129 |  |  | 		:class:`numpy.ndarray` from the first column of "cols". | 
            
                                                                                                            
                            
            
                                    
            
            
                | 130 |  |  | 	values: tuple of array_like | 
            
                                                                                                            
                            
            
                                    
            
            
                | 131 |  |  | 		The proxy values combined into a structured array | 
            
                                                                                                            
                            
            
                                    
            
            
                | 132 |  |  | 		(:class:`numpy.recarray`) with fields set | 
            
                                                                                                            
                            
            
                                    
            
            
                | 133 |  |  | 		according to "names[1:]" and "cols[1:]" passed above. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  | 	See Also | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  | 	-------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  | 	:class:`astropy.time.Time` | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 |  |  | 	:class:`numpy.recarray` | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  | 	""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 140 | 1 |  | 	if len(cols) < 2 and len(names) < 2: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 141 |  |  | 		raise ValueError( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 142 |  |  | 				"Both `cols` and `names` should be at least of length 2.") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 143 | 1 |  | 	encoding = {} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 144 |  |  | 	# set the encoding for numpy >= 1.14.0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 145 | 1 |  | 	if int(_NPV_MAJOR) >= 1 and int(_NPV_MINOR) >= 14: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 146 | 1 |  | 		encoding = dict(encoding=None) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 147 | 1 |  | 	tab = np.genfromtxt(filename, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 148 |  |  | 			delimiter=sep, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 149 |  |  | 			dtype=None, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 150 |  |  | 			names=names, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 151 |  |  | 			usecols=cols, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 152 |  |  | 			**encoding) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 153 | 1 |  | 	times = Time(tab[names[0]], scale="utc") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 154 | 1 |  | 	ts = getattr(times, tfmt) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 155 | 1 |  | 	return ts, tab[names[1:]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 | 1 |  | def _greedy_select(ds, size, varname="NO_DENS_std", scale=1.): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  | 	logging.info("Greedy subsampling to size: %s", size) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  | 	var = np.ma.masked_array((ds[varname] * scale)**2, mask=[False]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 161 |  |  | 	var_p = np.ma.masked_array((ds[varname] * scale)**2, mask=[False]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 162 |  |  | 	sigma2_i = scale**2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 163 |  |  | 	sigma2_ip = scale**2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 164 |  |  | 	idxs = np.arange(len(var)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 165 |  |  | 	for _ in range(size): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 166 |  |  | 		max_entr_idx = np.ma.argmax(np.log(1. + var * sigma2_i)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 167 |  |  | 		min_entr_idx = np.ma.argmin(np.log(1. + var_p * sigma2_ip)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 168 |  |  | 		sigma2_i += 1. / var[max_entr_idx] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 169 |  |  | 		sigma2_ip += 1. / var_p[min_entr_idx] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 170 |  |  | 		var.mask[max_entr_idx] = True | 
            
                                                                                                            
                            
            
                                    
            
            
                | 171 |  |  | 		var_p.mask[max_entr_idx] = True | 
            
                                                                                                            
                            
            
                                    
            
            
                | 172 |  |  | 	return ds.isel(time=idxs[var.mask]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 173 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 174 | 1 |  | def _greedy_idxs_post(x, xerr, size): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 175 |  |  | 	logging.info("Greedy subsampling to size: %s", size) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 176 |  |  | 	var = np.ma.masked_array(xerr**2, mask=[False]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 177 |  |  | 	var_p = np.ma.masked_array(xerr**2, mask=[False]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 178 |  |  | 	sigma2_i = 1. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 179 |  |  | 	sigma2_ip = 1. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 180 |  |  | 	idxs = np.arange(len(var)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 181 |  |  | 	for _ in range(size): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 182 |  |  | 		max_entr_idx = np.ma.argmax(np.log(1. + var * sigma2_i)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 183 |  |  | 		min_entr_idx = np.ma.argmin(np.log(1. + var_p * sigma2_ip)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 184 |  |  | 		sigma2_i += 1. / var[max_entr_idx] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 185 |  |  | 		sigma2_ip += 1. / var_p[min_entr_idx] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 186 |  |  | 		var.mask[max_entr_idx] = True | 
            
                                                                                                            
                            
            
                                    
            
            
                | 187 |  |  | 		var_p.mask[max_entr_idx] = True | 
            
                                                                                                            
                            
            
                                    
            
            
                | 188 |  |  | 	return idxs[var.mask] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 189 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 190 | 1 |  | def load_scia_dzm(filename, alt, lat, tfmt="jyear", | 
            
                                                                                                            
                            
            
                                    
            
            
                | 191 |  |  | 		scale=1, subsample_factor=1, subsample_method="greedy", | 
            
                                                                                                            
                            
            
                                    
            
            
                | 192 |  |  | 		akd_threshold=0.002, cnt_threshold=0, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 193 |  |  | 		center=False, season=None, SPEs=False): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 194 |  |  | 	"""Load SCIAMACHY daily zonal mean data | 
            
                                                                                                            
                            
            
                                    
            
            
                | 195 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 196 |  |  | 	Interface function for SCIAMACHY daily zonal mean data files version 6.x. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 197 |  |  | 	Uses :mod:`xarray` [#]_ to load and select the data. Possible selections are by | 
            
                                                                                                            
                            
            
                                    
            
            
                | 198 |  |  | 	hemispheric summer (NH summer ~ SH winter and SH summer ~ NH winter) and | 
            
                                                                                                            
                            
            
                                    
            
            
                | 199 |  |  | 	exclusion of strong solar proton events (SPE). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 200 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 201 |  |  | 	.. [#] https://xarray.pydata.org | 
            
                                                                                                            
                            
            
                                    
            
            
                | 202 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 203 |  |  | 	Parameters | 
            
                                                                                                            
                            
            
                                    
            
            
                | 204 |  |  | 	---------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 205 |  |  | 	filename: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 206 |  |  | 		The input filename | 
            
                                                                                                            
                            
            
                                    
            
            
                | 207 |  |  | 	alt: float | 
            
                                                                                                            
                            
            
                                    
            
            
                | 208 |  |  | 		The altitude | 
            
                                                                                                            
                            
            
                                    
            
            
                | 209 |  |  | 	lat: float | 
            
                                                                                                            
                            
            
                                    
            
            
                | 210 |  |  | 		The longitude | 
            
                                                                                                            
                            
            
                                    
            
            
                | 211 |  |  | 	tfmt: string, optional | 
            
                                                                                                            
                            
            
                                    
            
            
                | 212 |  |  | 		The astropy.time "Time Format" for the time units, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 213 |  |  | 		for example, "jyear", "decimalyear", "jd", "mjd", etc. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 214 |  |  | 		See: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 215 |  |  | 		http://docs.astropy.org/en/stable/time/index.html#time-format | 
            
                                                                                                            
                            
            
                                    
            
            
                | 216 |  |  | 		Default: "jyear" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 217 |  |  | 	scale: float, optional | 
            
                                                                                                            
                            
            
                                    
            
            
                | 218 |  |  | 		Scale factor of the data (default: 1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 219 |  |  | 	subsample_factor: int, optional | 
            
                                                                                                            
                            
            
                                    
            
            
                | 220 |  |  | 		Factor to subsample the data by (see `subsample_method`) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 221 |  |  | 		(default: 1 (no subsampling)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 222 |  |  | 	subsample_method: "equal", "greedy", or "random", optional | 
            
                                                                                                            
                            
            
                                    
            
            
                | 223 |  |  | 		Method for subsampling the data (see `subsample_factor`). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 224 |  |  | 		"equal" for equally spaced subsampling, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 225 |  |  | 		"greedy" for selecting the data based on their uncertainty, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 226 |  |  | 		and "random" for uniform random subsampling. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 227 |  |  | 		(default: "greedy") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 228 |  |  | 	center: bool, optional | 
            
                                                                                                            
                            
            
                                    
            
            
                | 229 |  |  | 		Center the data by subtracting the global mean. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 230 |  |  | 		(default: False) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 231 |  |  | 	season: "summerNH", "summerSH", or `None`, optional | 
            
                                                                                                            
                            
            
                                    
            
            
                | 232 |  |  | 		Select the named season or `None` for all data (default: None) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 233 |  |  | 	SPEs: bool, optional | 
            
                                                                                                            
                            
            
                                    
            
            
                | 234 |  |  | 		Set to `True` to exclude strong SPE events (default: False) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 235 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 236 |  |  | 	Returns | 
            
                                                                                                            
                            
            
                                    
            
            
                | 237 |  |  | 	------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 238 |  |  | 	(times, dens, errs): tuple of (N,) array_like | 
            
                                                                                                            
                            
            
                                    
            
            
                | 239 |  |  | 		The measurement times according to the `tfmt` keyword, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 240 |  |  | 		the number densities, and their uncertainties. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 241 |  |  | 	""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 242 | 1 |  | 	logging.info("Opening dataset: '%s'", filename) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 243 | 1 |  | 	NO_ds = xr.open_mfdataset(filename, decode_times=False, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 244 |  |  | 				chunks={"time": 400, "latitude": 9, "altitude": 11}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 245 | 1 |  | 	logging.info("done.") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 246 |  |  | 	# Decode time coordinate for selection | 
            
                                                                                                            
                            
            
                                    
            
            
                | 247 | 1 |  | 	NO_ds["time"] = xr.conventions.decode_cf(NO_ds[["time"]]).time | 
            
                                                                                                            
                            
            
                                    
            
            
                | 248 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 249 | 1 |  | 	NO_mean = 0. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 250 | 1 |  | 	if center: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 251 | 1 |  | 		NO_mean = NO_ds.NO_DENS.mean().values | 
            
                                                                                                            
                            
            
                                    
            
            
                | 252 | 1 |  | 		logging.info("Centering with global mean: %s", NO_mean) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 253 | 1 |  | 	NO_tds = NO_ds.sel(latitude=lat, altitude=alt) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 254 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 255 |  |  | 	# Exclude SPEs first if requested | 
            
                                                                                                            
                            
            
                                    
            
            
                | 256 | 1 |  | 	if SPEs: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 257 | 1 |  | 		logging.info("Removing SPEs.") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 258 | 1 |  | 		for spe in _SPEs: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 259 | 1 |  | 			NO_tds = NO_tds.drop( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 260 |  |  | 					NO_tds.sel(time=slice(spe[0], spe[-1])).time.values, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 261 |  |  | 					dim="time") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 262 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 263 |  |  | 	# Filter by season | 
            
                                                                                                            
                            
            
                                    
            
            
                | 264 | 1 |  | 	if season in _seasons.keys(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 265 | 1 |  | 		logging.info("Restricting to season: %s", season) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 266 | 1 |  | 		NO_tds = xr.concat([NO_tds.sel(time=s) for s in _seasons[season]], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 267 |  |  | 					dim="time") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 268 | 1 |  | 		NO_tds.load() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 269 |  |  | 	else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 270 | 1 |  | 		logging.info("No season selected or unknown season, " | 
            
                                                                                                            
                            
            
                                    
            
            
                | 271 |  |  | 					"using all available data.") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 272 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 273 | 1 |  | 	try: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 274 | 1 |  | 		NO_counts = NO_tds.NO_DENS_cnt | 
            
                                                                                                            
                            
            
                                    
            
            
                | 275 |  |  | 	except AttributeError: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 276 |  |  | 		NO_counts = NO_tds.counts | 
            
                                                                                                            
                            
            
                                    
            
            
                | 277 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 278 |  |  | 	# Select only useful data | 
            
                                                                                                            
                            
            
                                    
            
            
                | 279 | 1 |  | 	NO_tds = NO_tds.where( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 280 |  |  | 				np.isfinite(NO_tds.NO_DENS) & | 
            
                                                                                                            
                            
            
                                    
            
            
                | 281 |  |  | 				(NO_tds.NO_DENS_std != 0) & | 
            
                                                                                                            
                            
            
                                    
            
            
                | 282 |  |  | 				(NO_tds.NO_AKDIAG > akd_threshold) & | 
            
                                                                                                            
                            
            
                                    
            
            
                | 283 |  |  | 				(NO_counts > cnt_threshold) & | 
            
                                                                                                            
                            
            
                                    
            
            
                | 284 |  |  | 				(NO_tds.NO_MASK == 0), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 285 |  |  | 				drop=True) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 286 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 287 | 1 |  | 	no_dens = scale * NO_tds.NO_DENS | 
            
                                                                                                            
                            
            
                                    
            
            
                | 288 | 1 |  | 	if center: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 289 | 1 |  | 		no_dens -= scale * NO_mean | 
            
                                                                                                            
                            
            
                                    
            
            
                | 290 | 1 |  | 	no_errs = scale * NO_tds.NO_DENS_std / np.sqrt(NO_counts) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 291 | 1 |  | 	logging.debug("no_dens.shape (ntime,): %s", no_dens.shape) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 292 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 293 | 1 |  | 	no_sza = NO_tds.mean_SZA | 
            
                                                                                                            
                            
            
                                    
            
            
                | 294 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 295 |  |  | 	# Convert to astropy.Time for Julian epoch or decimal year | 
            
                                                                                                            
                            
            
                                    
            
            
                | 296 | 1 |  | 	if NO_tds.time.size > 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 297 | 1 |  | 		no_t = Time(pd.to_datetime(NO_tds.time.values, utc=True).to_pydatetime(), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 298 |  |  | 					format="datetime", scale="utc") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 299 | 1 |  | 		no_ys = getattr(no_t, tfmt) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 300 |  |  | 	else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 301 | 1 |  | 		no_ys = np.empty_like(NO_tds.time.values, dtype=np.float64) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 302 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 303 | 1 |  | 	if subsample_factor > 1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 304 |  |  | 		new_data_size = no_dens.shape[0] // subsample_factor | 
            
                                                                                                            
                            
            
                                    
            
            
                | 305 |  |  | 		if subsample_method == "random": | 
            
                                                                                                            
                            
            
                                    
            
            
                | 306 |  |  | 			# random subsampling | 
            
                                                                                                            
                            
            
                                    
            
            
                | 307 |  |  | 			_idxs = np.random.choice(no_dens.shape[0], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 308 |  |  | 					new_data_size, replace=False) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 309 |  |  | 		elif subsample_method == "equal": | 
            
                                                                                                            
                            
            
                                    
            
            
                | 310 |  |  | 			# equally spaced subsampling | 
            
                                                                                                            
                            
            
                                    
            
            
                | 311 |  |  | 			_idxs = slice(0, no_dens.shape[0], subsample_factor) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 312 |  |  | 		else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 313 |  |  | 			# "greedy" subsampling (default fall-back) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 314 |  |  | 			_idxs = _greedy_idxs_post(no_dens, no_errs, new_data_size) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 315 |  |  | 		return (no_ys[_idxs], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 316 |  |  | 				no_dens.values[_idxs], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 317 |  |  | 				no_errs.values[_idxs], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 318 |  |  | 				no_sza.values[_idxs]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 319 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 320 |  |  | 	return no_ys, no_dens.values, no_errs.values, no_sza.values | 
            
                                                        
            
                                    
            
            
                | 321 |  |  |  |