Conditions | 11 |
Total Lines | 131 |
Code Lines | 61 |
Lines | 0 |
Ratio | 0 % |
Tests | 34 |
CRAP Score | 12.1094 |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
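Commonly applied refactorings include:
- Extract Method
If many parameters/temporary variables are present:
- Replace Temp with Query
- Introduce Parameter Object
- Preserve Whole Object
- Replace Method with Method Object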
Complex units of code, such as the function sciapy.regress.load_data.load_scia_dzm(), often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to find such a component is to look for fields, methods, or (within a function) local variables that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.
There are several approaches to avoid long parameter lists:
1 | # -*- coding: utf-8 -*- |
||
def load_scia_dzm(filename, alt, lat, tfmt="jyear",
        scale=1, subsample_factor=1, subsample_method="greedy",
        akd_threshold=0.002, cnt_threshold=0,
        center=False, season=None, SPEs=False):
    """Load SCIAMACHY daily zonal mean data

    Interface function for SCIAMACHY daily zonal mean data files version 6.x.
    Uses :mod:`xarray` [#]_ to load and select the data. Possible selections are by
    hemispheric summer (NH summer ~ SH winter and SH summer ~ NH winter) and
    exclusion of strong solar proton events (SPE).

    .. [#] https://xarray.pydata.org

    Parameters
    ----------
    filename: str
        The input filename
    alt: float
        The altitude, used to select along the dataset's `altitude`
        coordinate.
    lat: float
        The latitude, used to select along the dataset's `latitude`
        coordinate.
    tfmt: string, optional
        The astropy.time "Time Format" for the time units,
        for example, "jyear", "decimalyear", "jd", "mjd", etc.
        See:
        http://docs.astropy.org/en/stable/time/index.html#time-format
        Default: "jyear"
    scale: float, optional
        Scale factor of the data (default: 1)
    subsample_factor: int, optional
        Factor to subsample the data by (see `subsample_method`)
        (default: 1 (no subsampling))
    subsample_method: "equal", "greedy", or "random", optional
        Method for subsampling the data (see `subsample_factor`).
        "equal" for equally spaced subsampling,
        "greedy" for selecting the data based on their uncertainty,
        and "random" for uniform random subsampling.
        Any other value falls back to "greedy".
        (default: "greedy")
    akd_threshold: float, optional
        Only data points whose `NO_AKDIAG` value is larger than this
        threshold are kept. (default: 0.002)
    cnt_threshold: int, optional
        Only data points whose count (`NO_DENS_cnt`, or `counts` if the
        former is not present) is larger than this threshold are kept.
        (default: 0)
    center: bool, optional
        Center the data by subtracting the global mean, i.e. the mean of
        `NO_DENS` over the whole dataset before the altitude/latitude
        selection.
        (default: False)
    season: "summerNH", "summerSH", or `None`, optional
        Select the named season or `None` for all data (default: None)
    SPEs: bool, optional
        Set to `True` to exclude strong SPE events (default: False)

    Returns
    -------
    (times, dens, errs, szas): tuple of (N,) array_like
        The measurement times according to the `tfmt` keyword,
        the (scaled) number densities, their uncertainties
        (scaled `NO_DENS_std` divided by the square root of the counts),
        and the values of the dataset's `mean_SZA` variable
        (presumably the mean solar zenith angles).
    """
    logging.info("Opening dataset: '%s'", filename)
    NO_ds = xr.open_mfdataset(filename, decode_times=False,
            chunks={"time": 400, "latitude": 9, "altitude": 11})
    logging.info("done.")
    # Decode time coordinate for selection
    NO_ds["time"] = xr.conventions.decode_cf(NO_ds[["time"]]).time

    NO_mean = 0.
    if center:
        # The mean is taken over the full dataset, before selecting
        # the requested altitude and latitude.
        NO_mean = NO_ds.NO_DENS.mean().values
        logging.info("Centering with global mean: %s", NO_mean)
    NO_tds = NO_ds.sel(latitude=lat, altitude=alt)

    # Exclude SPEs first if requested
    if SPEs:
        logging.info("Removing SPEs.")
        for spe in _SPEs:
            # The first and last entries of each SPE record bound the
            # time range that is removed.
            NO_tds = NO_tds.drop(
                    NO_tds.sel(time=slice(spe[0], spe[-1])).time.values,
                    dim="time")

    # Filter by season
    if season in _seasons:
        logging.info("Restricting to season: %s", season)
        NO_tds = xr.concat([NO_tds.sel(time=s) for s in _seasons[season]],
                dim="time")
        NO_tds.load()
    else:
        logging.info("No season selected or unknown season, "
                "using all available data.")

    # The counts variable is named differently between file variants.
    try:
        NO_counts = NO_tds.NO_DENS_cnt
    except AttributeError:
        NO_counts = NO_tds.counts

    # Select only useful data
    NO_tds = NO_tds.where(
            np.isfinite(NO_tds.NO_DENS) &
            (NO_tds.NO_DENS_std != 0) &
            (NO_tds.NO_AKDIAG > akd_threshold) &
            (NO_counts > cnt_threshold) &
            (NO_tds.NO_MASK == 0),
            drop=True)

    no_dens = scale * NO_tds.NO_DENS
    if center:
        no_dens -= scale * NO_mean
    # NOTE(review): `NO_counts` was taken before the filtering above; this
    # relies on xarray aligning both operands on their shared coordinates.
    no_errs = scale * NO_tds.NO_DENS_std / np.sqrt(NO_counts)
    logging.debug("no_dens.shape (ntime,): %s", no_dens.shape)

    no_sza = NO_tds.mean_SZA

    # Convert to astropy.Time for Julian epoch or decimal year
    if NO_tds.time.size > 0:
        no_t = Time(pd.to_datetime(NO_tds.time.values, utc=True).to_pydatetime(),
                format="datetime", scale="utc")
        no_ys = getattr(no_t, tfmt)
    else:
        # Nothing left after filtering: return an empty float array
        # instead of asking astropy to convert an empty time list.
        no_ys = np.empty_like(NO_tds.time.values, dtype=np.float64)

    if subsample_factor > 1:
        new_data_size = no_dens.shape[0] // subsample_factor
        if subsample_method == "random":
            # random subsampling
            _idxs = np.random.choice(no_dens.shape[0],
                    new_data_size, replace=False)
        elif subsample_method == "equal":
            # equally spaced subsampling
            _idxs = slice(0, no_dens.shape[0], subsample_factor)
        else:
            # "greedy" subsampling (default fall-back)
            _idxs = _greedy_idxs_post(no_dens, no_errs, new_data_size)
        return (no_ys[_idxs],
                no_dens.values[_idxs],
                no_errs.values[_idxs],
                no_sza.values[_idxs])

    return no_ys, no_dens.values, no_errs.values, no_sza.values
||
321 |