| Metric      | Value |
|-------------|-------|
| Conditions  | 24    |
| Total Lines | 107   |
| Lines       | 0     |
| Ratio       | 0 %   |
| Changes     | 4     |
| Bugs        | 1     |
| Features    | 0     |
Small methods make your code easier to understand, especially when combined with a good name. And if a method is small, finding a good name for it is usually much easier.
For example, if you find yourself adding comments to a method's body, that is usually a sign that the commented part should be extracted into a new method, using the comment as a starting point for the new method's name.
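As a minimal illustration (hypothetical names, not taken from the listing below):

```python
import math

# Before: a comment labels one step buried inside a longer method.
#
#     def summarize(results):
#         # keep only the results with a finite chi-squared value
#         valid = [r for r in results if math.isfinite(r["chi_sq"])]
#         ...

# After: the commented block becomes its own method, and the comment
# becomes the method's name and docstring.
def finite_chi_sq_results(results):
    """Keep only the results with a finite chi-squared value."""
    return [r for r in results if math.isfinite(r["chi_sq"])]

def summarize(results):
    valid = finite_chi_sq_results(results)
    return len(valid), min(r["chi_sq"] for r in valid)
```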
Commonly applied refactorings include:

- Extract Method: move a coherent block of the method into its own, well-named method.
- If many parameters/temporary variables are present: Introduce Parameter Object or Replace Method with Method Object (see the sketch below).
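A minimal sketch of Introduce Parameter Object (the names `JoinOptions` and `join` are hypothetical), grouping the flag-heavy tail of a signature like that of `join_results()` into one object:

```python
from dataclasses import dataclass

# Hypothetical sketch: bundle the boolean flags of a long method into a
# single parameter object, shrinking the method's signature.
@dataclass(frozen=True)
class JoinOptions:
    from_filename: bool = False
    clobber: bool = False
    errors: bool = False
    cov: bool = False

def join(output_filename, result_filenames, options=JoinOptions()):
    # Body elided: it would mirror join_results(), reading flags from
    # the options object instead of four separate parameters.
    if options.clobber:
        print("would overwrite {}".format(output_filename))

join("results.fits", ["a.pkl", "b.pkl"], JoinOptions(clobber=True))
```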
Complex code units like join_results() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields/methods (or, in a long function, parameters and temporary variables) that share the same prefix or suffix.
Once you have determined which fields belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
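A minimal sketch of the prefix heuristic followed by Extract Class (all names hypothetical, not from the listing below):

```python
# Before (sketch): meta_defaults and meta_row() share the meta_ prefix,
# hinting at a component hiding inside ResultsTable.
#
#     class ResultsTable:
#         def __init__(self):
#             self.meta_defaults = {"chi_sq": float("nan"), "snr": float("nan")}
#         def meta_row(self, meta): ...

# After: the prefixed members move into their own class; the prefix
# disappears because the new class name now carries that meaning.
class Metadata:
    def __init__(self, defaults):
        self.defaults = defaults

    def row(self, meta):
        # One metadata value per key, falling back to the default.
        return [meta.get(k, v) for k, v in self.defaults.items()]

class ResultsTable:
    def __init__(self):
        self.metadata = Metadata({"chi_sq": float("nan"), "snr": float("nan")})
```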
```python
#!/usr/bin/env python

# ... (module header elided; the names used below -- logging, os, pickle,
# collections.OrderedDict, and numpy's nan, ones and diag -- are presumably
# imported there)

def join_results(output_filename, result_filenames, model_filename=None,
    from_filename=False, clobber=False, errors=False, cov=False, **kwargs):
    """
    Join the test results from multiple files into a single table file.
    """

    import AnniesLasso as tc
    from astropy.table import Table, TableColumns

    meta_keys = kwargs.pop("meta_keys", {})
    meta_keys.update({
        "chi_sq": nan,
        "r_chi_sq": nan,
        "snr": nan,
        # "redshift": nan,
    })

    logger = logging.getLogger("AnniesLasso")

    # Does the output filename already exist?
    if os.path.exists(output_filename) and not clobber:
        logger.info("Output filename {} already exists and not clobbering."\
            .format(output_filename))
        return None

    if from_filename:
        with open(result_filenames[0], "r") as fp:
            _ = list(map(str.strip, fp.readlines()))
        result_filenames = _

    # We might need the label names from the model.
    if model_filename is not None:
        model = tc.load_model(model_filename)
        assert model.is_trained
        label_names = model.vectorizer.label_names
        logger.warn(
            "Results produced from newer models do not need a model_filename "\
            "to be specified when joining results.")

    else:
        with open(result_filenames[0], "rb") as fp:
            contents = pickle.load(fp)
        if "label_names" not in contents[-1]:
            raise ValueError(
                "cannot find label names; please provide the model used "\
                "to produce these results")
        label_names = contents[-1]["label_names"]


    # Load results from each file.
    failed = []
    N = len(result_filenames)

    # Create an ordered dictionary of lists for all the data.
    data_dict = OrderedDict([("FILENAME", [])])
    for label_name in label_names:
        data_dict[label_name] = []

    if errors:
        for label_name in label_names:
            data_dict["E_{}".format(label_name)] = []

    if cov:
        data_dict["COV"] = []

    for key in meta_keys:
        data_dict[key] = []

    # Iterate over all the result filenames
    for i, filename in enumerate(result_filenames):
        logger.info("{}/{}: {}".format(i + 1, N, filename))

        if not os.path.exists(filename):
            logger.warn("Path {} does not exist. Continuing..".format(filename))
            failed.append(filename)
            continue

        with open(filename, "rb") as fp:
            contents = pickle.load(fp)

        assert len(contents) == 3, "You are using some old school version!"

        labels, Sigma, meta = contents

        if Sigma is None:
            Sigma = nan * ones((labels.size, labels.size))

        result = [filename] + list(labels)
        if errors:
            result.extend(diag(Sigma)**0.5)
        if cov:
            result.append(Sigma)
        result += [meta.get(k, v) for k, v in meta_keys.items()]

        for key, value in zip(data_dict.keys(), result):
            data_dict[key].append(value)

    # Warn of any failures.
    if failed:
        logger.warn(
            "The following {} result file(s) could not be found: \n{}".format(
                len(failed), "\n".join(failed)))

    # Construct the table.
    table = Table(TableColumns(data_dict))
    table.write(output_filename, overwrite=clobber)
    logger.info("Written to {}".format(output_filename))
```