Conditions | 9 |
Total Lines | 60 |
Code Lines | 53 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
1 | #!/usr/bin/env python |
||
def _format_bins(scheme):
    """Render the bins of a discreetization scheme as ``[a, b] [c, d] ...``.

    Args:
        scheme: iterable of ``(class_name, class_bin)`` pairs, where each
            ``class_bin`` is an iterable of strings.

    Returns:
        str: one bracketed, comma-separated group per bin, groups separated
        by single spaces.
    """
    # Each bin is formatted on its own; the generator must feed the outer
    # join, not be handed to str.format as a single argument.
    return ' '.join('[{}]'.format(', '.join(class_bin)) for _, class_bin in scheme)


def _print_scheme_summary(class_names, class_distribution, scheme):
    """Print the class names, their distribution (2 decimals) and the bins."""
    print("Scheme [{}] with resulting distribution [{}]".format(
        ' '.join(class_names),
        ', '.join('{:.2f}'.format(x) for x in class_distribution)))
    print("Bins: {}".format(_format_bins(scheme)))


def main():
    """Interactively pick a discreetization scheme, then persist the dataset.

    Flow:
      1. Read the command-line arguments and the ``COLLECTIONS_DIR``
         environment variable.
      2. Run the ``PipeHandler`` over the requested category/sample.
      3. Repeatedly ask for a discreetization scheme; for each one offer to
         go back, evolve it further, or (any other answer) persist the
         dataset, build the co-occurrence files and exit with status 0.

    Raises:
        RuntimeError: if ``COLLECTIONS_DIR`` is unset or empty.
        ValueError: if assigning the chosen scheme to ``political_spectrum``
            raises ValueError; the message is extended with the scheme's
            type name.
    """
    args = get_cl_arguments()

    # The sample size is either the literal 'all' or something int() accepts.
    nb_docs = args.sample
    if nb_docs != 'all':
        nb_docs = int(nb_docs)

    collections_dir = os.getenv('COLLECTIONS_DIR')
    if not collections_dir:
        raise RuntimeError(
            "Please set the COLLECTIONS_DIR environment variable with the path to a directory containing collections/datasets")

    ph = PipeHandler()
    ph.process(args.config, args.category, sample=nb_docs, verbose=True)
    political_spectrum.datapoint_ids = ph.outlet_ids

    while True:
        # Ctrl-C while choosing a scheme is treated as a normal exit.
        try:
            scheme = ask_discreetization(political_spectrum, ph, pool_size=100, prob=0.3, max_generation=100)
        except KeyboardInterrupt:
            print("Exiting ..")
            sys.exit(0)
        print("Scheme with classes: [{}]".format(' '.join(x for x, _ in scheme)))

        try:
            political_spectrum.discreetization_scheme = scheme
        except ValueError as e:
            # Re-raise with the scheme's type name appended to the message.
            raise ValueError("{}. {}".format(e, type(scheme).__name__))

        _print_scheme_summary(political_spectrum.class_names,
                              political_spectrum.class_distribution,
                              scheme)

        while True:
            answer = what_to_do()
            if answer == 'back':
                # Return to the outer loop and ask for a new scheme.
                break

            if answer == 'Evolve more':
                evolution_specs = ask_evolution_specs()
                print("Evolving discreetization scheme ..")
                scheme = political_spectrum.evolve(int(evolution_specs['nb-generations']),
                                                   prob=float(evolution_specs['probability']))
                political_spectrum.discreetization_scheme = scheme
                # NOTE(review): the first summary reads the class names from
                # political_spectrum, this one from the evolved scheme itself;
                # confirm evolve() returns an object exposing `class_names`.
                _print_scheme_summary(scheme.class_names,
                                      political_spectrum.class_distribution,
                                      scheme)
            else:
                collection_path = os.path.join(collections_dir, args.collection)
                uci_dt = ph.persist(collection_path,
                                    political_spectrum.poster_id2ideology_label,
                                    political_spectrum.class_names,
                                    add_class_labels_to_vocab=not args.exclude_class_labels_from_vocab)
                print(uci_dt)
                print("Discreetization scheme\n{}".format(political_spectrum.discreetization_scheme))

                # TODO: print the scheme in a form that can be added to the
                # DISCREETIZATION_SCHEMES_HASH.
                print('\nBuilding coocurences information')
                coherence_builder = CoherenceFilesBuilder(collection_path)
                coherence_builder.create_files(cooc_window=args.window,
                                               min_tf=args.min_tf,
                                               min_df=args.min_df,
                                               apply_zero_index=False)
                sys.exit(0)
||
168 | |||
185 |