| Conditions | 9 |
| Total Lines | 60 |
| Code Lines | 53 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, particularly when combined with a good name. Besides, if your method is small, finding a good name for it is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign that you should extract the commented part into a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
| 1 | #!/usr/bin/env python |
||
def _format_bins(scheme):
    """Render the bins of `scheme` as space-separated '[member, member, ...]' groups.

    `scheme` is iterated as (class_name, class_bin) pairs; each `class_bin`
    must be an iterable of strings.
    """
    # The generator belongs to the outer join: one '[...]' group per bin.
    return ' '.join('[{}]'.format(', '.join(class_bin)) for _, class_bin in scheme)


def _print_scheme_summary(class_names, class_distribution, scheme):
    """Print the class names, their distribution (2 decimals) and the bins of `scheme`."""
    print("Scheme [{}] with resulting distribution [{}]".format(
        ' '.join(class_names),
        ', '.join('{:.2f}'.format(x) for x in class_distribution)))
    print("Bins: {}".format(_format_bins(scheme)))


def main():
    """Interactively build a discreetization scheme and persist the labeled collection.

    Reads the command-line arguments, processes the documents with a
    PipeHandler, then loops: ask for a scheme, show its classes, distribution
    and bins, and let the user go back, evolve the scheme further, or persist
    the dataset, build the co-occurrence files and exit.

    Raises:
        RuntimeError: if the COLLECTIONS_DIR environment variable is unset or empty.
        ValueError: if the chosen scheme is rejected when assigned to
            `political_spectrum.discreetization_scheme`.
    """
    args = get_cl_arguments()

    # 'all' is passed through unchanged; any other value is a document count.
    nb_docs = args.sample
    if nb_docs != 'all':
        nb_docs = int(nb_docs)

    collections_dir = os.getenv('COLLECTIONS_DIR')
    if not collections_dir:
        raise RuntimeError(
            "Please set the COLLECTIONS_DIR environment variable with the path to a directory containing collections/datasets")

    ph = PipeHandler()
    ph.process(args.config, args.category, sample=nb_docs, verbose=True)
    political_spectrum.datapoint_ids = ph.outlet_ids

    while True:
        try:
            scheme = ask_discreetization(political_spectrum, ph, pool_size=100, prob=0.3, max_generation=100)
        except KeyboardInterrupt:
            print("Exiting ..")
            sys.exit(0)
        print("Scheme with classes: [{}]".format(' '.join(x for x, _ in scheme)))
        try:
            political_spectrum.discreetization_scheme = scheme
        except ValueError as e:
            # Append the scheme's type name to help diagnose the rejected value.
            raise ValueError("{}. {}".format(e, type(scheme).__name__))

        _print_scheme_summary(political_spectrum.class_names, political_spectrum.class_distribution, scheme)

        while True:
            answer = what_to_do()
            if answer == 'back':
                # Return to the outer loop and ask for a new scheme.
                break
            if answer == 'Evolve more':
                evolution_specs = ask_evolution_specs()
                print("Evolving discreetization scheme ..")
                scheme = political_spectrum.evolve(int(evolution_specs['nb-generations']),
                                                   prob=float(evolution_specs['probability']))
                political_spectrum.discreetization_scheme = scheme
                # NOTE(review): this reads class names from the evolved scheme
                # itself, while the first summary reads them from
                # political_spectrum -- confirm evolve() returns an object
                # exposing `class_names`.
                _print_scheme_summary(scheme.class_names, political_spectrum.class_distribution, scheme)
            else:
                # Any other answer persists the dataset and terminates the program.
                uci_dt = ph.persist(os.path.join(collections_dir, args.collection),
                                    political_spectrum.poster_id2ideology_label, political_spectrum.class_names,
                                    add_class_labels_to_vocab=not args.exclude_class_labels_from_vocab)
                print(uci_dt)
                print("Discreetization scheme\n{}".format(political_spectrum.discreetization_scheme))

                # print("Add the below to the DISCREETIZATION_SCHEMES_HASH")
                # print("[{}]".format())
                print('\nBuilding coocurences information')
                coherence_builder = CoherenceFilesBuilder(os.path.join(collections_dir, args.collection))
                coherence_builder.create_files(cooc_window=args.window,
                                               min_tf=args.min_tf,
                                               min_df=args.min_df,
                                               apply_zero_index=False)
                sys.exit(0)
| 168 | |||
| 185 |