| Conditions | 15 |
| Total Lines | 102 |
| Code Lines | 45 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.
Complex classes like crowdtruth.scanDirectory() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | import os |
||
# How per-worker rows are combined when the same worker shows up in several
# jobs (the frames are grouped by their index before this is applied).
_WORKER_AGGREGATIONS = {
    'unit' : 'sum',
    'judgment' : 'sum',
    'job' : 'count',
    'duration' : 'mean',
    'spam' : 'sum',
    'worker-cosine' : 'mean',
    'worker-agreement' : 'mean'
}


def _aggregate_results(results, config):
    """Merge the per-file result lists into single frames and post-process them.

    `results` maps each result kind ('jobs', 'units', 'workers', 'judgments',
    'annotations') to a list of objects accepted by `pd.concat`; every list is
    replaced in place by its concatenation. The return value is whatever
    `config.processResults(results, config)` returns.
    """
    for kind in results:
        results[kind] = pd.concat(results[kind])

    # workers and annotations can appear across jobs, so they have to be
    # aggregated again after concatenation
    workers = results['workers']
    results['workers'] = workers.groupby(workers.index).agg(_WORKER_AGGREGATIONS)

    annotations = results['annotations']
    results['annotations'] = annotations.groupby(annotations.index).sum()

    # NOTE: the original carried disabled code here for pairwise correlations
    # (`likert.corr(method='pearson')`) and for `Metrics.run(results, config)`;
    # both were commented out and are intentionally not executed.

    # add customized results: store each output column of the units as a dict
    for col in config.output.values():
        results['units'][col] = results['units'][col].apply(lambda x: dict(x))

    # remove Counter for readability: keep only the comma-joined keys
    for col in config.output.values():
        results['judgments'][col] = results['judgments'][col].apply(lambda x: ','.join(x.keys()))

    return config.processResults(results, config)


def _unload_config(config, root, directory):
    """Undo the import of a directory-specific config module, if there was one.

    Does nothing when `config.name` is falsy. Otherwise removes
    `root+directory` from `sys.path`, drops the cached 'config' module and
    deletes a leftover `config.pyc` next to it.
    NOTE(review): assumes cc.getConfig added that path and imported the module
    as 'config' -- confirm against cc.getConfig.
    """
    if not config.name:
        return

    sys.path.remove(root+directory)
    del sys.modules['config']

    compiled = root+directory+'/config.pyc'
    if os.path.exists(compiled):
        os.remove(compiled)


def scanDirectory(directory='',root=''):
    """Process every CSV file in a directory, then recurse into its subfolders.

    Parameters:
        directory -- path relative to `root`, written with a leading '/'
                     (e.g. '/batch1'); '' means `root` itself.
        root      -- base directory; defaults to the current working directory
                     when empty. (The original overwrote this argument
                     unconditionally, so an explicit value was ignored.)

    For the directory being scanned, every '*.csv' file except
    'groundtruth.csv' is handed to `ic.processFile`. If any jobs were found the
    collected results are aggregated and passed to `oc.saveResults`.
    Afterwards the directory's config is unloaded and each subdirectory is
    scanned the same way. Returns None.

    Raises whatever `os.listdir` raises when `root+directory` cannot be listed.
    """
    if not root:
        root = os.getcwd()

    files = os.listdir(root+directory)
    app.log.debug("Found directory "+root+directory)
    # Single pre-formatted argument: prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print('Directory: %s %s' % (root, directory))

    results = {
        'jobs' : [],
        'units' : [],
        'workers' : [],
        'judgments' : [],
        'annotations' : []
    }

    config = cc.getConfig(root, directory)

    # go through all files in this folder
    subdirectories = []
    for f in files:

        # folders are remembered and scanned after this directory is done
        if os.path.isdir(root+directory+'/'+f):
            subdirectories.append(directory+'/'+f)

        # csv files are processed, except the ground truth
        elif f.endswith('.csv') and f != 'groundtruth.csv':
            res = ic.processFile(root, directory, f, config)
            for x in res:
                results[x].append(res[x])

    # if jobs were found
    if results['jobs']:
        results = _aggregate_results(results, config)
        oc.saveResults(root, directory, results)

    # remove config from system path
    _unload_config(config, root, directory)

    # dive into subdirectories; each entry already is the full path relative
    # to root, so it must not be prefixed with `directory` a second time
    for sub in subdirectories:
        scanDirectory(sub, root)
||
| 136 | |||
| 168 |