Conditions | 15 |
Total Lines | 102 |
Code Lines | 45 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
Complex classes like crowdtruth.scanDirectory() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | import os |
||
def scanDirectory(directory='', root=''):
    """Process the CSV files of one directory, then recurse into its subdirectories.

    Every ``*.csv`` file except ``groundtruth.csv`` is handed to
    ``ic.processFile``. If at least one job was found, the collected results
    are aggregated and written with ``oc.saveResults``.

    Args:
        directory: Directory to scan, relative to ``root`` and concatenated
            directly onto it (so nested levels look like '/sub/dir').
            '' scans ``root`` itself.
        root: Base directory. Falls back to the current working directory
            when empty. (Previously this argument was always overwritten.)
    """
    if not root:
        root = os.getcwd()
    path = root + directory

    files = os.listdir(path)
    app.log.debug("Found directory " + path)
    # Single pre-formatted argument so the output is the same on Python 2 and 3.
    print('Directory: %s %s' % (root, directory))

    results = {
        'jobs': [],
        'units': [],
        'workers': [],
        'judgments': [],
        'annotations': [],
    }

    config = cc.getConfig(root, directory)

    # Sort the directory entries into subdirectories (scanned later) and
    # CSV files (processed right away).
    subdirectories = []
    for f in files:
        if os.path.isdir(path + '/' + f):
            subdirectories.append(directory + '/' + f)
        elif f.endswith('.csv') and f != 'groundtruth.csv':
            res = ic.processFile(root, directory, f, config)
            for key in res:
                results[key].append(res[key])

    # Only aggregate and save when this directory actually contained jobs.
    if results['jobs']:
        results = _aggregateResults(results, config)
        oc.saveResults(root, directory, results)

    # Remove this directory's config module from the import machinery so it
    # is not picked up again for another directory.
    if config.name:
        sys.path.remove(path)
        del sys.modules['config']
        if os.path.exists(path + '/config.pyc'):
            os.remove(path + '/config.pyc')

    # Entries in `subdirectories` already carry the `directory` prefix, so
    # they are passed on unchanged; prefixing them again would double the path.
    for sub in subdirectories:
        scanDirectory(sub, root)


def _aggregateResults(results, config):
    """Merge the per-file results into one frame per key and post-process them.

    Args:
        results: Dict mapping each result key to a list of objects accepted
            by ``pd.concat`` (presumably DataFrames from ``ic.processFile``
            -- confirm against that function).
        config: Object providing ``output`` (a mapping whose values are
            column names) and ``processResults``.

    Returns:
        Whatever ``config.processResults(results, config)`` returns.
    """
    for key in results:
        results[key] = pd.concat(results[key])

    # Workers and annotations can appear across jobs, so rows sharing an
    # index value are aggregated into one.
    workers = results['workers']
    results['workers'] = workers.groupby(workers.index).agg({
        'unit': 'sum',
        'judgment': 'sum',
        'job': 'count',
        'duration': 'mean',
        'spam': 'sum',
        'worker-cosine': 'mean',
        'worker-agreement': 'mean',
    })

    annotations = results['annotations']
    results['annotations'] = annotations.groupby(annotations.index).sum()

    # NOTE: correlation computation and Metrics.run(results, config)
    # ("CT metrics 2.0") were commented out in the original and stay disabled.

    # Convert the output columns to plain dicts (units) and to comma-joined
    # key strings (judgments) for readability.
    for col in config.output.values():
        results['units'][col] = results['units'][col].apply(dict)
    for col in config.output.values():
        results['judgments'][col] = results['judgments'][col].apply(
            lambda x: ','.join(x.keys()))

    return config.processResults(results, config)
168 |