crowdtruth.scanDirectory() - Code Metrics - Inspection of "revert changes" - CrowdTruth/CrowdTruth-core - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( d75a64...aeff40 )

by Oana

created 2018-08-16 15:15 UTC

crowdtruth.scanDirectory() F

↳ Parent: crowdtruth

Complexity

Conditions

Size

Total Lines	102
Code Lines	45

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	45
dl	0
loc	102
rs	2.9998
c	0
b	0
f	0
cc	15
nop	2

How to fix Long Method Complexity

import os
import sys
from datetime import datetime
import pandas as pd
import numpy as np
from cement.core.foundation import CementApp
from cement.core import hook
from cement.utils.misc import init_defaults


from crowdtruth.models import *
import crowdtruth.controllers.inputController as ic
import crowdtruth.controllers.outputController as oc
import crowdtruth.controllers.configController as cc


# Default configuration for the 'crowdtruth' config section; handed to the
# application through CrowdTruth.Meta.config_defaults.
defaults = init_defaults('crowdtruth')
defaults['crowdtruth'].update({
    'debug': False,
    'some_param': 'some value',
})

# define any hook functions here



# define the application class
class CrowdTruth(CementApp):
    """Cement application object for the CrowdTruth command-line tool.

    All behaviour comes from CementApp; this subclass only supplies the
    application metadata below.
    """
    class Meta:
        # Application label.
        label = 'CrowdTruth'
        # Configuration defaults: the module-level `defaults` dictionary.
        config_defaults = defaults
        # Extensions requested from the framework
        # (presumably JSON and YAML handlers -- confirm against the Cement docs).
        extensions = ['json', 'yaml']


def _collectResults(root, directory, files, config):
    """Process every input CSV in one directory and list its subdirectories.

    Returns a ``(results, subdirectories)`` tuple. ``results`` maps each
    result type to the list of objects returned by ``ic.processFile`` for
    that type; ``subdirectories`` holds paths relative to ``root`` (each one
    already prefixed with ``directory``).
    """
    results = {
        'jobs': [],
        'units': [],
        'workers': [],
        'judgments': [],
        'annotations': []
    }
    subdirectories = []

    for f in files:
        if os.path.isdir(root + directory + '/' + f):
            # folders are remembered and scanned after this directory is done
            subdirectories.append(directory + '/' + f)
        elif f.endswith('.csv') and f != 'groundtruth.csv':
            # every other csv file is treated as job input
            res = ic.processFile(root, directory, f, config)
            for x in res:
                results[x].append(res[x])

    return results, subdirectories


def _aggregateResults(results, config):
    """Merge the per-file results of one directory into one frame per type.

    ``results`` is modified in place and returned.
    """
    for x in results:
        results[x] = pd.concat(results[x])

    # workers and annotations can appear across jobs, so they are aggregated
    # once more on their index
    workers = results['workers']
    results['workers'] = workers.groupby(workers.index).agg({
        'unit': 'sum',
        'judgment': 'sum',
        'job': 'count',
        'duration': 'mean',
        'spam': 'sum',
        'worker-cosine': 'mean',
        'worker-agreement': 'mean'
    })

    annotations = results['annotations']
    results['annotations'] = annotations.groupby(annotations.index).sum()

    # NOTE: the pairwise correlation step (DataFrame.corr with 'pearson',
    # 'kendall' or 'spearman') and the CT metrics 2.0 pass (Metrics.run) are
    # currently disabled.

    for col in config.output.values():
        # add customized results: store the unit values as plain dicts
        results['units'][col] = results['units'][col].apply(lambda x: dict(x))
        # remove Counter for readability: keep only the comma-joined keys
        results['judgments'][col] = results['judgments'][col].apply(
            lambda x: ','.join(x.keys()))

    return results


def scanDirectory(directory='',root=''):
    """Process the CSV files of ``root + directory`` and recurse into subfolders.

    For the given folder the function loads its configuration through
    ``cc.getConfig``, feeds every ``.csv`` file except ``groundtruth.csv`` to
    ``ic.processFile``, and, if any jobs were found, aggregates the results,
    passes them through ``config.processResults`` and saves them with
    ``oc.saveResults``. Afterwards every subdirectory is scanned the same way.

    Parameters
    ----------
    directory : str
        Path of the folder relative to ``root``; either empty or starting
        with ``'/'`` because it is concatenated directly onto ``root``.
    root : str
        Base directory. When empty, the current working directory is used.
    """
    # Honour an explicitly supplied root; previously it was always replaced
    # by the current working directory, which made --dir a no-op.
    if not root:
        root = os.getcwd()

    files = os.listdir(root + directory)
    app.log.debug("Found directory " + root + directory)
    # same output as the former `print 'Directory:',root,directory`, but
    # valid syntax on both Python 2 and Python 3
    sys.stdout.write('Directory: %s %s\n' % (root, directory))

    config = cc.getConfig(root, directory)

    results, subdirectories = _collectResults(root, directory, files, config)

    # if jobs were found
    if results['jobs']:
        results = _aggregateResults(results, config)
        results = config.processResults(results, config)
        oc.saveResults(root, directory, results)

    # remove config from system path
    # (presumably cc.getConfig put the folder on sys.path and imported a
    # module named 'config' from it -- confirm in configController)
    if config.name:
        sys.path.remove(root + directory)
        del sys.modules['config']
    if os.path.exists(root + directory + '/config.pyc'):
        os.remove(root + directory + '/config.pyc')

    # dive into subdirectories; the stored paths already contain `directory`,
    # so they are passed on unchanged together with the same root
    for sub in subdirectories:
        scanDirectory(sub, root)



# Script entry point: create the application, parse the command line, scan the
# requested directory tree and report the elapsed time.
with CrowdTruth() as app:

    # track execution time
    startTime = datetime.now()

    # register the -d/--dir option; must happen before app.run() parses the
    # command line
    app.args.add_argument('-d', '--dir', action='store', metavar='DIR',
                          help='Set root directory (provide absolute path)')

    # log stuff
    app.log.debug("About to run my myapp application!")

    # run the application (app.pargs is read right after this call)
    app.run()

    # scan the directory given with -d/--dir, or fall back to the defaults of
    # scanDirectory() when the option was not supplied
    if app.pargs.dir:
        scanDirectory(root=app.pargs.dir)
    else:
        scanDirectory()




    # report the total run time
    app.log.info('Finished in ' + str(datetime.now() - startTime))
    
    # NOTE(review): leaving the `with` block presumably closes the app as
    # well -- confirm whether this explicit close is still needed.
    app.close()


1			import os
2			import sys
3			from datetime import datetime
4			import pandas as pd
5			import numpy as np
6			from cement.core.foundation import CementApp
7			from cement.core import hook
8			from cement.utils.misc import init_defaults
9
10
11			from crowdtruth.models import *
12			import crowdtruth.controllers.inputController as ic
13			import crowdtruth.controllers.outputController as oc
14			import crowdtruth.controllers.configController as cc
15
16
17			# define our default configuration options
18			defaults = init_defaults('crowdtruth')
19			defaults['crowdtruth']['debug'] = False
20			defaults['crowdtruth']['some_param'] = 'some value'
21
22			# define any hook functions here
23
24
25
26			# define the application class
27			class CrowdTruth(CementApp):
28			class Meta:
29			label = 'CrowdTruth'
30			config_defaults = defaults
31			extensions = ['json', 'yaml']
32
33
34			def scanDirectory(directory='',root=''):
35			root = os.getcwd()
36			files = os.listdir(root+directory)
37			app.log.debug("Found directory "+root+directory)
38			print 'Directory:',root,directory
39
40
41			results = {
42			#'collections' : {},
43			'jobs' : [],
44			'units' : [],
45			'workers' : [],
46			'judgments' : [],
47			'annotations' : []
48			}
49
50			config = cc.getConfig(root, directory)
51
52
53			# go through all files in this folder
54			subdirectories = []
55			for f in files:
56
57			# if it is a folder scan it
58			if os.path.isdir(root+directory+'/'+f):
59			subdirectories.append(directory+'/'+f)
60
61			# if it is a csv file open it
62			elif f.endswith('.csv') and f != 'groundtruth.csv':
63			# open csv
64			res = ic.processFile(root, directory, f, config)
65			for x in res:
66			results[x].append(res[x])
67
68
69
70			# if jobs were found
71			if len(results['jobs']) > 0:
72
73			for x in results:
74			results[x] = pd.concat(results[x])
75
76			# workers and annotations can appear across jobs, so we have to aggregate those extra
77			results['workers'] = results['workers'].groupby(results['workers'].index).agg({
78			'unit' : 'sum',
79			'judgment' : 'sum',
80			'job' : 'count',
81			'duration' : 'mean',
82			'spam' : 'sum',
83			'worker-cosine' : 'mean',
84			'worker-agreement' : 'mean'
85			})
86
87
88
89			# aggregate annotations
90			results['annotations'] = results['annotations'].groupby(results['annotations'].index).sum()
91
92
93			#
94			# compute correlations
95			#
96			# remove 'output.' from the annotation column names
97
98
99			# How many times person a meets person b is described by the following (s.t. a < b)
100
101
102			# DataFrames corr() function calculates pairwise correlations using specified
103			# algorithm: 'pearson', 'kendall', and 'spearman' are supported.
104			# Correlations are returned in a new DataFrame instance (corr_df below).
105			#likert_corr_df = likert.corr(method='pearson')
106			#likert_corr_df.to_csv(wd+'/results/likert_correlations.csv', sep=',')
107
108			# CT metrics 2.0
109			#results = Metrics.run(results, config)
110
111			# add customized results
112			for c in config.output.items():
113			results['units'][c[1]] = results['units'][c[1]].apply(lambda x: dict(x))
114
115			# remove Counter for readability
116			for col in config.output.values():
117			results['judgments'][col] = results['judgments'][col].apply(lambda x: ','.join(x.keys()))
118
119
120			results = config.processResults(results, config)
121
122			oc.saveResults(root, directory, results)
123
124
125			# remove config from system path
126			if config.name:
127			sys.path.remove(root+directory)
128			del sys.modules['config']
129			if os.path.exists(root+directory+'/config.pyc'):
130			os.remove(root+directory+'/config.pyc')
131
132
133			# dive into subdirectories
134			for f in subdirectories:
135			scanDirectory(directory+'/'+f)
136
137
138
139			with CrowdTruth() as app:
140
141			# track execution time
142			startTime = datetime.now()
143
144			# add arguments to the parser
145			app.args.add_argument('-d', '--dir', action='store', metavar='DIR',
146			help='Set root directory (provide absolute path)')
147
148			# log stuff
149			app.log.debug("About to run my myapp application!")
150
151			# run the application
152			app.run()
153
154			# continue with additional application logic
155			#
156			if app.pargs.dir:
157			scanDirectory(root=app.pargs.dir)
158			else:
159			scanDirectory()
160			# verify that we have something to do
161
162
163
164
165			app.log.info('Finished in ' + str(datetime.now() - startTime))
166
167			app.close()
168

CrowdTruth / CrowdTruth-core

GitHub Access Token became invalid

Push — master ( d75a64...aeff40 )

crowdtruth.scanDirectory() F

Complexity

Size

Duplication

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like