crowdtruth - Code Metrics - Inspection of "revert changes" - CrowdTruth/CrowdTruth-core - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( d75a64...aeff40 )

by Oana

created 2018-08-16 15:15 UTC

crowdtruth A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	168
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	15
eloc	77
dl	0
loc	168
rs	10
c	0
b	0
f	0

1 Function

Rating	Name	Duplication	Size	Complexity
F	scanDirectory()	0	102	15

import os
import sys
from datetime import datetime
import pandas as pd
import numpy as np
from cement.core.foundation import CementApp
from cement.core import hook
from cement.utils.misc import init_defaults


from crowdtruth.models import *
import crowdtruth.controllers.inputController as ic
import crowdtruth.controllers.outputController as oc
import crowdtruth.controllers.configController as cc


# default configuration for the 'crowdtruth' config section
defaults = init_defaults('crowdtruth')
defaults['crowdtruth'].update({
    'debug': False,
    'some_param': 'some value',
})

# define any hook functions here



# define the application class
class CrowdTruth(CementApp):
    """Cement application object for the CrowdTruth command-line script."""

    class Meta:
        # application settings handed to cement: the extensions to load,
        # the default configuration and the application label
        extensions = ['json', 'yaml']
        config_defaults = defaults
        label = 'CrowdTruth'


def scanDirectory(directory='',root=''):
    """Process the csv files of one directory, then recurse into its subdirectories.

    Every ``*.csv`` file in ``root + directory`` (except ``groundtruth.csv``)
    is passed to ``ic.processFile``. When at least one job was found, the
    per-file results are concatenated, workers and annotations are aggregated
    by index, the configured output columns are post-processed, and the
    results are handed to ``config.processResults`` and ``oc.saveResults``.

    Args:
        directory: Path of the directory to scan, relative to ``root``. It is
            concatenated directly onto ``root``, so it is either empty or
            starts with ``'/'``.
        root: Base directory. When empty, the current working directory is
            used.
    """
    # Only fall back to the working directory when no root was supplied;
    # overwriting it unconditionally would discard the caller's --dir value.
    if not root:
        root = os.getcwd()

    # NOTE: plain concatenation is deliberate. ``directory`` starts with '/',
    # and os.path.join would throw ``root`` away for such a component.
    path = root+directory
    files = os.listdir(path)
    app.log.debug("Found directory "+path)
    # single-argument form prints the same text under Python 2 and Python 3
    print('Directory: %s %s' % (root, directory))

    results = {
        #'collections' : {},
        'jobs' : [],
        'units' : [],
        'workers' : [],
        'judgments' : [],
        'annotations' : []
        }

    config = cc.getConfig(root, directory)

    # go through all files in this folder
    subdirectories = []
    for f in files:

        # remember folders so they can be scanned after this one is done
        if os.path.isdir(path+'/'+f):
            subdirectories.append(directory+'/'+f)

        # process every csv file except the ground truth
        elif f.endswith('.csv') and f != 'groundtruth.csv':
            res = ic.processFile(root, directory, f, config)
            for x in res:
                results[x].append(res[x])

    # if jobs were found
    if results['jobs']:

        # merge the per-file frames of every result type into one frame
        for x in results:
            results[x] = pd.concat(results[x])

        # workers and annotations can appear across jobs, so we have to aggregate those extra
        results['workers'] = results['workers'].groupby(results['workers'].index).agg({
            'unit' : 'sum',
            'judgment' : 'sum',
            'job' : 'count',
            'duration' : 'mean',
            'spam' : 'sum',
            'worker-cosine' : 'mean',
            'worker-agreement' : 'mean'
            })

        # aggregate annotations
        results['annotations'] = results['annotations'].groupby(results['annotations'].index).sum()

        # CT metrics 2.0 (currently disabled)
        #results = Metrics.run(results, config)

        for col in config.output.values():
            # add customized results: store plain dicts in the unit columns
            results['units'][col] = results['units'][col].apply(dict)
            # remove Counter for readability: keep only the comma-joined keys
            results['judgments'][col] = results['judgments'][col].apply(lambda x: ','.join(x.keys()))

        results = config.processResults(results, config)

        oc.saveResults(root, directory, results)

    # remove config from system path
    # NOTE(review): presumably cc.getConfig added this directory to sys.path
    # and imported its config module as 'config' -- verify in configController.
    if config.name:
        sys.path.remove(path)
        del sys.modules['config']
    if os.path.exists(path+'/config.pyc'):
        os.remove(path+'/config.pyc')

    # dive into subdirectories; the entries already carry the ``directory``
    # prefix, so pass them on unchanged and keep scanning under the same root
    for sub in subdirectories:
        scanDirectory(sub, root)



# Script entry: set up the cement application, parse the command line and
# scan the requested directory tree. ``app`` stays a module-level name
# because scanDirectory() logs through it.
with CrowdTruth() as app:

    # track execution time
    startTime = datetime.now()

    # add arguments to the parser
    app.args.add_argument('-d', '--dir', action='store', metavar='DIR',
                          help='Set root directory (provide absolute path)')

    # log stuff
    app.log.debug("About to run my myapp application!")

    # run the application (parses the command line into app.pargs)
    app.run()

    # scan from the directory given with -d/--dir; an empty root is the
    # same as calling scanDirectory() with its defaults
    scanDirectory(root=app.pargs.dir or '')

    app.log.info('Finished in ' + str(datetime.now() - startTime))

    # no explicit app.close() here: leaving the with-block closes the
    # application, so calling it as well would run the close step twice


1			import os
2			import sys
3			from datetime import datetime
4			import pandas as pd
5			import numpy as np
6			from cement.core.foundation import CementApp
7			from cement.core import hook
8			from cement.utils.misc import init_defaults
9
10
11			from crowdtruth.models import *
12			import crowdtruth.controllers.inputController as ic
13			import crowdtruth.controllers.outputController as oc
14			import crowdtruth.controllers.configController as cc
15
16
17			# define our default configuration options
18			defaults = init_defaults('crowdtruth')
19			defaults['crowdtruth']['debug'] = False
20			defaults['crowdtruth']['some_param'] = 'some value'
21
22			# define any hook functions here
23
24
25
26			# define the application class
27			class CrowdTruth(CementApp):
28			class Meta:
29			label = 'CrowdTruth'
30			config_defaults = defaults
31			extensions = ['json', 'yaml']
32
33
34			def scanDirectory(directory='',root=''):
35			root = os.getcwd()
36			files = os.listdir(root+directory)
37			app.log.debug("Found directory "+root+directory)
38			print 'Directory:',root,directory
39
40
41			results = {
42			#'collections' : {},
43			'jobs' : [],
44			'units' : [],
45			'workers' : [],
46			'judgments' : [],
47			'annotations' : []
48			}
49
50			config = cc.getConfig(root, directory)
51
52
53			# go through all files in this folder
54			subdirectories = []
55			for f in files:
56
57			# if it is a folder scan it
58			if os.path.isdir(root+directory+'/'+f):
59			subdirectories.append(directory+'/'+f)
60
61			# if it is a csv file open it
62			elif f.endswith('.csv') and f != 'groundtruth.csv':
63			# open csv
64			res = ic.processFile(root, directory, f, config)
65			for x in res:
66			results[x].append(res[x])
67
68
69
70			# if jobs were found
71			if len(results['jobs']) > 0:
72
73			for x in results:
74			results[x] = pd.concat(results[x])
75
76			# workers and annotations can appear across jobs, so we have to aggregate those extra
77			results['workers'] = results['workers'].groupby(results['workers'].index).agg({
78			'unit' : 'sum',
79			'judgment' : 'sum',
80			'job' : 'count',
81			'duration' : 'mean',
82			'spam' : 'sum',
83			'worker-cosine' : 'mean',
84			'worker-agreement' : 'mean'
85			})
86
87
88
89			# aggregate annotations
90			results['annotations'] = results['annotations'].groupby(results['annotations'].index).sum()
91
92
93			#
94			# compute correlations
95			#
96			# remove 'output.' from the annotation column names
97
98
99			# How many times person a meets person b is described by the following (s.t. a < b)
100
101
102			# DataFrames corr() function calculates pairwise correlations using specified
103			# algorithm: 'pearson', 'kendall', and 'spearman' are supported.
104			# Correlations are returned in a new DataFrame instance (corr_df below).
105			#likert_corr_df = likert.corr(method='pearson')
106			#likert_corr_df.to_csv(wd+'/results/likert_correlations.csv', sep=',')
107
108			# CT metrics 2.0
109			#results = Metrics.run(results, config)
110
111			# add customized results
112			for c in config.output.items():
113			results['units'][c[1]] = results['units'][c[1]].apply(lambda x: dict(x))
114
115			# remove Counter for readability
116			for col in config.output.values():
117			results['judgments'][col] = results['judgments'][col].apply(lambda x: ','.join(x.keys()))
118
119
120			results = config.processResults(results, config)
121
122			oc.saveResults(root, directory, results)
123
124
125			# remove config from system path
126			if config.name:
127			sys.path.remove(root+directory)
128			del sys.modules['config']
129			if os.path.exists(root+directory+'/config.pyc'):
130			os.remove(root+directory+'/config.pyc')
131
132
133			# dive into subdirectories
134			for f in subdirectories:
135			scanDirectory(directory+'/'+f)
136
137
138
139			with CrowdTruth() as app:
140
141			# track execution time
142			startTime = datetime.now()
143
144			# add arguments to the parser
145			app.args.add_argument('-d', '--dir', action='store', metavar='DIR',
146			help='Set root directory (provide absolute path)')
147
148			# log stuff
149			app.log.debug("About to run my myapp application!")
150
151			# run the application
152			app.run()
153
154			# continue with additional application logic
155			#
156			if app.pargs.dir:
157			scanDirectory(root=app.pargs.dir)
158			else:
159			scanDirectory()
160			# verify that we have something to do
161
162
163
164
165			app.log.info('Finished in ' + str(datetime.now() - startTime))
166
167			app.close()
168

CrowdTruth / CrowdTruth-core

GitHub Access Token became invalid

Push — master ( d75a64...aeff40 )

crowdtruth A

Complexity

Size/Duplication

Importance

1 Function

Duplication Side-by-Side

Filter issues like