GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.

crowdtruth.crowd_platform   A
last analyzed

Complexity

Total Complexity 24

Size/Duplication

Total Lines 126
Duplicated Lines 0 %

Test Coverage

Coverage 69.09%

Importance

Changes 0
Metric Value
wmc 24
eloc 75
dl 0
loc 126
ccs 38
cts 55
cp 0.6909
rs 10
c 0
b 0
f 0

5 Functions

Rating   Name   Duplication   Size   Complexity  
A configure_platform_columns() 0 12 3
B configure_with_missing_columns() 0 20 8
A configure_amt_columns() 0 20 3
B get_column_types() 0 38 7
A get_platform() 0 24 3
1
"""
2
Module used to process information regarding the crowdsourcing platform.
3
"""
4 1
class Found(Exception):
    """Signal exception used to leave a search loop early once a match is found.

    It carries no data and does not indicate an error; it only marks that
    the condition being searched for has been met.
    """
7
8 1
def get_platform(dframe):
    """Get the crowdsourcing platform this file originates from.

    The platform is recognised from the name of the first column of
    ``dframe``.

    Args:
        dframe: Object exposing ``columns.values`` (a sequence of column
            names), e.g. a pandas DataFrame.

    Returns:
        A new dict mapping the platform-specific column names to the names
        'judgment', 'unit', 'worker', 'started' and 'submitted', or ``False``
        when the platform is not recognised (including when ``dframe`` has
        no columns at all).
    """
    column_names = dframe.columns.values
    # len() is used instead of truthiness because a multi-element numpy
    # array has no unambiguous boolean value.
    if len(column_names) == 0:
        return False

    first_column = column_names[0]
    if first_column == '_unit_id':
        # CrowdFlower
        return {
            '_id'           : 'judgment',
            '_unit_id'      : 'unit',
            '_worker_id'    : 'worker',
            '_started_at'   : 'started',
            '_created_at'   : 'submitted'
        }
    if first_column == 'HITId':
        # Mturk
        return {
            'AssignmentId'  : 'judgment',
            'HITId'         : 'unit',
            'WorkerId'      : 'worker',
            'AcceptTime'    : 'started',
            'SubmitTime'    : 'submitted'
        }
    return False
32
33 1
def configure_amt_columns(dframe, config):
    """Configure AMT input and output columns.

    Input columns are those listed in ``config.inputColumns`` or, when that
    is empty, every column whose name starts with 'Input.'. Output columns
    are those listed in ``config.outputColumns`` or, when that is empty,
    every column whose name starts with 'Answer.'. Each selected column is
    mapped to 'input.<name>' / 'output.<name>', where a leading 'Input.' /
    'Answer.' is removed from ``<name>``.

    Args:
        dframe: Object exposing ``columns.values`` (sequence of column names).
        config: Object with ``inputColumns`` and ``outputColumns`` attributes;
            its ``input`` and ``output`` attributes are overwritten.

    Returns:
        The tuple ``(config.input, config.output)``.
    """
    column_names = dframe.columns.values

    def _build_mapping(configured, amt_prefix, target_prefix):
        """Map the selected columns to their prefixed, normalised names."""
        if configured:
            # if config is specified, use those columns
            selected = [c for c in column_names if c in configured]
        else:
            selected = [c for c in column_names if c.startswith(amt_prefix)]
        # Only a *leading* platform prefix is stripped; str.replace would
        # also delete the token from the middle of a column name.
        return {
            c: target_prefix + (c[len(amt_prefix):] if c.startswith(amt_prefix) else c)
            for c in selected
        }

    config.input = _build_mapping(config.inputColumns, 'Input.', 'input.')
    config.output = _build_mapping(config.outputColumns, 'Answer.', 'output.')
    return config.input, config.output
53
54 1
def configure_platform_columns(dframe, config):
    """Configure FigureEight and custom platforms input and output columns.

    Only explicitly configured columns are mapped: every column of ``dframe``
    that is listed in ``config.inputColumns`` is mapped to 'input.<name>',
    and every column listed in ``config.outputColumns`` to 'output.<name>'.
    A mapping stays empty when its configuration list is empty.

    Args:
        dframe: Object exposing ``columns.values`` (sequence of column names).
        config: Object with ``inputColumns`` and ``outputColumns`` attributes;
            its ``input`` and ``output`` attributes are overwritten.

    Returns:
        The tuple ``(config.input, config.output)``.
    """
    column_names = dframe.columns.values
    config.input = {}
    config.output = {}

    if config.inputColumns:
        config.input = {
            c: 'input.' + c for c in column_names if c in config.inputColumns
        }
    if config.outputColumns:
        config.output = {
            c: 'output.' + c for c in column_names if c in config.outputColumns
        }
    return config.input, config.output
66
67 1
def configure_with_missing_columns(dframe, config):
    """Identify input and output columns that were not explicitly configured.

    Rows are grouped by '_unit_id'. A candidate column with at most one
    distinct value inside every unit is treated as an input column; a column
    with more than one distinct value in some unit is treated as an output
    column. Columns named 'clustering', starting with '_' or 'e_', or ending
    with '_gold', '_reason' or 'browser' are never candidates.

    Input columns are only added when ``config.inputColumns`` is empty, and
    output columns only when ``config.outputColumns`` is empty.

    Args:
        dframe: pandas DataFrame containing a '_unit_id' column.
        config: Object with ``inputColumns``/``outputColumns`` attributes and
            ``input``/``output`` dicts, which are updated in place.

    Returns:
        The same ``config`` object.
    """
    units = dframe.groupby('_unit_id')
    skipped_prefixes = ('_', 'e_')
    skipped_suffixes = ('_gold', '_reason', 'browser')
    candidates = [
        c for c in dframe.columns.values
        if c != 'clustering'
        and not c.startswith(skipped_prefixes)
        and not c.endswith(skipped_suffixes)
    ]

    for colname in candidates:
        # any() stops at the first unit in which the column varies, which is
        # what the early exit via an exception used to achieve.
        varies_within_unit = any(
            unit[colname].nunique() > 1 for _, unit in units
        )
        if varies_within_unit:
            if not config.outputColumns:
                config.output[colname] = 'output.' + colname
        elif not config.inputColumns:
            config.input[colname] = 'input.' + colname

    return config
87
88 1
def get_column_types(dframe, config):
    """Fill ``config.input`` and ``config.output`` with the column mappings.

    Each mapping entry matches [original column name]:[safestring column
    name]. The platform is recognised from the name of the first column:

    * 'HITId' (Mturk): columns are taken from the configuration or from the
      'Input.' / 'Answer.' naming convention.
    * '_unit_id': configured columns are used; when input or output columns
      are not configured, they are inferred from the data.
    * anything else (unknown platform, or no columns at all): only explicitly
      configured columns are mapped.

    Args:
        dframe: pandas DataFrame with the collected judgments.
        config: Object with ``inputColumns`` and ``outputColumns`` attributes;
            its ``input`` and ``output`` attributes are overwritten.

    Returns:
        The ``config`` object, on every path.
    """
    column_names = dframe.columns.values
    # A frame without columns is handled like an unknown platform.
    first_column = column_names[0] if len(column_names) else None

    if first_column == 'HITId':
        # Mturk
        # if config is specified, use those columns
        config.input, config.output = configure_amt_columns(dframe, config)
        return config

    # if a config is specified, use those columns
    config.input, config.output = configure_platform_columns(dframe, config)
    # if there is a config for both input and output columns, we can return those
    if config.inputColumns and config.outputColumns:
        return config

    if first_column == '_unit_id':
        # try to identify the input and output columns
        # this is the case if all the values in the column are identical
        # this is not failsafe but should give decent results without settings
        # it is best to make a settings.py file for a collection
        return configure_with_missing_columns(dframe, config)

    # unknown platform type with an incomplete configuration: nothing can be
    # inferred, so hand back what was configured instead of falling off the
    # end of the function and returning None.
    return config
126