| Metric | Value |
| --- | --- |
| Conditions | 31 |
| Total Lines | 148 |
| Code Lines | 82 |
| Lines | 0 |
| Ratio | 0 % |
| Tests | 54 |
| CRAP Score | 44.1854 |
| Changes | 0 |
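For context: the CRAP score reported above combines cyclomatic complexity with test coverage. Assuming the standard definition of the metric, CRAP(m) = comp(m)² × (1 − cov(m))³ + comp(m), where comp(m) is the cyclomatic complexity of method m and cov(m) its coverage expressed as a fraction, so a highly complex method can only keep its score down through high test coverage.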
Small methods make your code easier to understand, especially when combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, that is usually a good sign that the commented part should be extracted into a new method, with the comment serving as a starting point for the new method's name.
Commonly applied refactorings include Extract Method. If many parameters or temporary variables are present, Replace Temp with Query, Extract Class, or Replace Method with Method Object are also worth considering.
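As a concrete illustration of Extract Method, the block in the listing below that follows the comment "remove rows where the worker did not give an answer (AMT issue)" could be pulled out into its own function, with the comment supplying the name. This is only a sketch: the helper name remove_incomplete_judgments and the consolidated log message are choices made here, not part of crowdtruth.

```python
import logging

import pandas as pd


def remove_incomplete_judgments(judgments, output_columns):
    """Drop judgment rows that left one of the output columns empty (AMT issue)."""
    empty_rows = set()
    for col in output_columns:
        empty_rows = empty_rows.union(judgments[pd.isnull(judgments[col])].index)
    for col in output_columns:
        judgments = judgments[judgments[col].notnull()]
    judgments = judgments.reset_index(drop=True)
    if empty_rows:
        logging.warning("%d row(s) with incomplete information in output columns were removed.",
                        len(empty_rows))
    return judgments
```

processFile() would then call this helper and keep only the high-level flow.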
Complex classes like crowdtruth.load.processFile() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
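In processFile() (shown below), one such cohesive group is the set of five result tables that the method builds and returns as a plain dict: jobs, units, workers, judgments, and annotations. The following is a minimal sketch of what extracting them into their own class could look like; the CrowdData name and the dataclass layout are assumptions of this example, not part of the crowdtruth API.

```python
from dataclasses import dataclass

import pandas as pd


@dataclass
class CrowdData:
    """Groups the result tables that processFile() currently returns as a dict."""
    jobs: pd.DataFrame
    units: pd.DataFrame
    workers: pd.DataFrame
    judgments: pd.DataFrame
    annotations: pd.DataFrame

    def as_dict(self):
        # Preserve the current dict shape for callers that still expect it.
        return {
            'jobs': self.jobs,
            'units': self.units,
            'workers': self.workers,
            'judgments': self.judgments,
            'annotations': self.annotations,
        }
```

The reported source of crowdtruth.load.processFile() follows.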
```python
import os

# ...

def processFile(filename, config):

    job = filename.split('.csv')[0]

    judgments = pd.read_csv(filename)#, encoding=result['encoding'])

    collection = ''
    platform = getPlatform(judgments)

    if platform == False:
        logging.info("Custom crowdsourcing platform!")
        if (len(config.customPlatformColumns) != 5):
            logging.warning("The following column names are required: judgment id, unit id, worker id, start time, submit time")
            raise ValueError('No custom platform configuration was provided')
        else:
            platform = {
                #'id' : 'custom',
                config.customPlatformColumns[0] : 'judgment',
                config.customPlatformColumns[1] : 'unit',
                config.customPlatformColumns[2] : 'worker',
                config.customPlatformColumns[3] : 'started',
                config.customPlatformColumns[4] : 'submitted'
            }

    # we must establish which fields were part of the input data and which are output judgments
    # if there is a config, check if there is a definition of which fields to use
    #config = []
    # else use the default and select them automatically
    config = getColumnTypes(judgments, config)

    # remove rows where the worker did not give an answer (AMT issue)
    empty_rows = set()
    for col in config.outputColumns:
        empty_rows = empty_rows.union(judgments[pd.isnull(judgments[col]) == True].index)
    for col in config.outputColumns:
        judgments = judgments[pd.isnull(judgments[col]) == False]
    judgments = judgments.reset_index(drop=True)
    if len(empty_rows) > 0:
        if len(empty_rows) == 1:
            logging.warning(str(len(empty_rows)) + " row with incomplete information in output columns was removed.")
        else:
            logging.warning(str(len(empty_rows)) + " rows with incomplete information in output columns were removed.")

    # allow customization of the judgments
    judgments = config.processJudgments(judgments)

    # update the config after the preprocessing of judgments
    config = getColumnTypes(judgments, config)

    allColumns = dict(list(config.input.items()) + list(config.output.items()) + list(platform.items()))
    # allColumns = dict(config.input.items() | config.output.items() | platform.items())
    judgments = judgments.rename(columns=allColumns)

    # remove columns we don't care about
    judgments = judgments[list(allColumns.values())]

    judgments['job'] = job

    # make output values safe keys
    for col in config.output.values():
        if type(judgments[col].iloc[0]) is dict:
            logging.info("Values stored as dictionary")
            if config.open_ended_task:
                judgments[col] = judgments[col].apply(lambda x: OrderedCounter(x))
            else:
                judgments[col] = judgments[col].apply(lambda x: createOrderedCounter(OrderedCounter(x), config.annotation_vector))
        else:
            logging.info("Values not stored as dictionary")
            if config.open_ended_task:
                judgments[col] = judgments[col].apply(lambda x: OrderedCounter(x.split(config.annotation_separator)))
            else:
                judgments[col] = judgments[col].apply(lambda x: createOrderedCounter(OrderedCounter(x.split(config.annotation_separator)), config.annotation_vector))

    judgments['started'] = judgments['started'].apply(lambda x: pd.to_datetime(str(x)))
    judgments['submitted'] = judgments['submitted'].apply(lambda x: pd.to_datetime(str(x)))
    judgments['duration'] = judgments.apply(lambda row: (row['submitted'] - row['started']).seconds, axis=1)

    # remove units with just 1 judgment
    units_1work = judgments.groupby('unit').filter(lambda x: len(x) == 1)["unit"]
    judgments = judgments[~judgments['unit'].isin(units_1work)]
    judgments = judgments.reset_index(drop=True)
    if len(units_1work) > 0:
        if len(units_1work) == 1:
            logging.warning(str(len(units_1work)) + " Media Unit that was annotated by only 1 Worker was omitted, since agreement cannot be calculated.")
        else:
            logging.warning(str(len(units_1work)) + " Media Units that were annotated by only 1 Worker were omitted, since agreement cannot be calculated.")

    #
    # aggregate units
    #
    units = Unit.aggregate(judgments, config)

    for col in config.output.values():
        judgments[col+'.count'] = judgments[col].apply(lambda x: sum(x.values()))
        judgments[col+'.unique'] = judgments[col].apply(lambda x: len(x))

    #
    # aggregate workers
    #
    workers = Worker.aggregate(judgments, config)

    #
    # aggregate annotations
    # i.e. output columns
    #
    annotations = pd.DataFrame()
    for col in config.output.values():
        res = pd.DataFrame(judgments[col].apply(lambda x: pd.Series(list(x.keys())).value_counts()).sum(), columns=[col])
        annotations = pd.concat([annotations, res], axis=0)

    #
    # aggregate job
    #
    job = Job.aggregate(units, judgments, workers, config)

    # Clean up judgments
    # remove input columns from judgments
    outputCol = [col for col in judgments.columns.values if col.startswith('output') or col.startswith('metric')]
    judgments = judgments[outputCol + list(platform.values()) + ['duration','job']]

    # set judgment id as index
    judgments.set_index('judgment', inplace=True)

    # add missing vector values if closed task
    for col in config.output.values():
        try:
            openended = config.open_ended_task
            for idx in list(units.index):
                for relation in config.annotation_vector:
                    if relation not in units[col][idx]:
                        units[col][idx].update({relation : 0})
        except AttributeError:
            continue

    return {
        'jobs' : job,
        'units' : units,
        'workers' : workers,
        'judgments' : judgments,
        'annotations' : annotations,
    }, config
```
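To make one of the steps above concrete, here is a small, self-contained demonstration of the "remove units with just 1 judgment" filter on toy data. The unit and worker values are invented for illustration; only the groupby/filter/isin pattern mirrors the listing.

```python
import pandas as pd

# Toy judgments table: unit "u2" was annotated by only one worker.
judgments = pd.DataFrame({
    "unit":   ["u1", "u1", "u2", "u3", "u3"],
    "worker": ["w1", "w2", "w1", "w2", "w3"],
})

# Units that received exactly one judgment cannot be used for agreement metrics.
units_1work = judgments.groupby("unit").filter(lambda x: len(x) == 1)["unit"]

judgments = judgments[~judgments["unit"].isin(units_1work)].reset_index(drop=True)

print(units_1work.tolist())        # ['u2']
print(judgments["unit"].tolist())  # ['u1', 'u1', 'u3', 'u3']
```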