|
1
|
|
|
import os |
|
2
|
|
|
import random |
|
3
|
|
|
import sqlite3 |
|
4
|
|
|
|
|
5
|
|
|
import yaml |
|
6
|
|
|
|
|
7
|
|
|
|
|
8
|
|
|
class FileNotFoundError(IOError): |
|
9
|
|
|
pass |
|
10
|
|
|
|
|
11
|
|
|
|
|
12
|
|
|
class ProjectBuilder(object): |
|
13
|
|
|
def __init__(self, **kwargs): |
|
14
|
|
|
if "data" in kwargs: |
|
15
|
|
|
self.data = kwargs["data"] |
|
16
|
|
|
else: |
|
17
|
|
|
self.data = None |
|
18
|
|
|
if "coder" in kwargs: |
|
19
|
|
|
self.coder = kwargs["coder"] |
|
20
|
|
|
else: |
|
21
|
|
|
self.coder = None |
|
22
|
|
|
if "config" in kwargs: |
|
23
|
|
|
self.config = kwargs["config"] |
|
24
|
|
|
else: |
|
25
|
|
|
self.config = None |
|
26
|
|
|
|
|
27
|
|
|
def finished(self): |
|
28
|
|
|
return self.data is not None and self.coder is not None and self.config is not None |
|
29
|
|
|
|
|
30
|
|
|
def build(self): |
|
31
|
|
|
return Project(data=self.data, coder=self.coder, config=self.config) |
|
32
|
|
|
|
|
33
|
|
|
|
|
34
|
|
|
class Project(object): |
|
35
|
|
|
def __init__(self, path=".", file="project.db", coder=None, *args, **kwargs): |
|
36
|
|
|
# Project file doesn't already exist |
|
37
|
|
|
self.path = path |
|
38
|
|
|
self.file = file |
|
39
|
|
|
self.conn = sqlite3.connect(os.path.join(path, file)) |
|
40
|
|
|
self.init_db(path, *args, **kwargs) |
|
41
|
|
|
self.current_coding_unit = None |
|
42
|
|
|
if coder is not None: |
|
43
|
|
|
self.coder = coder |
|
44
|
|
|
else: |
|
45
|
|
|
raise Exception("Please define the coder!") |
|
46
|
|
|
|
|
47
|
|
|
def init_db(self, path, *args, **kwargs): |
|
48
|
|
|
c = self.conn.cursor() |
|
49
|
|
|
c.execute("""CREATE TABLE IF NOT EXISTS vars (key text, value text)""") |
|
50
|
|
|
c.execute("""CREATE TABLE IF NOT EXISTS translation (clear text, translated text, UNIQUE(clear, translated))""") |
|
51
|
|
|
self.conn.commit() |
|
52
|
|
|
if "data" in kwargs: |
|
53
|
|
|
with open(os.path.join(path, kwargs["data"])) as file: |
|
54
|
|
|
res = Project.handleCSV(file, ";") |
|
55
|
|
|
if len(res): |
|
56
|
|
|
titles = list(res[0].keys()) |
|
57
|
|
|
cquery = ["{} {}".format(self.__transform_column(i), "TEXT") for i in titles] |
|
58
|
|
|
cquery = ", ".join(cquery) |
|
59
|
|
|
c.execute("""CREATE TABLE IF NOT EXISTS individuals (id INTEGER PRIMARY KEY,{})""".format(cquery)) |
|
60
|
|
|
self.conn.commit() |
|
61
|
|
|
for row in res: |
|
62
|
|
|
columns = [] |
|
63
|
|
|
values = [] |
|
64
|
|
|
for column in row: |
|
65
|
|
|
if len(row[column].strip()) != 0: |
|
66
|
|
|
columns.append(self.__transform_column(column)) |
|
67
|
|
|
values.append((row[column])) |
|
68
|
|
|
aquery = " AND ".join(i + "=?" for i in columns) |
|
69
|
|
|
|
|
70
|
|
|
if len(values): |
|
71
|
|
|
c.execute("""SELECT id FROM individuals WHERE {}""".format(aquery), values) |
|
72
|
|
|
identifier = c.fetchone() |
|
73
|
|
|
if not identifier: |
|
74
|
|
|
c.execute("""INSERT INTO individuals ({}) VALUES ({})""".format(", ".join(columns), |
|
75
|
|
|
" ,".join( |
|
76
|
|
|
"?" for _ in values)), |
|
77
|
|
|
values) |
|
78
|
|
|
self.conn.commit() |
|
79
|
|
|
if "config" in kwargs: |
|
80
|
|
|
c.execute("""CREATE TABLE IF NOT EXISTS question_assoc (question text, coding text)""") |
|
81
|
|
|
with open(os.path.join(path, kwargs["config"])) as file: |
|
82
|
|
|
questions = yaml.load(file) |
|
83
|
|
|
|
|
84
|
|
|
for question in questions["questions"]: |
|
85
|
|
|
qquery = ", ".join( |
|
86
|
|
|
[self.__transform_column(i["criteria"]) + " TEXT" for i in question["coding"]] + ["coder TEXT"]) |
|
87
|
|
|
c.execute("""CREATE TABLE IF NOT EXISTS {} (id INTEGER PRIMARY KEY, {})""".format( |
|
88
|
|
|
self.__transform_column(question["text"]), qquery)) |
|
89
|
|
|
for i in question["coding"]: |
|
90
|
|
|
c.execute("""INSERT INTO question_assoc SELECT ?,? |
|
91
|
|
|
WHERE NOT EXISTS(SELECT 1 FROM question_assoc WHERE question=? AND coding=?)""", |
|
92
|
|
|
(question["text"], i["criteria"], question["text"], i["criteria"])) |
|
93
|
|
|
self.conn.commit() |
|
94
|
|
|
|
|
95
|
|
|
def get_questions(self, question): |
|
96
|
|
|
c = self.conn.cursor() |
|
97
|
|
|
c.execute("""SELECT coding FROM question_assoc WHERE question=?""", (question,)) |
|
98
|
|
|
res = [i[0] for i in c.fetchall()] |
|
99
|
|
|
return res |
|
100
|
|
|
|
|
101
|
|
|
def __transform_column(self, column): |
|
102
|
|
|
column = column.strip() |
|
103
|
|
|
before = column |
|
104
|
|
|
for i in "?()-,;[].=": |
|
105
|
|
|
column = column.replace(i, "_") |
|
106
|
|
|
columns = column.split(" ") |
|
107
|
|
|
columns = list(map(str.lower, columns)) |
|
108
|
|
|
kw = ["alter"] |
|
109
|
|
|
for i in range(len(columns)): |
|
110
|
|
|
if columns[i] in kw: |
|
111
|
|
|
columns[i] = "_".join([columns[i][:-1], columns[i][-1]]) |
|
112
|
|
|
column = "_".join(columns) |
|
113
|
|
|
c = self.conn.cursor() |
|
114
|
|
|
c.execute("""INSERT OR IGNORE INTO translation (clear, translated) VALUES (?,?)""", (before, column)) |
|
115
|
|
|
self.conn.commit() |
|
116
|
|
|
return column |
|
117
|
|
|
|
|
118
|
|
|
def __reverse_transform_column(self, column): |
|
119
|
|
|
c = self.conn.cursor() |
|
120
|
|
|
c.execute("""SELECT clear FROM translation WHERE translated=?""", (column,)) |
|
121
|
|
|
res = c.fetchone() |
|
122
|
|
|
if res is not None and len(res) > 0: |
|
123
|
|
|
return str(res[0]) |
|
124
|
|
|
return str(None) |
|
125
|
|
|
|
|
126
|
|
|
""" |
|
127
|
|
|
def init_dict(self, init_kwargs, **kwargs): |
|
128
|
|
|
for i in kwargs: |
|
129
|
|
|
if i not in self.state: |
|
130
|
|
|
if i in init_kwargs: |
|
131
|
|
|
self.state[i] = init_kwargs[i] |
|
132
|
|
|
else: |
|
133
|
|
|
self.state[i] = kwargs[i] |
|
134
|
|
|
""" |
|
135
|
|
|
@staticmethod |
|
136
|
|
|
def handleCSV(file, separator): |
|
137
|
|
|
res = [] |
|
138
|
|
|
titles = [] |
|
139
|
|
|
for i, j in enumerate(file): |
|
140
|
|
|
if i == 0: |
|
141
|
|
|
titles = j.strip("\n").split(separator) |
|
142
|
|
|
else: |
|
143
|
|
|
res.append(dict(zip(titles, j.strip("\n").split(separator)))) |
|
144
|
|
|
return res |
|
145
|
|
|
|
|
146
|
|
|
@property |
|
147
|
|
|
def all_tables(self): |
|
148
|
|
|
c = self.conn.cursor() |
|
149
|
|
|
c.execute("""SELECT name FROM sqlite_master WHERE type='table'""") |
|
150
|
|
|
return [self.__transform_column(i[0]) for i in c.fetchall()] |
|
151
|
|
|
|
|
152
|
|
|
@property |
|
153
|
|
|
def custom_tables(self): |
|
154
|
|
|
return [i for i in self.all_tables if i not in self.system_tables] |
|
155
|
|
|
|
|
156
|
|
|
@property |
|
157
|
|
|
def system_tables(self): |
|
158
|
|
|
return ["vars", "individuals", "question_assoc", "translation"] |
|
159
|
|
|
|
|
160
|
|
|
def get_columns(self, table): |
|
161
|
|
|
c = self.conn.cursor() |
|
162
|
|
|
c.execute("""PRAGMA table_info({})""".format(table)) |
|
163
|
|
|
return [i[1] for i in c.fetchall()] |
|
164
|
|
|
|
|
165
|
|
|
def get_number_of_entries(self, table): |
|
166
|
|
|
c = self.conn.cursor() |
|
167
|
|
|
c.execute("""SELECT count(*) FROM individuals""".format(self.__transform_column(table))) |
|
168
|
|
|
return c.fetchall()[0][0] |
|
169
|
|
|
|
|
170
|
|
|
def get_whole_table(self, table): |
|
171
|
|
|
colums = self.get_columns(table) |
|
172
|
|
|
c = self.conn.cursor() |
|
173
|
|
|
c.execute("""SELECT {} FROM {}""".format(", ".join(colums), table)) |
|
174
|
|
|
return c.fetchall() |
|
175
|
|
|
|
|
176
|
|
|
def __iter__(self): |
|
177
|
|
|
return self |
|
178
|
|
|
|
|
179
|
|
|
def __next__(self): |
|
180
|
|
|
if self.current_coding_unit is not None: |
|
181
|
|
|
if not self.current_coding_unit.isFinished(): |
|
182
|
|
|
return self.current_coding_unit |
|
183
|
|
|
amount_of_individuals = self.get_number_of_entries("individuals") |
|
184
|
|
|
for table in self.custom_tables: |
|
185
|
|
|
c = self.conn.cursor() |
|
186
|
|
|
c.execute("""SELECT * FROM {}""".format(table)) |
|
187
|
|
|
|
|
188
|
|
|
ids_in_table = c.fetchall() |
|
189
|
|
|
ids_in_table[:] = [i for i in ids_in_table if None not in i] |
|
190
|
|
|
if amount_of_individuals > len(ids_in_table): |
|
191
|
|
|
entries = set(range(amount_of_individuals)) - set([i[0] for i in ids_in_table]) |
|
192
|
|
|
entry = random.choice(list(entries)) |
|
193
|
|
|
c.execute("""SELECT {} FROM individuals""".format(table)) |
|
194
|
|
|
coding_answer = c.fetchall()[entry][0] |
|
195
|
|
|
coding_question = self.__reverse_transform_column(table) |
|
196
|
|
|
questions = self.get_questions(coding_question) |
|
197
|
|
|
questions[:] = [i for i in questions if not self.__question_already_answered(coding_question, i, entry)] |
|
198
|
|
|
if len(questions) == 0: |
|
199
|
|
|
c.execute("""SELECT * FROM question_assoc""") |
|
200
|
|
|
res = c.fetchall() |
|
201
|
|
|
for i in res: |
|
202
|
|
|
print(i, len(i[0]), len(coding_question), repr(coding_question)) |
|
203
|
|
|
raise Exception("This should not happen") |
|
204
|
|
|
self.current_coding_unit = CodingUnit(self, coding_question, coding_answer, questions, entry) |
|
205
|
|
|
return self.current_coding_unit |
|
206
|
|
|
raise StopIteration |
|
207
|
|
|
|
|
208
|
|
|
def __question_already_answered(self, coding_question, question, id_): |
|
209
|
|
|
c = self.conn.cursor() |
|
210
|
|
|
c.execute("""SELECT {} FROM {} WHERE id=?""".format(self.__transform_column(question), |
|
211
|
|
|
self.__transform_column(coding_question)), (id_,)) |
|
212
|
|
|
res = c.fetchone() |
|
213
|
|
|
return res is not None and res[0] is not None |
|
214
|
|
|
|
|
215
|
|
|
def store_answer(self, coding_question, question, answer, id_): |
|
216
|
|
|
c = self.conn.cursor() |
|
217
|
|
|
c.execute("""INSERT OR IGNORE INTO {} (id,{}) VALUES (?,?)""".format(self.__transform_column(coding_question), |
|
218
|
|
|
self.__transform_column(question)), |
|
219
|
|
|
(id_, answer)) |
|
220
|
|
|
c.execute("""UPDATE {} SET id=?,{}=? WHERE id=?""".format(self.__transform_column(coding_question), |
|
221
|
|
|
self.__transform_column(question)), |
|
222
|
|
|
(id_, answer, id_)) |
|
223
|
|
|
self.conn.commit() |
|
224
|
|
|
|
|
225
|
|
|
def export(self, filename="out.txt"): |
|
226
|
|
|
with open(os.path.join(self.path, filename), "w") as file: |
|
227
|
|
|
file.write("\t".join([self.__reverse_transform_column(i) for i in self.get_columns("individuals") if |
|
228
|
|
|
i not in self.custom_tables + ["id"]] |
|
229
|
|
|
+ ["Question to participant", "Participant Answer", "Coder", "Coding Questions", |
|
230
|
|
|
"Coding Answer", "\n"])) |
|
231
|
|
|
for individual in self.get_whole_table("individuals"): |
|
232
|
|
|
column = self.get_columns("individuals") |
|
233
|
|
|
individual = dict(zip(column, individual)) |
|
234
|
|
|
for question in self.custom_tables: |
|
235
|
|
|
for i in self.get_whole_table(question): |
|
236
|
|
|
column = self.get_columns(question) |
|
237
|
|
|
coding_questions = dict(zip(column, i)) |
|
238
|
|
|
if coding_questions["id"] == individual["id"]-1: |
|
239
|
|
|
for coding_question in coding_questions: |
|
240
|
|
|
if coding_question != "id" and coding_question != "coder": |
|
241
|
|
|
file.write("\t".join([str(individual[i]) for i in self.get_columns("individuals") if |
|
242
|
|
|
i not in self.custom_tables + ["id"]] |
|
243
|
|
|
+ [str(self.__reverse_transform_column(question)), |
|
244
|
|
|
str(individual[question]), coding_questions["coder"], |
|
245
|
|
|
self.__reverse_transform_column(coding_question), |
|
246
|
|
|
coding_questions[coding_question], "\n"])) |
|
247
|
|
|
|
|
248
|
|
|
titles = list() |
|
249
|
|
|
for i in ["individuals"] + self.custom_tables: |
|
250
|
|
|
for j in self.get_columns(i): |
|
251
|
|
|
titles.append((i, j)) |
|
252
|
|
|
""" |
|
253
|
|
|
columns = [i[1] for i in titles if i[0] == "individuals" and i[1] not |
|
254
|
|
|
in [self.__transform_column(i[0]) for i in self.custom_tables]] |
|
255
|
|
|
print(len(columns),columns) |
|
256
|
|
|
""" |
|
257
|
|
|
''' |
|
258
|
|
|
command = """SELECT {} FROM individuals\n""".format(", ".join(list(map(".".join, titles)))) + "".join( |
|
259
|
|
|
["""INNER JOIN {} ON {}\n""".format(i, """{}.id = individuals.id""".format(i)) for i in self.custom_tables]) |
|
260
|
|
|
''' |
|
261
|
|
|
""" |
|
262
|
|
|
print(command) |
|
263
|
|
|
c = self.conn.cursor() |
|
264
|
|
|
c.execute(command) |
|
265
|
|
|
res = c.fetchall() |
|
266
|
|
|
print(res) |
|
267
|
|
|
for i in res: |
|
268
|
|
|
print(i) |
|
269
|
|
|
print( titles ) |
|
270
|
|
|
print([(i[0],self.__reverse_transform_column(i[1])) for i in titles]) |
|
271
|
|
|
|
|
272
|
|
|
#with open(os.path.join(self.path,filename)): |
|
273
|
|
|
# pass |
|
274
|
|
|
""" |
|
275
|
|
|
|
|
276
|
|
|
|
|
277
|
|
|
class CodingUnit(object): |
|
278
|
|
|
def __init__(self, project, question, answer, coding_questions, id_): |
|
279
|
|
|
self.question = question |
|
280
|
|
|
self.answer = answer |
|
281
|
|
|
self.coding_questions = coding_questions |
|
282
|
|
|
self.coding_answers = dict() |
|
283
|
|
|
self.id = id_ |
|
284
|
|
|
self.project = project |
|
285
|
|
|
self["coder"] = project.coder |
|
286
|
|
|
|
|
287
|
|
|
def isFinished(self): |
|
288
|
|
|
res = True |
|
289
|
|
|
res &= all(i in self.coding_answers.keys() for i in self.coding_questions) |
|
290
|
|
|
res &= all(self.coding_answers[i] is not None for i in self.coding_answers) |
|
291
|
|
|
return res |
|
292
|
|
|
|
|
293
|
|
|
def __setitem__(self, key, value): |
|
294
|
|
|
self.coding_answers[key] = value |
|
295
|
|
|
self.project.store_answer(self.question, key, value, self.id) |
|
296
|
|
|
|
|
297
|
|
|
def __repr__(self): |
|
298
|
|
|
return "\n".join(["Coding unit: {} -> {}".format(self.question, self.answer)] + [ |
|
299
|
|
|
"-{}\n\t-> {}".format(i, self.coding_answers.get(i, None)) for i in self.coding_questions]) |
|
300
|
|
|
|