1
|
|
|
import os |
2
|
|
|
import random |
3
|
|
|
import sqlite3 |
4
|
|
|
|
5
|
|
|
import yaml |
6
|
|
|
|
7
|
|
|
|
8
|
|
|
class FileNotFoundError(IOError): |
9
|
|
|
pass |
10
|
|
|
|
11
|
|
|
|
12
|
|
|
class ProjectBuilder(object): |
13
|
|
|
def __init__(self, **kwargs): |
14
|
|
|
if "data" in kwargs: |
15
|
|
|
self.data = kwargs["data"] |
16
|
|
|
else: |
17
|
|
|
self.data = None |
18
|
|
|
if "coder" in kwargs: |
19
|
|
|
self.coder = kwargs["coder"] |
20
|
|
|
else: |
21
|
|
|
self.coder = None |
22
|
|
|
if "config" in kwargs: |
23
|
|
|
self.config = kwargs["config"] |
24
|
|
|
else: |
25
|
|
|
self.config = None |
26
|
|
|
|
27
|
|
|
def finished(self): |
28
|
|
|
return self.data is not None and self.coder is not None and self.config is not None |
29
|
|
|
|
30
|
|
|
def build(self): |
31
|
|
|
return Project(data=self.data, coder=self.coder, config=self.config) |
32
|
|
|
|
33
|
|
|
|
34
|
|
|
class Project(object): |
35
|
|
|
def __init__(self, path=".", file="project.db", coder=None, *args, **kwargs): |
36
|
|
|
# Project file doesn't already exist |
37
|
|
|
self.path = path |
38
|
|
|
self.file = file |
39
|
|
|
self.conn = sqlite3.connect(os.path.join(path, file)) |
40
|
|
|
self.init_db(path, *args, **kwargs) |
41
|
|
|
self.current_coding_unit = None |
42
|
|
|
if coder is not None: |
43
|
|
|
self.coder = coder |
44
|
|
|
else: |
45
|
|
|
raise Exception("Please define the coder!") |
46
|
|
|
|
47
|
|
|
def init_db(self, path, *args, **kwargs): |
48
|
|
|
c = self.conn.cursor() |
49
|
|
|
c.execute("""CREATE TABLE IF NOT EXISTS vars (key text, value text)""") |
50
|
|
|
c.execute("""CREATE TABLE IF NOT EXISTS translation (clear text, translated text, UNIQUE(clear, translated))""") |
51
|
|
|
self.conn.commit() |
52
|
|
|
if "data" in kwargs: |
53
|
|
|
with open(os.path.join(path, kwargs["data"])) as file: |
54
|
|
|
res = Project.handleCSV(file, ";") |
55
|
|
|
if len(res): |
56
|
|
|
titles = list(res[0].keys()) |
57
|
|
|
cquery = ["{} {}".format(self.__transform_column(i), "TEXT") for i in titles] |
58
|
|
|
cquery = ", ".join(cquery) |
59
|
|
|
c.execute("""CREATE TABLE IF NOT EXISTS individuals (id INTEGER PRIMARY KEY,{})""".format(cquery)) |
60
|
|
|
self.conn.commit() |
61
|
|
|
for row in res: |
62
|
|
|
columns = [] |
63
|
|
|
values = [] |
64
|
|
|
for column in row: |
65
|
|
|
if len(row[column].strip()) != 0: |
66
|
|
|
columns.append(self.__transform_column(column)) |
67
|
|
|
values.append((row[column])) |
68
|
|
|
aquery = " AND ".join(i + "=?" for i in columns) |
69
|
|
|
|
70
|
|
|
if len(values): |
71
|
|
|
c.execute("""SELECT id FROM individuals WHERE {}""".format(aquery), values) |
72
|
|
|
identifier = c.fetchone() |
73
|
|
|
if not identifier: |
74
|
|
|
c.execute("""INSERT INTO individuals ({}) VALUES ({})""".format(", ".join(columns), |
75
|
|
|
" ,".join( |
76
|
|
|
"?" for _ in values)), |
77
|
|
|
values) |
78
|
|
|
self.conn.commit() |
79
|
|
|
if "config" in kwargs: |
80
|
|
|
c.execute("""CREATE TABLE IF NOT EXISTS question_assoc (question text, coding text)""") |
81
|
|
|
with open(os.path.join(path, kwargs["config"])) as file: |
82
|
|
|
questions = yaml.load(file) |
83
|
|
|
|
84
|
|
|
for question in questions["questions"]: |
85
|
|
|
qquery = ", ".join( |
86
|
|
|
[self.__transform_column(i["criteria"]) + " TEXT" for i in question["coding"]] + ["coder TEXT"]) |
87
|
|
|
c.execute("""CREATE TABLE IF NOT EXISTS {} (id INTEGER PRIMARY KEY, {})""".format( |
88
|
|
|
self.__transform_column(question["text"]), qquery)) |
89
|
|
|
for i in question["coding"]: |
90
|
|
|
c.execute("""INSERT INTO question_assoc SELECT ?,? |
91
|
|
|
WHERE NOT EXISTS(SELECT 1 FROM question_assoc WHERE question=? AND coding=?)""", |
92
|
|
|
(question["text"], i["criteria"], question["text"], i["criteria"])) |
93
|
|
|
self.conn.commit() |
94
|
|
|
|
95
|
|
|
def get_questions(self, question): |
96
|
|
|
c = self.conn.cursor() |
97
|
|
|
c.execute("""SELECT coding FROM question_assoc WHERE question=?""", (question,)) |
98
|
|
|
res = [i[0] for i in c.fetchall()] |
99
|
|
|
return res |
100
|
|
|
|
101
|
|
|
def __transform_column(self, column): |
102
|
|
|
column = column.strip() |
103
|
|
|
before = column |
104
|
|
|
for i in "?()-,;[].=": |
105
|
|
|
column = column.replace(i, "_") |
106
|
|
|
columns = column.split(" ") |
107
|
|
|
columns = list(map(str.lower, columns)) |
108
|
|
|
kw = ["alter"] |
109
|
|
|
for i in range(len(columns)): |
110
|
|
|
if columns[i] in kw: |
111
|
|
|
columns[i] = "_".join([columns[i][:-1], columns[i][-1]]) |
112
|
|
|
column = "_".join(columns) |
113
|
|
|
c = self.conn.cursor() |
114
|
|
|
c.execute("""INSERT OR IGNORE INTO translation (clear, translated) VALUES (?,?)""", (before, column)) |
115
|
|
|
self.conn.commit() |
116
|
|
|
return column |
117
|
|
|
|
118
|
|
|
def __reverse_transform_column(self, column): |
119
|
|
|
c = self.conn.cursor() |
120
|
|
|
c.execute("""SELECT clear FROM translation WHERE translated=?""", (column,)) |
121
|
|
|
res = c.fetchone() |
122
|
|
|
if res is not None and len(res) > 0: |
123
|
|
|
return str(res[0]) |
124
|
|
|
return str(None) |
125
|
|
|
|
126
|
|
|
""" |
127
|
|
|
def init_dict(self, init_kwargs, **kwargs): |
128
|
|
|
for i in kwargs: |
129
|
|
|
if i not in self.state: |
130
|
|
|
if i in init_kwargs: |
131
|
|
|
self.state[i] = init_kwargs[i] |
132
|
|
|
else: |
133
|
|
|
self.state[i] = kwargs[i] |
134
|
|
|
""" |
135
|
|
|
@staticmethod |
136
|
|
|
def handleCSV(file, separator): |
137
|
|
|
res = [] |
138
|
|
|
titles = [] |
139
|
|
|
for i, j in enumerate(file): |
140
|
|
|
if i == 0: |
141
|
|
|
titles = j.strip("\n").split(separator) |
142
|
|
|
else: |
143
|
|
|
res.append(dict(zip(titles, j.strip("\n").split(separator)))) |
144
|
|
|
return res |
145
|
|
|
|
146
|
|
|
@property |
147
|
|
|
def all_tables(self): |
148
|
|
|
c = self.conn.cursor() |
149
|
|
|
c.execute("""SELECT name FROM sqlite_master WHERE type='table'""") |
150
|
|
|
return [self.__transform_column(i[0]) for i in c.fetchall()] |
151
|
|
|
|
152
|
|
|
@property |
153
|
|
|
def custom_tables(self): |
154
|
|
|
return [i for i in self.all_tables if i not in self.system_tables] |
155
|
|
|
|
156
|
|
|
@property |
157
|
|
|
def system_tables(self): |
158
|
|
|
return ["vars", "individuals", "question_assoc", "translation"] |
159
|
|
|
|
160
|
|
|
def get_columns(self, table): |
161
|
|
|
c = self.conn.cursor() |
162
|
|
|
c.execute("""PRAGMA table_info({})""".format(table)) |
163
|
|
|
return [i[1] for i in c.fetchall()] |
164
|
|
|
|
165
|
|
|
def get_number_of_entries(self, table): |
166
|
|
|
c = self.conn.cursor() |
167
|
|
|
c.execute("""SELECT count(*) FROM individuals""".format(self.__transform_column(table))) |
168
|
|
|
return c.fetchall()[0][0] |
169
|
|
|
|
170
|
|
|
def get_whole_table(self, table): |
171
|
|
|
colums = self.get_columns(table) |
172
|
|
|
c = self.conn.cursor() |
173
|
|
|
c.execute("""SELECT {} FROM {}""".format(", ".join(colums), table)) |
174
|
|
|
return c.fetchall() |
175
|
|
|
|
176
|
|
|
def __iter__(self): |
177
|
|
|
return self |
178
|
|
|
|
179
|
|
|
def __next__(self): |
180
|
|
|
if self.current_coding_unit is not None: |
181
|
|
|
if not self.current_coding_unit.isFinished(): |
182
|
|
|
return self.current_coding_unit |
183
|
|
|
amount_of_individuals = self.get_number_of_entries("individuals") |
184
|
|
|
for table in self.custom_tables: |
185
|
|
|
c = self.conn.cursor() |
186
|
|
|
c.execute("""SELECT * FROM {}""".format(table)) |
187
|
|
|
|
188
|
|
|
ids_in_table = c.fetchall() |
189
|
|
|
ids_in_table[:] = [i for i in ids_in_table if None not in i] |
190
|
|
|
if amount_of_individuals > len(ids_in_table): |
191
|
|
|
entries = set(range(amount_of_individuals)) - set([i[0] for i in ids_in_table]) |
192
|
|
|
entry = random.choice(list(entries)) |
193
|
|
|
c.execute("""SELECT {} FROM individuals""".format(table)) |
194
|
|
|
coding_answer = c.fetchall()[entry][0] |
195
|
|
|
coding_question = self.__reverse_transform_column(table) |
196
|
|
|
questions = self.get_questions(coding_question) |
197
|
|
|
questions[:] = [i for i in questions if not self.__question_already_answered(coding_question, i, entry)] |
198
|
|
|
if len(questions) == 0: |
199
|
|
|
c.execute("""SELECT * FROM question_assoc""") |
200
|
|
|
res = c.fetchall() |
201
|
|
|
for i in res: |
202
|
|
|
print(i, len(i[0]), len(coding_question), repr(coding_question)) |
203
|
|
|
raise Exception("This should not happen") |
204
|
|
|
self.current_coding_unit = CodingUnit(self, coding_question, coding_answer, questions, entry) |
205
|
|
|
return self.current_coding_unit |
206
|
|
|
raise StopIteration |
207
|
|
|
|
208
|
|
|
def __question_already_answered(self, coding_question, question, id_): |
209
|
|
|
c = self.conn.cursor() |
210
|
|
|
c.execute("""SELECT {} FROM {} WHERE id=?""".format(self.__transform_column(question), |
211
|
|
|
self.__transform_column(coding_question)), (id_,)) |
212
|
|
|
res = c.fetchone() |
213
|
|
|
return res is not None and res[0] is not None |
214
|
|
|
|
215
|
|
|
def store_answer(self, coding_question, question, answer, id_): |
216
|
|
|
c = self.conn.cursor() |
217
|
|
|
c.execute("""INSERT OR IGNORE INTO {} (id,{}) VALUES (?,?)""".format(self.__transform_column(coding_question), |
218
|
|
|
self.__transform_column(question)), |
219
|
|
|
(id_, answer)) |
220
|
|
|
c.execute("""UPDATE {} SET id=?,{}=? WHERE id=?""".format(self.__transform_column(coding_question), |
221
|
|
|
self.__transform_column(question)), |
222
|
|
|
(id_, answer, id_)) |
223
|
|
|
self.conn.commit() |
224
|
|
|
|
225
|
|
|
def export(self, filename="out.txt"): |
226
|
|
|
with open(os.path.join(self.path, filename), "w") as file: |
227
|
|
|
file.write("\t".join([self.__reverse_transform_column(i) for i in self.get_columns("individuals") if |
228
|
|
|
i not in self.custom_tables + ["id"]] |
229
|
|
|
+ ["Question to participant", "Participant Answer", "Coder", "Coding Questions", |
230
|
|
|
"Coding Answer", "\n"])) |
231
|
|
|
for individual in self.get_whole_table("individuals"): |
232
|
|
|
column = self.get_columns("individuals") |
233
|
|
|
individual = dict(zip(column, individual)) |
234
|
|
|
for question in self.custom_tables: |
235
|
|
|
for i in self.get_whole_table(question): |
236
|
|
|
column = self.get_columns(question) |
237
|
|
|
coding_questions = dict(zip(column, i)) |
238
|
|
|
if coding_questions["id"] == individual["id"]-1: |
239
|
|
|
for coding_question in coding_questions: |
240
|
|
|
if coding_question != "id" and coding_question != "coder": |
241
|
|
|
file.write("\t".join([str(individual[i]) for i in self.get_columns("individuals") if |
242
|
|
|
i not in self.custom_tables + ["id"]] |
243
|
|
|
+ [str(self.__reverse_transform_column(question)), |
244
|
|
|
str(individual[question]), coding_questions["coder"], |
245
|
|
|
self.__reverse_transform_column(coding_question), |
246
|
|
|
coding_questions[coding_question], "\n"])) |
247
|
|
|
|
248
|
|
|
titles = list() |
249
|
|
|
for i in ["individuals"] + self.custom_tables: |
250
|
|
|
for j in self.get_columns(i): |
251
|
|
|
titles.append((i, j)) |
252
|
|
|
""" |
253
|
|
|
columns = [i[1] for i in titles if i[0] == "individuals" and i[1] not |
254
|
|
|
in [self.__transform_column(i[0]) for i in self.custom_tables]] |
255
|
|
|
print(len(columns),columns) |
256
|
|
|
""" |
257
|
|
|
''' |
258
|
|
|
command = """SELECT {} FROM individuals\n""".format(", ".join(list(map(".".join, titles)))) + "".join( |
259
|
|
|
["""INNER JOIN {} ON {}\n""".format(i, """{}.id = individuals.id""".format(i)) for i in self.custom_tables]) |
260
|
|
|
''' |
261
|
|
|
""" |
262
|
|
|
print(command) |
263
|
|
|
c = self.conn.cursor() |
264
|
|
|
c.execute(command) |
265
|
|
|
res = c.fetchall() |
266
|
|
|
print(res) |
267
|
|
|
for i in res: |
268
|
|
|
print(i) |
269
|
|
|
print( titles ) |
270
|
|
|
print([(i[0],self.__reverse_transform_column(i[1])) for i in titles]) |
271
|
|
|
|
272
|
|
|
#with open(os.path.join(self.path,filename)): |
273
|
|
|
# pass |
274
|
|
|
""" |
275
|
|
|
|
276
|
|
|
|
277
|
|
|
class CodingUnit(object): |
278
|
|
|
def __init__(self, project, question, answer, coding_questions, id_): |
279
|
|
|
self.question = question |
280
|
|
|
self.answer = answer |
281
|
|
|
self.coding_questions = coding_questions |
282
|
|
|
self.coding_answers = dict() |
283
|
|
|
self.id = id_ |
284
|
|
|
self.project = project |
285
|
|
|
self["coder"] = project.coder |
286
|
|
|
|
287
|
|
|
def isFinished(self): |
288
|
|
|
res = True |
289
|
|
|
res &= all(i in self.coding_answers.keys() for i in self.coding_questions) |
290
|
|
|
res &= all(self.coding_answers[i] is not None for i in self.coding_answers) |
291
|
|
|
return res |
292
|
|
|
|
293
|
|
|
def __setitem__(self, key, value): |
294
|
|
|
self.coding_answers[key] = value |
295
|
|
|
self.project.store_answer(self.question, key, value, self.id) |
296
|
|
|
|
297
|
|
|
def __repr__(self): |
298
|
|
|
return "\n".join(["Coding unit: {} -> {}".format(self.question, self.answer)] + [ |
299
|
|
|
"-{}\n\t-> {}".format(i, self.coding_answers.get(i, None)) for i in self.coding_questions]) |
300
|
|
|
|