Completed
Pull Request — master (#22)
by Jerome
01:25
created

CodingUnit   A

Complexity

Total Complexity 7

Size/Duplication

Total Lines 22
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
dl 0
loc 22
rs 10
c 0
b 0
f 0
wmc 7

4 Methods

Rating   Name   Duplication   Size   Complexity  
A __init__() 0 8 1
A isFinished() 0 5 3
A __repr__() 0 2 2
A __setitem__() 0 3 1
1
from msquaredc.persistence import BackedUpDict
2
import random
3
import yaml
4
import os
5
import sqlite3
6
from itertools import repeat
7
8
class FileNotFoundError(IOError):
9
    pass
10
11
class ProjectBuilder(object):
12
    def __init__(self,**kwargs):
13
        if "data" in kwargs:
14
            self.data = kwargs["data"]
15
        else:
16
            self.data = None
17
        if "coder" in kwargs:
18
            self.coder = kwargs["coder"]
19
        else:
20
            self.coder = None
21
        if "config" in kwargs:
22
            self.config = kwargs["config"]
23
        else:
24
            self.config = None
25
26
    def finished(self):
27
        return self.data is not None and self.coder is not None and self.config is not None
28
29
    def build(self):
30
        return Project(data=self.data,coder=self.coder,config=self.config)
31
32
class Project(object):
33
    def __init__(self, path=".",file="project.db",coder=None, *args, **kwargs):
34
        # Project file doesn't already exist
35
        self.path = path
36
        self.file = file
37
        self.conn = sqlite3.connect(os.path.join(path, file))
38
        self.init_db(path,*args,**kwargs)
39
        self.current_coding_unit = None
40
        if coder is not None:
41
            self.coder = coder
42
        else:
43
            raise Exception("Please define the coder!")
44
        print("End init")
45
46
    def init_db(self,path,*args,**kwargs):
47
        c = self.conn.cursor()
48
        c.execute("""CREATE TABLE IF NOT EXISTS vars (key text, value text)""")
49
        c.execute("""CREATE TABLE IF NOT EXISTS translation (clear text, translated text, UNIQUE(clear, translated))""")
50
        self.conn.commit()
51
        if "data" in kwargs:
52
            with open(os.path.join(path,kwargs["data"])) as file:
53
                res = Project.handleTSV(file)
54
            if len(res):
55
                titles = list(res[0].keys())
56
                cquery = ["{} {}".format(self.__transform_column(i),"TEXT") for i in titles]
57
                cquery = ", ".join(cquery)
58
                c.execute("""CREATE TABLE IF NOT EXISTS individuals (id INTEGER PRIMARY KEY,{})""".format(cquery))
59
                self.conn.commit()
60
                for row in res:
61
                    columns = []
62
                    values = []
63
                    for column in row:
64
                        if len(row[column].strip()) != 0:
65
                            columns.append(self.__transform_column(column))
66
                            values.append((row[column]))
67
                    aquery = " AND ".join(i+"=?" for i in columns)
68
69
                    if len(values):
70
                        c.execute("""SELECT id FROM individuals WHERE {}""".format(aquery),values)
71
                        identifier = c.fetchone()
72
                        if not identifier:
73
                            c.execute("""INSERT INTO individuals ({}) VALUES ({})""".format(", ".join(columns)," ,".join("?" for _ in values)),values)
74
                            self.conn.commit()
75
        if "questions" in kwargs:
76
            c.execute("""CREATE TABLE IF NOT EXISTS question_assoc (question text, coding text)""")
77
            with open(os.path.join(path,kwargs["questions"])) as file:
78
                questions = yaml.load(file)
79
            for question in questions["questions"]:
80
                qquery = ", ".join([self.__transform_column(i["criteria"])+" TEXT" for i in question["coding"]]+["coder TEXT"])
81
                c.execute("""CREATE TABLE IF NOT EXISTS {} (id INTEGER PRIMARY KEY, {})""".format(self.__transform_column(question["text"]), qquery))
82
                for i in question["coding"]:
83
                    c.execute("""INSERT INTO question_assoc SELECT ?,? 
84
                                 WHERE NOT EXISTS(SELECT 1 FROM question_assoc WHERE question=? AND coding=?)""",
85
                              (question["text"],i["criteria"],question["text"],i["criteria"]))
86
                self.conn.commit()
87
88
    def get_questions(self,question):
89
        c = self.conn.cursor()
90
        c.execute("""SELECT coding FROM question_assoc WHERE question=?""",(question,))
91
        res = [i[0] for i in c.fetchall()]
92
        return res
93
94
    def __transform_column(self, column):
95
        column = column.strip()
96
        before = column
97
        for i in "?()-,;[].=":
98
            column = column.replace(i,"_")
99
        columns = column.split(" ")
100
        columns = list(map(str.lower,columns))
101
        kw = ["alter"]
102
        for i in range(len(columns)):
103
            if columns[i] in kw:
104
                columns[i] = "_".join([columns[i][:-1],columns[i][-1]])
105
        column = "_".join(columns)
106
        c = self.conn.cursor()
107
        c.execute("""INSERT OR IGNORE INTO translation (clear, translated) VALUES (?,?)""",(before,column))
108
        self.conn.commit()
109
        return column
110
111
    def __reverse_transform_column(self,column):
112
        c = self.conn.cursor()
113
        c.execute("""SELECT clear FROM translation WHERE translated=?""",(column,))
114
        res = c.fetchone()
115
        if res is not None and len(res) > 0:
116
            return str(res[0])
117
        return str(None)
118
119
    def init_dict(self,init_kwargs,**kwargs):
120
        for i in kwargs:
121
            if i not in self.state:
122
                if i in init_kwargs:
123
                    self.state[i] = init_kwargs[i]
124
                else:
125
                    self.state[i] = kwargs[i]
126
127
    @staticmethod
128
    def handleTSV(file):
129
        res = []
130
        titles = []
131
        for i,j in enumerate(file):
132
            if i == 0:
133
                titles = j.strip("\n").split("\t")
134
            else:
135
                res.append(dict(zip(titles,j.strip("\n").split("\t"))))
136
        return res
137
138
    @staticmethod
139
    def handleCSV(file):
140
        res =[]
141
        titles = []
142
        for i, j in enumerate(file):
143
            if i == 0:
144
                titles = j.split(",")
145
        print(titles)
146
        return res
147
148
    @property
149
    def all_tables(self):
150
        c = self.conn.cursor()
151
        c.execute("""SELECT name FROM sqlite_master WHERE type='table'""")
152
        return [self.__transform_column(i[0]) for i in c.fetchall()]
153
154
    @property
155
    def custom_tables(self):
156
        return [i for i in self.all_tables if i not in self.system_tables]
157
158
    @property
159
    def system_tables(self):
160
        return ["vars","individuals","question_assoc","translation"]
161
162
    def get_columns(self,table):
163
        c = self.conn.cursor()
164
        c.execute("""PRAGMA table_info({})""".format(table))
165
        return [i[1] for i in c.fetchall()]
166
167
    def get_number_of_entries(self,table):
168
        c = self.conn.cursor()
169
        c.execute("""SELECT count(*) FROM individuals""".format(self.__transform_column(table)))
170
        return c.fetchall()[0][0]
171
172
    def get_whole_table(self,table):
173
        colums = self.get_columns(table)
174
        c = self.conn.cursor()
175
        c.execute("""SELECT {} FROM {}""".format(", ".join(colums),table))
176
        return c.fetchall()
177
178
    def __iter__(self):
179
        return self
180
181
    def __next__(self):
182
        if self.current_coding_unit is not None:
183
            if not self.current_coding_unit.isFinished():
184
                return self.current_coding_unit
185
        amount_of_individuals = self.get_number_of_entries("individuals")
186
        for table in self.custom_tables:
187
            c = self.conn.cursor()
188
            c.execute("""SELECT * FROM {}""".format(table))
189
190
            ids_in_table = c.fetchall()
191
            ids_in_table[:] = [i for i in ids_in_table if None not in i]
192
            if amount_of_individuals > len(ids_in_table):
193
                entries = set(range(amount_of_individuals)) - set([i[0] for i in ids_in_table])
194
                entry = random.choice(list(entries))
195
                c.execute("""SELECT {} FROM individuals""".format(table))
196
                coding_answer = c.fetchall()[entry][0]
197
                coding_question = self.__reverse_transform_column(table)
198
                questions = self.get_questions(coding_question)
199
                questions[:] = [i for i in questions if not self.__question_already_answered(coding_question,i,entry)]
200
                if len(questions) == 0:
201
                    c.execute("""SELECT * FROM question_assoc""")
202
                    res = c.fetchall()
203
                    for i in res:
204
                        print(i, len(i[0]),len(coding_question),repr(coding_question))
205
                    raise Exception("This should not happen")
206
                self.current_coding_unit = CodingUnit(self,coding_question,coding_answer,questions,entry)
207
                return self.current_coding_unit
208
        raise StopIteration
209
210
    def __question_already_answered(self,coding_question,question,id_):
211
        c = self.conn.cursor()
212
        c.execute("""SELECT {} FROM {} WHERE id=?""".format(self.__transform_column(question),self.__transform_column(coding_question)),(id_,))
213
        res = c.fetchone()
214
        return res is not None and res[0] is not None
215
216
    def store_answer(self,coding_question, question, answer,id_):
217
        c = self.conn.cursor()
218
        c.execute("""INSERT OR IGNORE INTO {} (id,{}) VALUES (?,?)""".format(self.__transform_column(coding_question),self.__transform_column(question)),(id_,answer))
219
        c.execute("""UPDATE {} SET id=?,{}=? WHERE id=?""".format(self.__transform_column(coding_question),self.__transform_column(question)),(id_,answer,id_))
220
        self.conn.commit()
221
222
    def export(self, filename="out.txt"):
223
        with open(os.path.join(self.path, filename), "w") as file:
224
            file.write("\t".join([self.__reverse_transform_column(i) for i in self.get_columns("individuals") if i not in self.custom_tables + ["id"]]
225
                                 +["Question to participant","Participant Answer","Coder","Coding Questions","Coding Answer","\n"]))
226
            for individual in self.get_whole_table("individuals"):
227
                column = self.get_columns("individuals")
228
                individual = dict(zip(column,individual))
229
                for question in self.custom_tables:
230
                    for i in self.get_whole_table(question):
231
                        column = self.get_columns(question)
232
                        coding_questions = dict(zip(column,i))
233
                        if coding_questions["id"] == individual["id"]:
234
                            for coding_question in coding_questions:
235
                                if coding_question != "id" and coding_question != "coder":
236
                                    file.write("\t".join([str(individual[i]) for i in self.get_columns("individuals") if i not in self.custom_tables +["id"]]
237
                                                         +[str(self.__reverse_transform_column(question)),str(individual[question]),coding_questions["coder"],self.__reverse_transform_column(coding_question),coding_questions[coding_question],"\n"]))
238
239
240
        titles = list()
241
        for i in ["individuals"] + self.custom_tables :
242
            for j in self.get_columns(i):
243
                titles.append((i, j))
244
        """
245
        columns = [i[1] for i in titles if i[0] == "individuals" and i[1] not in [self.__transform_column(i[0]) for i in self.custom_tables]]
246
        print(len(columns),columns)
247
        """
248
        command = """SELECT {} FROM individuals\n""".format(", ".join(list(map(".".join,titles))))+"".join(["""INNER JOIN {} ON {}\n""".format(i,"""{}.id = individuals.id""".format(i)) for i in self.custom_tables])
249
        """
250
        print(command)
251
        c = self.conn.cursor()
252
        c.execute(command)
253
        res = c.fetchall()
254
        print(res)
255
        for i in res:
256
            print(i)
257
        print( titles )
258
        print([(i[0],self.__reverse_transform_column(i[1])) for i in titles])
259
260
        #with open(os.path.join(self.path,filename)):
261
        #    pass
262
        """
263
264
class CodingUnit(object):
265
    def __init__(self, project, question, answer, coding_questions,id_):
266
        self.question = question
267
        self.answer = answer
268
        self.coding_questions = coding_questions
269
        self.coding_answers = dict()
270
        self.id = id_
271
        self.project = project
272
        self["coder"] = project.coder
273
274
    def isFinished(self):
275
        res = True
276
        res &= all(i in self.coding_answers.keys() for i in self.coding_questions)
277
        res &= all(self.coding_answers[i] is not None for i in self.coding_answers)
278
        return res
279
280
    def __setitem__(self, key, value):
281
        self.coding_answers[key] = value
282
        self.project.store_answer(self.question,key,value,self.id)
283
284
    def __repr__(self):
285
        return "\n".join(["Coding unit: {} -> {}".format(self.question,self.answer)]+["-{}\n\t-> {}".format(i, self.coding_answers.get(i,None)) for i in self.coding_questions])
286
287
288