Completed
Pull Request — master (#22)
by Jerome
44s
created

Project   A

Complexity

Total Complexity 5

Size/Duplication

Total Lines 12
Duplicated Lines 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 12
rs 10
wmc 5
1
from msquaredc.persistence import BackedUpDict
2
import random
3
import yaml
4
import os
5
import sqlite3
6
from itertools import repeat
7
8
9
class Project(object):
10
    def __init__(self, path=".",file="project.db",coder=None, *args, **kwargs):
11
        # Project file doesn't already exist
12
        self.path = path
13
        self.file = file
14
        self.conn = sqlite3.connect(os.path.join(path, file))
15
        self.init_db(path,*args,**kwargs)
16
        self.current_coding_unit = None
17
        if coder is not None:
18
            self.coder = coder
19
        else:
20
            raise Exception("Please define the coder!")
21
        print("End init")
22
23
    def init_db(self,path,*args,**kwargs):
24
        c = self.conn.cursor()
25
        c.execute("""CREATE TABLE IF NOT EXISTS vars (key text, value text)""")
26
        c.execute("""CREATE TABLE IF NOT EXISTS translation (clear text, translated text, UNIQUE(clear, translated))""")
27
        self.conn.commit()
28
        if "data" in kwargs:
29
            with open(os.path.join(path,kwargs["data"])) as file:
30
                res = Project.handleTSV(file)
31
            if len(res):
32
                titles = list(res[0].keys())
33
                cquery = ["{} {}".format(self.__transform_column(i),"TEXT") for i in titles]
34
                cquery = ", ".join(cquery)
35
                c.execute("""CREATE TABLE IF NOT EXISTS individuals (id INTEGER PRIMARY KEY,{})""".format(cquery))
36
                self.conn.commit()
37
                for row in res:
38
                    columns = []
39
                    values = []
40
                    for column in row:
41
                        if len(row[column].strip()) != 0:
42
                            columns.append(self.__transform_column(column))
43
                            values.append((row[column]))
44
                    aquery = " AND ".join(i+"=?" for i in columns)
45
46
                    if len(values):
47
                        c.execute("""SELECT id FROM individuals WHERE {}""".format(aquery),values)
48
                        identifier = c.fetchone()
49
                        if not identifier:
50
                            c.execute("""INSERT INTO individuals ({}) VALUES ({})""".format(", ".join(columns)," ,".join("?" for _ in values)),values)
51
                            self.conn.commit()
52
53
        if "questions" in kwargs:
54
            c.execute("""CREATE TABLE IF NOT EXISTS question_assoc (question text, coding text)""")
55
            with open(os.path.join(path,kwargs["questions"])) as file:
56
                questions = yaml.load(file)
57
            for question in questions["questions"]:
58
                qquery = ", ".join([self.__transform_column(i["criteria"])+" TEXT" for i in question["coding"]]+["coder TEXT"])
59
                c.execute("""CREATE TABLE IF NOT EXISTS {} (id INTEGER PRIMARY KEY, {})""".format(self.__transform_column(question["text"]), qquery))
60
                for i in question["coding"]:
61
                    c.execute("""INSERT INTO question_assoc SELECT ?,? 
62
                                 WHERE NOT EXISTS(SELECT 1 FROM question_assoc WHERE question=? AND coding=?)""",
63
                              (question["text"],i["criteria"],question["text"],i["criteria"]))
64
                self.conn.commit()
65
66
    def get_questions(self,question):
67
        c = self.conn.cursor()
68
        c.execute("""SELECT coding FROM question_assoc WHERE question=?""",(question,))
69
        res = [i[0] for i in c.fetchall()]
70
        return res
71
72
    def __transform_column(self, column):
73
        column = column.strip()
74
        before = column
75
        for i in "?()-,;[].=":
76
            column = column.replace(i,"_")
77
        columns = column.split(" ")
78
        columns = list(map(str.lower,columns))
79
        kw = ["alter"]
80
        for i in range(len(columns)):
81
            if columns[i] in kw:
82
                columns[i] = "_".join([columns[i][:-1],columns[i][-1]])
83
        column = "_".join(columns)
84
        c = self.conn.cursor()
85
        c.execute("""INSERT OR IGNORE INTO translation (clear, translated) VALUES (?,?)""",(before,column))
86
        self.conn.commit()
87
        return column
88
89
    def __reverse_transform_column(self,column):
90
        c = self.conn.cursor()
91
        c.execute("""SELECT clear FROM translation WHERE translated=?""",(column,))
92
        res = c.fetchone()
93
        if res is not None and len(res) > 0:
94
            return str(res[0])
95
        return str(None)
96
97
    def init_dict(self,init_kwargs,**kwargs):
98
        for i in kwargs:
99
            if i not in self.state:
100
                if i in init_kwargs:
101
                    self.state[i] = init_kwargs[i]
102
                else:
103
                    self.state[i] = kwargs[i]
104
105
    @staticmethod
106
    def handleTSV(file):
107
        res = []
108
        titles = []
109
        for i,j in enumerate(file):
110
            if i == 0:
111
                titles = j.strip("\n").split("\t")
112
            else:
113
                res.append(dict(zip(titles,j.strip("\n").split("\t"))))
114
        return res
115
116
    @staticmethod
117
    def handleCSV(file):
118
        res =[]
119
        titles = []
120
        for i, j in enumerate(file):
121
            if i == 0:
122
                titles = j.split(",")
123
        print(titles)
124
        return res
125
126
    @property
127
    def all_tables(self):
128
        c = self.conn.cursor()
129
        c.execute("""SELECT name FROM sqlite_master WHERE type='table'""")
130
        return [self.__transform_column(i[0]) for i in c.fetchall()]
131
132
    @property
133
    def custom_tables(self):
134
        return [i for i in self.all_tables if i not in self.system_tables]
135
136
    @property
137
    def system_tables(self):
138
        return ["vars","individuals","question_assoc","translation"]
139
140
    def get_columns(self,table):
141
        c = self.conn.cursor()
142
        c.execute("""PRAGMA table_info({})""".format(table))
143
        return [i[1] for i in c.fetchall()]
144
145
    def get_number_of_entries(self,table):
146
        c = self.conn.cursor()
147
        c.execute("""SELECT count(*) FROM individuals""".format(self.__transform_column(table)))
148
        return c.fetchall()[0][0]
149
150
    def get_whole_table(self,table):
151
        colums = self.get_columns(table)
152
        c = self.conn.cursor()
153
        c.execute("""SELECT {} FROM {}""".format(", ".join(colums),table))
154
        return c.fetchall()
155
156
    def __iter__(self):
157
        return self
158
159
    def __next__(self):
160
        if self.current_coding_unit is not None:
161
            if not self.current_coding_unit.isFinished():
162
                return self.current_coding_unit
163
        amount_of_individuals = self.get_number_of_entries("individuals")
164
        for table in self.custom_tables:
165
            c = self.conn.cursor()
166
            c.execute("""SELECT * FROM {}""".format(table))
167
168
            ids_in_table = c.fetchall()
169
            ids_in_table[:] = [i for i in ids_in_table if None not in i]
170
            if amount_of_individuals > len(ids_in_table):
171
                entries = set(range(amount_of_individuals)) - set([i[0] for i in ids_in_table])
172
                entry = random.choice(list(entries))
173
                c.execute("""SELECT {} FROM individuals""".format(table))
174
                coding_answer = c.fetchall()[entry][0]
175
                coding_question = self.__reverse_transform_column(table)
176
                questions = self.get_questions(coding_question)
177
                questions[:] = [i for i in questions if not self.__question_already_answered(coding_question,i,entry)]
178
                if len(questions) == 0:
179
                    c.execute("""SELECT * FROM question_assoc""")
180
                    res = c.fetchall()
181
                    for i in res:
182
                        print(i, len(i[0]),len(coding_question),repr(coding_question))
183
                    raise Exception("This should not happen")
184
                self.current_coding_unit = CodingUnit(self,coding_question,coding_answer,questions,entry)
185
                return self.current_coding_unit
186
        raise StopIteration
187
188
    def __question_already_answered(self,coding_question,question,id_):
189
        c = self.conn.cursor()
190
        c.execute("""SELECT {} FROM {} WHERE id=?""".format(self.__transform_column(question),self.__transform_column(coding_question)),(id_,))
191
        res = c.fetchone()
192
        return res is not None and res[0] is not None
193
194
    def store_answer(self,coding_question, question, answer,id_):
195
        c = self.conn.cursor()
196
        c.execute("""INSERT OR IGNORE INTO {} (id,{}) VALUES (?,?)""".format(self.__transform_column(coding_question),self.__transform_column(question)),(id_,answer))
197
        c.execute("""UPDATE {} SET id=?,{}=? WHERE id=?""".format(self.__transform_column(coding_question),self.__transform_column(question)),(id_,answer,id_))
198
        self.conn.commit()
199
200
    def export(self, filename="out.txt"):
201
        with open(os.path.join(self.path, filename), "w") as file:
202
            file.write("\t".join([self.__reverse_transform_column(i) for i in self.get_columns("individuals") if i not in self.custom_tables + ["id"]]
203
                                 +["Question to participant","Participant Answer","Coder","Coding Questions","Coding Answer","\n"]))
204
            for individual in self.get_whole_table("individuals"):
205
                column = self.get_columns("individuals")
206
                individual = dict(zip(column,individual))
207
                for question in self.custom_tables:
208
                    for i in self.get_whole_table(question):
209
                        column = self.get_columns(question)
210
                        coding_questions = dict(zip(column,i))
211
                        if coding_questions["id"] == individual["id"]:
212
                            for coding_question in coding_questions:
213
                                if coding_question != "id" and coding_question != "coder":
214
                                    file.write("\t".join([str(individual[i]) for i in self.get_columns("individuals") if i not in self.custom_tables +["id"]]
215
                                                         +[str(self.__reverse_transform_column(question)),str(individual[question]),coding_questions["coder"],self.__reverse_transform_column(coding_question),coding_questions[coding_question],"\n"]))
216
217
218
        titles = list()
219
        for i in ["individuals"] + self.custom_tables :
220
            for j in self.get_columns(i):
221
                titles.append((i, j))
222
        """
223
        columns = [i[1] for i in titles if i[0] == "individuals" and i[1] not in [self.__transform_column(i[0]) for i in self.custom_tables]]
224
        print(len(columns),columns)
225
        """
226
        command = """SELECT {} FROM individuals\n""".format(", ".join(list(map(".".join,titles))))+"".join(["""INNER JOIN {} ON {}\n""".format(i,"""{}.id = individuals.id""".format(i)) for i in self.custom_tables])
227
        """
228
        print(command)
229
        c = self.conn.cursor()
230
        c.execute(command)
231
        res = c.fetchall()
232
        print(res)
233
        for i in res:
234
            print(i)
235
        print( titles )
236
        print([(i[0],self.__reverse_transform_column(i[1])) for i in titles])
237
238
        #with open(os.path.join(self.path,filename)):
239
        #    pass
240
        """
241
242
class CodingUnit(object):
243
    def __init__(self, project, question, answer, coding_questions,id_):
244
        self.question = question
245
        self.answer = answer
246
        self.coding_questions = coding_questions
247
        self.coding_answers = dict()
248
        self.id = id_
249
        self.project = project
250
        self["coder"] = project.coder
251
252
    def isFinished(self):
253
        res = True
254
        res &= all(i in self.coding_answers.keys() for i in self.coding_questions)
255
        res &= all(self.coding_answers[i] is not None for i in self.coding_answers)
256
        return res
257
258
    def __setitem__(self, key, value):
259
        self.coding_answers[key] = value
260
        self.project.store_answer(self.question,key,value,self.id)
261
262
    def __repr__(self):
263
        return "\n".join(["Coding unit: {} -> {}".format(self.question,self.answer)]+["•{}\n\t-> {}".format(i, self.coding_answers.get(i,None)) for i in self.coding_questions])
264
265
class FileNotFoundError(IOError):
266
    pass
267
268