Completed
Pull Request — master (#22)
by Jerome
25s
created

Project   F

Complexity

Total Complexity 79

Size/Duplication

Total Lines 231
Duplicated Lines 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
dl 0
loc 231
rs 2.0547
c 3
b 0
f 0
wmc 79

19 Methods

Rating   Name   Duplication   Size   Complexity  
A __iter__() 0 2 1
A get_number_of_entries() 0 4 1
F __next__() 0 28 12
A get_columns() 0 4 2
A init_dict() 0 7 4
A __init__() 0 12 2
A __question_already_answered() 0 5 1
A handleTSV() 0 10 3
A all_tables() 0 5 2
F init_db() 0 41 17
A system_tables() 0 3 1
F export() 0 41 16
A __transform_column() 0 16 4
A get_questions() 0 5 2
A store_answer() 0 5 1
A handleCSV() 0 9 3
A custom_tables() 0 3 3
A __reverse_transform_column() 0 7 3
A get_whole_table() 0 5 1

How to fix   Complexity   

Complex Class

Complex classes like Project often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from msquaredc.persistence import BackedUpDict
2
import random
3
import yaml
4
import os
5
import sqlite3
6
from itertools import repeat
7
8
class FileNotFoundError(IOError):
9
    pass
10
11
class ProjectBuilder(dict):
12
    def __init__(self):
13
        for i in ["data","questions","coder","file"]:
14
            self[i] = None
15
16
    def build(self):
17
        return Project(**self)
18
19
class Project(object):
20
    def __init__(self, path=".",file="project.db",coder=None, *args, **kwargs):
21
        # Project file doesn't already exist
22
        self.path = path
23
        self.file = file
24
        self.conn = sqlite3.connect(os.path.join(path, file))
25
        self.init_db(path,*args,**kwargs)
26
        self.current_coding_unit = None
27
        if coder is not None:
28
            self.coder = coder
29
        else:
30
            raise Exception("Please define the coder!")
31
        print("End init")
32
33
    def init_db(self,path,*args,**kwargs):
34
        c = self.conn.cursor()
35
        c.execute("""CREATE TABLE IF NOT EXISTS vars (key text, value text)""")
36
        c.execute("""CREATE TABLE IF NOT EXISTS translation (clear text, translated text, UNIQUE(clear, translated))""")
37
        self.conn.commit()
38
        if "data" in kwargs:
39
            with open(os.path.join(path,kwargs["data"])) as file:
40
                res = Project.handleTSV(file)
41
            if len(res):
42
                titles = list(res[0].keys())
43
                cquery = ["{} {}".format(self.__transform_column(i),"TEXT") for i in titles]
44
                cquery = ", ".join(cquery)
45
                c.execute("""CREATE TABLE IF NOT EXISTS individuals (id INTEGER PRIMARY KEY,{})""".format(cquery))
46
                self.conn.commit()
47
                for row in res:
48
                    columns = []
49
                    values = []
50
                    for column in row:
51
                        if len(row[column].strip()) != 0:
52
                            columns.append(self.__transform_column(column))
53
                            values.append((row[column]))
54
                    aquery = " AND ".join(i+"=?" for i in columns)
55
56
                    if len(values):
57
                        c.execute("""SELECT id FROM individuals WHERE {}""".format(aquery),values)
58
                        identifier = c.fetchone()
59
                        if not identifier:
60
                            c.execute("""INSERT INTO individuals ({}) VALUES ({})""".format(", ".join(columns)," ,".join("?" for _ in values)),values)
61
                            self.conn.commit()
62
        if "questions" in kwargs:
63
            c.execute("""CREATE TABLE IF NOT EXISTS question_assoc (question text, coding text)""")
64
            with open(os.path.join(path,kwargs["questions"])) as file:
65
                questions = yaml.load(file)
66
            for question in questions["questions"]:
67
                qquery = ", ".join([self.__transform_column(i["criteria"])+" TEXT" for i in question["coding"]]+["coder TEXT"])
68
                c.execute("""CREATE TABLE IF NOT EXISTS {} (id INTEGER PRIMARY KEY, {})""".format(self.__transform_column(question["text"]), qquery))
69
                for i in question["coding"]:
70
                    c.execute("""INSERT INTO question_assoc SELECT ?,? 
71
                                 WHERE NOT EXISTS(SELECT 1 FROM question_assoc WHERE question=? AND coding=?)""",
72
                              (question["text"],i["criteria"],question["text"],i["criteria"]))
73
                self.conn.commit()
74
75
    def get_questions(self,question):
76
        c = self.conn.cursor()
77
        c.execute("""SELECT coding FROM question_assoc WHERE question=?""",(question,))
78
        res = [i[0] for i in c.fetchall()]
79
        return res
80
81
    def __transform_column(self, column):
82
        column = column.strip()
83
        before = column
84
        for i in "?()-,;[].=":
85
            column = column.replace(i,"_")
86
        columns = column.split(" ")
87
        columns = list(map(str.lower,columns))
88
        kw = ["alter"]
89
        for i in range(len(columns)):
90
            if columns[i] in kw:
91
                columns[i] = "_".join([columns[i][:-1],columns[i][-1]])
92
        column = "_".join(columns)
93
        c = self.conn.cursor()
94
        c.execute("""INSERT OR IGNORE INTO translation (clear, translated) VALUES (?,?)""",(before,column))
95
        self.conn.commit()
96
        return column
97
98
    def __reverse_transform_column(self,column):
99
        c = self.conn.cursor()
100
        c.execute("""SELECT clear FROM translation WHERE translated=?""",(column,))
101
        res = c.fetchone()
102
        if res is not None and len(res) > 0:
103
            return str(res[0])
104
        return str(None)
105
106
    def init_dict(self,init_kwargs,**kwargs):
107
        for i in kwargs:
108
            if i not in self.state:
109
                if i in init_kwargs:
110
                    self.state[i] = init_kwargs[i]
111
                else:
112
                    self.state[i] = kwargs[i]
113
114
    @staticmethod
115
    def handleTSV(file):
116
        res = []
117
        titles = []
118
        for i,j in enumerate(file):
119
            if i == 0:
120
                titles = j.strip("\n").split("\t")
121
            else:
122
                res.append(dict(zip(titles,j.strip("\n").split("\t"))))
123
        return res
124
125
    @staticmethod
126
    def handleCSV(file):
127
        res =[]
128
        titles = []
129
        for i, j in enumerate(file):
130
            if i == 0:
131
                titles = j.split(",")
132
        print(titles)
133
        return res
134
135
    @property
136
    def all_tables(self):
137
        c = self.conn.cursor()
138
        c.execute("""SELECT name FROM sqlite_master WHERE type='table'""")
139
        return [self.__transform_column(i[0]) for i in c.fetchall()]
140
141
    @property
142
    def custom_tables(self):
143
        return [i for i in self.all_tables if i not in self.system_tables]
144
145
    @property
146
    def system_tables(self):
147
        return ["vars","individuals","question_assoc","translation"]
148
149
    def get_columns(self,table):
150
        c = self.conn.cursor()
151
        c.execute("""PRAGMA table_info({})""".format(table))
152
        return [i[1] for i in c.fetchall()]
153
154
    def get_number_of_entries(self,table):
155
        c = self.conn.cursor()
156
        c.execute("""SELECT count(*) FROM individuals""".format(self.__transform_column(table)))
157
        return c.fetchall()[0][0]
158
159
    def get_whole_table(self,table):
160
        colums = self.get_columns(table)
161
        c = self.conn.cursor()
162
        c.execute("""SELECT {} FROM {}""".format(", ".join(colums),table))
163
        return c.fetchall()
164
165
    def __iter__(self):
166
        return self
167
168
    def __next__(self):
169
        if self.current_coding_unit is not None:
170
            if not self.current_coding_unit.isFinished():
171
                return self.current_coding_unit
172
        amount_of_individuals = self.get_number_of_entries("individuals")
173
        for table in self.custom_tables:
174
            c = self.conn.cursor()
175
            c.execute("""SELECT * FROM {}""".format(table))
176
177
            ids_in_table = c.fetchall()
178
            ids_in_table[:] = [i for i in ids_in_table if None not in i]
179
            if amount_of_individuals > len(ids_in_table):
180
                entries = set(range(amount_of_individuals)) - set([i[0] for i in ids_in_table])
181
                entry = random.choice(list(entries))
182
                c.execute("""SELECT {} FROM individuals""".format(table))
183
                coding_answer = c.fetchall()[entry][0]
184
                coding_question = self.__reverse_transform_column(table)
185
                questions = self.get_questions(coding_question)
186
                questions[:] = [i for i in questions if not self.__question_already_answered(coding_question,i,entry)]
187
                if len(questions) == 0:
188
                    c.execute("""SELECT * FROM question_assoc""")
189
                    res = c.fetchall()
190
                    for i in res:
191
                        print(i, len(i[0]),len(coding_question),repr(coding_question))
192
                    raise Exception("This should not happen")
193
                self.current_coding_unit = CodingUnit(self,coding_question,coding_answer,questions,entry)
194
                return self.current_coding_unit
195
        raise StopIteration
196
197
    def __question_already_answered(self,coding_question,question,id_):
198
        c = self.conn.cursor()
199
        c.execute("""SELECT {} FROM {} WHERE id=?""".format(self.__transform_column(question),self.__transform_column(coding_question)),(id_,))
200
        res = c.fetchone()
201
        return res is not None and res[0] is not None
202
203
    def store_answer(self,coding_question, question, answer,id_):
204
        c = self.conn.cursor()
205
        c.execute("""INSERT OR IGNORE INTO {} (id,{}) VALUES (?,?)""".format(self.__transform_column(coding_question),self.__transform_column(question)),(id_,answer))
206
        c.execute("""UPDATE {} SET id=?,{}=? WHERE id=?""".format(self.__transform_column(coding_question),self.__transform_column(question)),(id_,answer,id_))
207
        self.conn.commit()
208
209
    def export(self, filename="out.txt"):
210
        with open(os.path.join(self.path, filename), "w") as file:
211
            file.write("\t".join([self.__reverse_transform_column(i) for i in self.get_columns("individuals") if i not in self.custom_tables + ["id"]]
212
                                 +["Question to participant","Participant Answer","Coder","Coding Questions","Coding Answer","\n"]))
213
            for individual in self.get_whole_table("individuals"):
214
                column = self.get_columns("individuals")
215
                individual = dict(zip(column,individual))
216
                for question in self.custom_tables:
217
                    for i in self.get_whole_table(question):
218
                        column = self.get_columns(question)
219
                        coding_questions = dict(zip(column,i))
220
                        if coding_questions["id"] == individual["id"]:
221
                            for coding_question in coding_questions:
222
                                if coding_question != "id" and coding_question != "coder":
223
                                    file.write("\t".join([str(individual[i]) for i in self.get_columns("individuals") if i not in self.custom_tables +["id"]]
224
                                                         +[str(self.__reverse_transform_column(question)),str(individual[question]),coding_questions["coder"],self.__reverse_transform_column(coding_question),coding_questions[coding_question],"\n"]))
225
226
227
        titles = list()
228
        for i in ["individuals"] + self.custom_tables :
229
            for j in self.get_columns(i):
230
                titles.append((i, j))
231
        """
232
        columns = [i[1] for i in titles if i[0] == "individuals" and i[1] not in [self.__transform_column(i[0]) for i in self.custom_tables]]
233
        print(len(columns),columns)
234
        """
235
        command = """SELECT {} FROM individuals\n""".format(", ".join(list(map(".".join,titles))))+"".join(["""INNER JOIN {} ON {}\n""".format(i,"""{}.id = individuals.id""".format(i)) for i in self.custom_tables])
236
        """
237
        print(command)
238
        c = self.conn.cursor()
239
        c.execute(command)
240
        res = c.fetchall()
241
        print(res)
242
        for i in res:
243
            print(i)
244
        print( titles )
245
        print([(i[0],self.__reverse_transform_column(i[1])) for i in titles])
246
247
        #with open(os.path.join(self.path,filename)):
248
        #    pass
249
        """
250
251
class CodingUnit(object):
252
    def __init__(self, project, question, answer, coding_questions,id_):
253
        self.question = question
254
        self.answer = answer
255
        self.coding_questions = coding_questions
256
        self.coding_answers = dict()
257
        self.id = id_
258
        self.project = project
259
        self["coder"] = project.coder
260
261
    def isFinished(self):
262
        res = True
263
        res &= all(i in self.coding_answers.keys() for i in self.coding_questions)
264
        res &= all(self.coding_answers[i] is not None for i in self.coding_answers)
265
        return res
266
267
    def __setitem__(self, key, value):
268
        self.coding_answers[key] = value
269
        self.project.store_answer(self.question,key,value,self.id)
270
271
    def __repr__(self):
272
        return "\n".join(["Coding unit: {} -> {}".format(self.question,self.answer)]+["-{}\n\t-> {}".format(i, self.coding_answers.get(i,None)) for i in self.coding_questions])
273
274
275