Completed
Pull Request — master (#22)
by Jerome
01:49
created

Project   F

Complexity

Total Complexity 79

Size/Duplication

Total Lines 247
Duplicated Lines 0 %

Importance

Changes 4
Bugs 0 Features 0
Metric Value
c 4
b 0
f 0
dl 0
loc 247
rs 2.0547
wmc 79

19 Methods

Rating   Name   Duplication   Size   Complexity  
A __iter__() 0 2 1
A get_number_of_entries() 0 4 1
F __next__() 0 28 12
A get_columns() 0 4 2
A init_dict() 0 7 4
A __init__() 0 12 2
A __question_already_answered() 0 6 1
A handleTSV() 0 10 3
F init_db() 0 46 17
A all_tables() 0 5 2
A system_tables() 0 3 1
F export() 0 47 16
A __transform_column() 0 16 4
A get_questions() 0 5 2
A store_answer() 0 9 1
A handleCSV() 0 9 3
A custom_tables() 0 3 3
A __reverse_transform_column() 0 7 3
A get_whole_table() 0 5 1

How to fix   Complexity   

Complex Class

Complex classes like Project often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
import os
2
import random
3
import sqlite3
4
5
import yaml
6
7
8
class FileNotFoundError(IOError):
9
    pass
10
11
12
class ProjectBuilder(object):
13
    def __init__(self, **kwargs):
14
        if "data" in kwargs:
15
            self.data = kwargs["data"]
16
        else:
17
            self.data = None
18
        if "coder" in kwargs:
19
            self.coder = kwargs["coder"]
20
        else:
21
            self.coder = None
22
        if "config" in kwargs:
23
            self.config = kwargs["config"]
24
        else:
25
            self.config = None
26
27
    def finished(self):
28
        return self.data is not None and self.coder is not None and self.config is not None
29
30
    def build(self):
31
        return Project(data=self.data, coder=self.coder, config=self.config)
32
33
34
class Project(object):
35
    def __init__(self, path=".", file="project.db", coder=None, *args, **kwargs):
36
        # Project file doesn't already exist
37
        self.path = path
38
        self.file = file
39
        self.conn = sqlite3.connect(os.path.join(path, file))
40
        self.init_db(path, *args, **kwargs)
41
        self.current_coding_unit = None
42
        if coder is not None:
43
            self.coder = coder
44
        else:
45
            raise Exception("Please define the coder!")
46
        print("End init")
47
48
    def init_db(self, path, *args, **kwargs):
49
        c = self.conn.cursor()
50
        c.execute("""CREATE TABLE IF NOT EXISTS vars (key text, value text)""")
51
        c.execute("""CREATE TABLE IF NOT EXISTS translation (clear text, translated text, UNIQUE(clear, translated))""")
52
        self.conn.commit()
53
        if "data" in kwargs:
54
            with open(os.path.join(path, kwargs["data"])) as file:
55
                res = Project.handleTSV(file)
56
            if len(res):
57
                titles = list(res[0].keys())
58
                cquery = ["{} {}".format(self.__transform_column(i), "TEXT") for i in titles]
59
                cquery = ", ".join(cquery)
60
                c.execute("""CREATE TABLE IF NOT EXISTS individuals (id INTEGER PRIMARY KEY,{})""".format(cquery))
61
                self.conn.commit()
62
                for row in res:
63
                    columns = []
64
                    values = []
65
                    for column in row:
66
                        if len(row[column].strip()) != 0:
67
                            columns.append(self.__transform_column(column))
68
                            values.append((row[column]))
69
                    aquery = " AND ".join(i + "=?" for i in columns)
70
71
                    if len(values):
72
                        c.execute("""SELECT id FROM individuals WHERE {}""".format(aquery), values)
73
                        identifier = c.fetchone()
74
                        if not identifier:
75
                            c.execute("""INSERT INTO individuals ({}) VALUES ({})""".format(", ".join(columns),
76
                                                                                            " ,".join(
77
                                                                                                "?" for _ in values)),
78
                                      values)
79
                            self.conn.commit()
80
        if "questions" in kwargs:
81
            c.execute("""CREATE TABLE IF NOT EXISTS question_assoc (question text, coding text)""")
82
            with open(os.path.join(path, kwargs["questions"])) as file:
83
                questions = yaml.load(file)
84
            for question in questions["questions"]:
85
                qquery = ", ".join(
86
                    [self.__transform_column(i["criteria"]) + " TEXT" for i in question["coding"]] + ["coder TEXT"])
87
                c.execute("""CREATE TABLE IF NOT EXISTS {} (id INTEGER PRIMARY KEY, {})""".format(
88
                    self.__transform_column(question["text"]), qquery))
89
                for i in question["coding"]:
90
                    c.execute("""INSERT INTO question_assoc SELECT ?,? 
91
                                 WHERE NOT EXISTS(SELECT 1 FROM question_assoc WHERE question=? AND coding=?)""",
92
                              (question["text"], i["criteria"], question["text"], i["criteria"]))
93
                self.conn.commit()
94
95
    def get_questions(self, question):
96
        c = self.conn.cursor()
97
        c.execute("""SELECT coding FROM question_assoc WHERE question=?""", (question,))
98
        res = [i[0] for i in c.fetchall()]
99
        return res
100
101
    def __transform_column(self, column):
102
        column = column.strip()
103
        before = column
104
        for i in "?()-,;[].=":
105
            column = column.replace(i, "_")
106
        columns = column.split(" ")
107
        columns = list(map(str.lower, columns))
108
        kw = ["alter"]
109
        for i in range(len(columns)):
110
            if columns[i] in kw:
111
                columns[i] = "_".join([columns[i][:-1], columns[i][-1]])
112
        column = "_".join(columns)
113
        c = self.conn.cursor()
114
        c.execute("""INSERT OR IGNORE INTO translation (clear, translated) VALUES (?,?)""", (before, column))
115
        self.conn.commit()
116
        return column
117
118
    def __reverse_transform_column(self, column):
119
        c = self.conn.cursor()
120
        c.execute("""SELECT clear FROM translation WHERE translated=?""", (column,))
121
        res = c.fetchone()
122
        if res is not None and len(res) > 0:
123
            return str(res[0])
124
        return str(None)
125
126
    def init_dict(self, init_kwargs, **kwargs):
127
        for i in kwargs:
128
            if i not in self.state:
129
                if i in init_kwargs:
130
                    self.state[i] = init_kwargs[i]
131
                else:
132
                    self.state[i] = kwargs[i]
133
134
    @staticmethod
135
    def handleTSV(file):
136
        res = []
137
        titles = []
138
        for i, j in enumerate(file):
139
            if i == 0:
140
                titles = j.strip("\n").split("\t")
141
            else:
142
                res.append(dict(zip(titles, j.strip("\n").split("\t"))))
143
        return res
144
145
    @staticmethod
146
    def handleCSV(file):
147
        res = []
148
        titles = []
149
        for i, j in enumerate(file):
150
            if i == 0:
151
                titles = j.split(",")
152
        print(titles)
153
        return res
154
155
    @property
156
    def all_tables(self):
157
        c = self.conn.cursor()
158
        c.execute("""SELECT name FROM sqlite_master WHERE type='table'""")
159
        return [self.__transform_column(i[0]) for i in c.fetchall()]
160
161
    @property
162
    def custom_tables(self):
163
        return [i for i in self.all_tables if i not in self.system_tables]
164
165
    @property
166
    def system_tables(self):
167
        return ["vars", "individuals", "question_assoc", "translation"]
168
169
    def get_columns(self, table):
170
        c = self.conn.cursor()
171
        c.execute("""PRAGMA table_info({})""".format(table))
172
        return [i[1] for i in c.fetchall()]
173
174
    def get_number_of_entries(self, table):
175
        c = self.conn.cursor()
176
        c.execute("""SELECT count(*) FROM individuals""".format(self.__transform_column(table)))
177
        return c.fetchall()[0][0]
178
179
    def get_whole_table(self, table):
180
        colums = self.get_columns(table)
181
        c = self.conn.cursor()
182
        c.execute("""SELECT {} FROM {}""".format(", ".join(colums), table))
183
        return c.fetchall()
184
185
    def __iter__(self):
186
        return self
187
188
    def __next__(self):
189
        if self.current_coding_unit is not None:
190
            if not self.current_coding_unit.isFinished():
191
                return self.current_coding_unit
192
        amount_of_individuals = self.get_number_of_entries("individuals")
193
        for table in self.custom_tables:
194
            c = self.conn.cursor()
195
            c.execute("""SELECT * FROM {}""".format(table))
196
197
            ids_in_table = c.fetchall()
198
            ids_in_table[:] = [i for i in ids_in_table if None not in i]
199
            if amount_of_individuals > len(ids_in_table):
200
                entries = set(range(amount_of_individuals)) - set([i[0] for i in ids_in_table])
201
                entry = random.choice(list(entries))
202
                c.execute("""SELECT {} FROM individuals""".format(table))
203
                coding_answer = c.fetchall()[entry][0]
204
                coding_question = self.__reverse_transform_column(table)
205
                questions = self.get_questions(coding_question)
206
                questions[:] = [i for i in questions if not self.__question_already_answered(coding_question, i, entry)]
207
                if len(questions) == 0:
208
                    c.execute("""SELECT * FROM question_assoc""")
209
                    res = c.fetchall()
210
                    for i in res:
211
                        print(i, len(i[0]), len(coding_question), repr(coding_question))
212
                    raise Exception("This should not happen")
213
                self.current_coding_unit = CodingUnit(self, coding_question, coding_answer, questions, entry)
214
                return self.current_coding_unit
215
        raise StopIteration
216
217
    def __question_already_answered(self, coding_question, question, id_):
218
        c = self.conn.cursor()
219
        c.execute("""SELECT {} FROM {} WHERE id=?""".format(self.__transform_column(question),
220
                                                            self.__transform_column(coding_question)), (id_,))
221
        res = c.fetchone()
222
        return res is not None and res[0] is not None
223
224
    def store_answer(self, coding_question, question, answer, id_):
225
        c = self.conn.cursor()
226
        c.execute("""INSERT OR IGNORE INTO {} (id,{}) VALUES (?,?)""".format(self.__transform_column(coding_question),
227
                                                                             self.__transform_column(question)),
228
                  (id_, answer))
229
        c.execute("""UPDATE {} SET id=?,{}=? WHERE id=?""".format(self.__transform_column(coding_question),
230
                                                                  self.__transform_column(question)),
231
                  (id_, answer, id_))
232
        self.conn.commit()
233
234
    def export(self, filename="out.txt"):
235
        with open(os.path.join(self.path, filename), "w") as file:
236
            file.write("\t".join([self.__reverse_transform_column(i) for i in self.get_columns("individuals") if
237
                                  i not in self.custom_tables + ["id"]]
238
                                 + ["Question to participant", "Participant Answer", "Coder", "Coding Questions",
239
                                    "Coding Answer", "\n"]))
240
            for individual in self.get_whole_table("individuals"):
241
                column = self.get_columns("individuals")
242
                individual = dict(zip(column, individual))
243
                for question in self.custom_tables:
244
                    for i in self.get_whole_table(question):
245
                        column = self.get_columns(question)
246
                        coding_questions = dict(zip(column, i))
247
                        if coding_questions["id"] == individual["id"]:
248
                            for coding_question in coding_questions:
249
                                if coding_question != "id" and coding_question != "coder":
250
                                    file.write("\t".join([str(individual[i]) for i in self.get_columns("individuals") if
251
                                                          i not in self.custom_tables + ["id"]]
252
                                                         + [str(self.__reverse_transform_column(question)),
253
                                                            str(individual[question]), coding_questions["coder"],
254
                                                            self.__reverse_transform_column(coding_question),
255
                                                            coding_questions[coding_question], "\n"]))
256
257
        titles = list()
258
        for i in ["individuals"] + self.custom_tables:
259
            for j in self.get_columns(i):
260
                titles.append((i, j))
261
        """
262
        columns = [i[1] for i in titles if i[0] == "individuals" and i[1] not in [self.__transform_column(i[0]) for i in self.custom_tables]]
263
        print(len(columns),columns)
264
        """
265
        command = """SELECT {} FROM individuals\n""".format(", ".join(list(map(".".join, titles)))) + "".join(
266
            ["""INNER JOIN {} ON {}\n""".format(i, """{}.id = individuals.id""".format(i)) for i in self.custom_tables])
267
        """
268
        print(command)
269
        c = self.conn.cursor()
270
        c.execute(command)
271
        res = c.fetchall()
272
        print(res)
273
        for i in res:
274
            print(i)
275
        print( titles )
276
        print([(i[0],self.__reverse_transform_column(i[1])) for i in titles])
277
278
        #with open(os.path.join(self.path,filename)):
279
        #    pass
280
        """
281
282
283
class CodingUnit(object):
284
    def __init__(self, project, question, answer, coding_questions, id_):
285
        self.question = question
286
        self.answer = answer
287
        self.coding_questions = coding_questions
288
        self.coding_answers = dict()
289
        self.id = id_
290
        self.project = project
291
        self["coder"] = project.coder
292
293
    def isFinished(self):
294
        res = True
295
        res &= all(i in self.coding_answers.keys() for i in self.coding_questions)
296
        res &= all(self.coding_answers[i] is not None for i in self.coding_answers)
297
        return res
298
299
    def __setitem__(self, key, value):
300
        self.coding_answers[key] = value
301
        self.project.store_answer(self.question, key, value, self.id)
302
303
    def __repr__(self):
304
        return "\n".join(["Coding unit: {} -> {}".format(self.question, self.answer)] + [
305
            "-{}\n\t-> {}".format(i, self.coding_answers.get(i, None)) for i in self.coding_questions])
306