1 | import csv |
||
2 | import re |
||
3 | import sys |
||
4 | import pickle |
||
5 | from itertools import chain |
||
6 | |||
7 | import os |
||
8 | from collections import namedtuple |
||
9 | |||
10 | import bottlechest as bn |
||
0 ignored issues
–
show
|
|||
11 | import numpy as np |
||
0 ignored issues
–
show
The import numpy could not be resolved.
This can be caused by one of the following:
1. Missing dependencies. This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
# .scrutinizer.yml
before_commands:
- sudo pip install abc # Python2
- sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint; when installing your modules, make sure to use the command for the correct version.
2. Missing __init__.py files. This error could also result from missing __init__.py files. |
|||
12 | from scipy import sparse |
||
0 ignored issues
–
show
The import scipy could not be resolved.
This can be caused by one of the following:
1. Missing dependencies. This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
# .scrutinizer.yml
before_commands:
- sudo pip install abc # Python2
- sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint; when installing your modules, make sure to use the command for the correct version.
2. Missing __init__.py files. This error could also result from missing __init__.py files. |
|||
13 | # We are not loading openpyxl here since it takes some time |
||
14 | |||
15 | from Orange.data import Domain |
||
16 | from Orange.data.variable import * |
||
0 ignored issues
–
show
|
|||
17 | |||
18 | |||
19 | # A singleton simulated with a class |
||
class FileFormats:
    """Registry of file format handlers, keyed by file extension.

    The class is never instantiated; all state lives in the class-level
    containers below and is filled in by the :meth:`register` decorator.
    """
    formats = []        # every registered format class, in registration order
    names = {}          # extension -> human-readable format name
    writers = {}        # extension -> class that has ``write_file``
    readers = {}        # extension -> class that has ``read_file``
    img_writers = {}    # extension -> class that has ``write_image``
    graph_writers = {}  # extension -> class that has ``write_graph``

    @classmethod
    def register(cls, name, extension):
        """Return a class decorator that registers a format.

        :param name: human-readable name of the format
        :type name: str
        :param extension: file extension the format handles, e.g. ``".tab"``
        :type extension: str
        :return: decorator that records the class and returns it unchanged
        """
        # Which registry a class lands in depends on the methods it offers.
        capabilities = (
            ("write_file", cls.writers),
            ("read_file", cls.readers),
            ("write_image", cls.img_writers),
            ("write_graph", cls.graph_writers),
        )

        def decorator(format_cls):
            # Kept for backward compatibility: the registry's own NAME has
            # always held the name of the most recently registered format.
            cls.NAME = name
            # The name belongs to the registered class; do not clobber a
            # NAME the class defines itself.
            if "NAME" not in vars(format_cls):
                format_cls.NAME = name
            cls.formats.append(format_cls)
            cls.names[extension] = name
            for method, registry in capabilities:
                if hasattr(format_cls, method):
                    registry[extension] = format_cls
            return format_cls

        return decorator
||
45 | |||
46 | |||
class FileReader:
    """Helper for scanning delimited text before it is parsed."""

    def prescan_file(self, f, delim, nvars, disc_cols, cont_cols):
        """Collect distinct values and decimal counts from delimited lines.

        :param f: iterable of text lines (e.g. an open file)
        :param delim: field delimiter passed to ``str.split``
        :param nvars: number of columns; sizes both returned lists
        :param disc_cols: indices of columns whose distinct values are
            collected
        :param cont_cols: indices of columns whose number of decimals is
            measured
        :return: ``(values, decimals)``; ``values[col]`` is the set of strings
            seen in column ``col`` and ``decimals[col]`` is the largest number
            of digits after a ``"."`` seen there (``-1`` if none was seen)
        """
        values = [set() for _ in range(nvars)]
        decimals = [-1] * nvars
        for lne in f:
            # Drop the line terminator so it does not end up in the last field.
            fields = lne.rstrip("\r\n").split(delim)
            for col in disc_cols:
                values[col].add(fields[col])
            for col in cont_cols:
                val = fields[col]
                # Unknown-value markers carry no information about decimals.
                if val not in Variable._DefaultUnknownStr and "." in val:
                    decs = len(val) - val.find(".") - 1
                    if decs > decimals[col]:
                        decimals[col] = decs
        return values, decimals
||
62 | |||
63 | |||
@FileFormats.register("Tab-delimited file", ".tab")
class TabDelimFormat:
    """Reader and writer for tab-delimited files with a three-line header.

    The three header lines hold, in this order, the column names, the column
    types and the column flags.
    """
    # Splits on runs of spaces that are not escaped with a backslash.
    non_escaped_spaces = re.compile(r"(?<!\\) +")

    def read_header(self, f):
        """Parse the three header lines and build the domain.

        Besides returning the domain, this stores on the instance the number
        of columns (``n_columns``), the ``(column, converter)`` pairs for
        attributes, class variables and metas, and the indices of the weight
        and basket columns (``-1`` when absent).

        :param f: seekable text file; it is rewound to the start
        :return: the domain described by the header
        :raises ValueError: if the number of types differs from the number of
            names, if there are more flags than names, or if a column is
            marked as class together with weight or meta
        """
        f.seek(0)
        names = [x.strip() for x in f.readline().strip("\n\r").split("\t")]
        types = [x.strip() for x in f.readline().strip("\n\r").split("\t")]
        flags = [x.strip() for x in f.readline().strip("\n\r").split("\t")]
        self.n_columns = len(names)
        if len(types) != self.n_columns:
            raise ValueError("File contains %i variable names and %i types" %
                             (len(names), len(types)))
        if len(flags) > self.n_columns:
            raise ValueError("There are more flags than variables")
        # Pad so that zip() below never truncates because of missing flags.
        flags += [""] * self.n_columns

        attributes = []
        class_vars = []
        metas = []

        self.attribute_columns = []
        self.classvar_columns = []
        self.meta_columns = []
        self.weight_column = -1
        self.basket_column = -1

        for col, (name, tpe, flag) in enumerate(zip(names, types, flags)):
            flag = [part.replace("\\ ", " ")
                    for part in self.non_escaped_spaces.split(flag)]
            if "i" in flag or "ignore" in flag:
                continue
            if "b" in flag or "basket" in flag:
                self.basket_column = col
                continue
            is_class = "class" in flag
            is_meta = "m" in flag or "meta" in flag or tpe in ["s", "string"]
            is_weight = "w" in flag or "weight" in flag \
                or tpe in ["w", "weight"]

            # "key=value" flags become entries in the variable's attributes.
            attrs = [part.split("=", 1) for part in flag if "=" in part]

            if is_weight:
                if is_class:
                    raise ValueError("Variable {} (column {}) is marked as "
                                     "class and weight".format(name, col))
                self.weight_column = col
                continue

            # Weight-typed columns were handled above, so they need no
            # branch here.
            if tpe in ["c", "continuous"]:
                var = ContinuousVariable.make(name)
            elif tpe in ["d", "discrete"]:
                var = DiscreteVariable()  # no name to bypass caching
                var.name = name
                # Values are collected while reading; reorder_values()
                # replaces this variable afterwards.
                var.fix_order = True
            elif tpe in ["s", "string"]:
                var = StringVariable.make(name)
            else:
                # Anything else is a space-separated list of discrete values.
                values = [v.replace("\\ ", " ")
                          for v in self.non_escaped_spaces.split(tpe)]
                var = DiscreteVariable.make(name, values, True)
            var.attributes.update(attrs)

            if is_class:
                if is_meta:
                    raise ValueError(
                        "Variable {} (column {}) is marked as "
                        "class and meta attribute".format(name, col))
                class_vars.append(var)
                self.classvar_columns.append((col, var.val_from_str_add))
            elif is_meta:
                metas.append(var)
                self.meta_columns.append((col, var.val_from_str_add))
            else:
                attributes.append(var)
                self.attribute_columns.append((col, var.val_from_str_add))

        domain = Domain(attributes, class_vars, metas)
        return domain

    def count_lines(self, file):
        """Return the number of lines in `file` minus the three header lines.

        :param file: seekable text file; it is rewound to the start
        :return: line count after the header (blank lines are counted too)
        """
        file.seek(0)
        i = -3
        for _ in file:
            i += 1
        return i

    def read_data(self, f, table):
        """Fill `table` with the data rows of `f`.

        Requires :meth:`read_header` to have been called on this instance.
        Blank lines are skipped; if fewer rows were read than the table was
        allocated for, the table's arrays are shrunk to fit.

        :param f: seekable text file; it is rewound and the header skipped
        :param table: table whose ``X``, ``_Y``, ``W`` and ``metas`` are
            filled in place
        :raises ValueError: if a line has more fields than the header
        """
        X, Y = table.X, table._Y
        W = table.W if table.W.shape[-1] else None
        f.seek(0)
        f.readline()
        f.readline()
        f.readline()
        padding = [""] * self.n_columns
        if self.basket_column >= 0:
            # TODO how many columns?!
            # The shape must be passed as a single tuple.
            table._Xsparse = sparse.lil_matrix((len(X), 100))
        table.metas = metas = (
            np.empty((len(X), len(self.meta_columns)), dtype=object))
        line_count = 0
        Xr = None
        for lne in f:
            values = lne
            if not values.strip():
                continue
            values = values.split("\t")
            if len(values) > self.n_columns:
                raise ValueError("Too many columns in line {}".
                                 format(4 + line_count))
            elif len(values) < self.n_columns:
                # Short lines are padded with empty fields.
                values += padding
            if self.attribute_columns:
                Xr = X[line_count]
                for i, (col, reader) in enumerate(self.attribute_columns):
                    Xr[i] = reader(values[col].strip())
            for i, (col, reader) in enumerate(self.classvar_columns):
                Y[line_count, i] = reader(values[col].strip())
            if W is not None:
                W[line_count] = float(values[self.weight_column])
            for i, (col, reader) in enumerate(self.meta_columns):
                metas[line_count, i] = reader(values[col].strip())
            line_count += 1
        if line_count != len(X):
            # Drop the local references before resizing the arrays in place.
            del Xr, X, Y, W, metas
            table.X.resize(line_count, len(table.domain.attributes))
            table._Y.resize(line_count, len(table.domain.class_vars))
            if table.W.ndim == 1:
                table.W.resize(line_count)
            else:
                table.W.resize((line_count, 0))
            table.metas.resize((line_count, len(self.meta_columns)))
            table.n_rows = line_count

    def reorder_values_array(self, arr, variables):
        """Replace variables flagged ``fix_order`` and remap their columns.

        For each such variable a replacement is obtained through
        ``var.make`` and, if its value order differs, the indices stored in
        the corresponding column of `arr` are rewritten in place.

        :param arr: 2-D array; column ``i`` holds values of ``variables[i]``
        :param variables: sequence of variables
        :return: list of variables with the flagged ones replaced
        """
        newvars = []
        for col, var in enumerate(variables):
            if getattr(var, "fix_order", False):
                nvar = var.make(var.name, var.values, var.ordered)
                nvar.attributes = var.attributes
                move = len(var.values)
                if nvar.values != var.values:
                    # Shift the old indices out of the way first, so that
                    # already remapped entries are not remapped again.
                    arr[:, col] += move
                    for i, val in enumerate(var.values):
                        bn.replace(arr[:, col], move + i,
                                   nvar.values.index(val))
                var = nvar
            newvars.append(var)
        return newvars

    def reorder_values(self, table):
        """Apply :meth:`reorder_values_array` to all parts of `table`.

        :param table: table whose arrays are remapped in place and whose
            ``domain`` is replaced by one built from the new variables
        """
        attrs = self.reorder_values_array(table.X, table.domain.attributes)
        classes = self.reorder_values_array(table._Y, table.domain.class_vars)
        metas = self.reorder_values_array(table.metas, table.domain.metas)
        table.domain = Domain(attrs, classes, metas=metas)

    def read_file(self, filename, cls=None):
        """Read the file `filename` into a table.

        :param filename: path of the file to read
        :type filename: str
        :param cls: table class to construct; see :meth:`_read_file`
        :return: the table that was read
        """
        with open(filename) as file:
            return self._read_file(file, cls)

    def _read_file(self, file, cls=None):
        """Read an open file into a table.

        :param file: seekable text file
        :param cls: class providing ``from_domain``; defaults to ``Table``
        :return: the table that was read
        """
        from ..data import Table

        if cls is None:
            cls = Table
        domain = self.read_header(file)
        nExamples = self.count_lines(file)
        table = cls.from_domain(domain, nExamples, self.weight_column >= 0)
        self.read_data(file, table)
        self.reorder_values(table)
        return table

    @classmethod
    def _write_fast(cls, f, data):
        """Write the data rows straight from the ``X``, ``_Y``, ``metas``
        arrays.

        :param f: writable text file
        :param data: data exposing ``domain``, ``X``, ``_Y`` and ``metas``
        """
        wa = [var.str_val for var in data.domain.variables + data.domain.metas]
        for Xi, Yi, Mi in zip(data.X, data._Y, data.metas):
            f.write("\t".join(w(val) for val, w in zip(chain(Xi, Yi, Mi), wa)))
            f.write("\n")

    @classmethod
    def write_file(cls, filename, data):
        """
        Save data to file.

        Function uses fast implementation in case of numpy data, and slower
        fall-back for general storage.

        The output file is always closed when the function finishes, whether
        it was opened here or passed in by the caller.

        :param filename: the name of the file, or an open writable file
        :type filename: str
        :param data: the data to be saved
        :type data: Orange.data.Storage
        """
        if isinstance(filename, str):
            f = open(filename, "w")
        else:
            f = filename
        try:
            domain_vars = data.domain.variables + data.domain.metas
            # first line
            f.write("\t".join([str(j.name) for j in domain_vars]))
            f.write("\n")

            # second line
            # TODO Basket column.
            t = {"ContinuousVariable": "c", "DiscreteVariable": "d",
                 "StringVariable": "string", "Basket": "basket"}

            f.write("\t".join([t[type(j).__name__] for j in domain_vars]))
            f.write("\n")

            # third line
            m = list(data.domain.metas)
            c = list(data.domain.class_vars)
            r = []
            for i in domain_vars:
                r1 = ["{}={}".format(k, v).replace(" ", "\\ ")
                      for k, v in i.attributes.items()]
                if i in m:
                    r1.append("m")
                elif i in c:
                    r1.append("class")
                r.append(" ".join(r1))
            f.write("\t".join(r))
            f.write("\n")

            # data
            try:
                cls._write_fast(f, data)
            except Exception:
                # Deliberate best-effort fallback for storages without the
                # array attributes the fast path needs.
                # NOTE(review): rows written before the fast path failed
                # are not rolled back.
                domain_vars = [data.domain.index(var) for var in domain_vars]
                for i in data:
                    f.write("\t".join(str(i[j]) for j in domain_vars) + "\n")
        finally:
            f.close()

    def write(self, filename, data):
        """Instance-level alias for :meth:`write_file`."""
        self.write_file(filename, data)
||
303 | |||
304 | |||
@FileFormats.register("Comma-separated file", ".csv")
class TxtFormat:
    """Reader and writer for plain delimited numeric files."""
    # Field contents that denote a missing value.
    MISSING_VALUES = frozenset({"", "NA", "?"})

    @staticmethod
    def read_header(file, delimiter=None):
        """Guess the delimiter and whether the first line is a header.

        The first line is a header if any of its fields, other than the
        missing-value markers, cannot be converted to ``float``. Without a
        header, columns are named ``Var0001``, ``Var0002``, ...

        :param file: seekable text file; it is left rewound to the start
        :param delimiter: field delimiter; if ``None``, the first of tab,
            comma, semicolon and space found in the first line is used
        :return: ``(domain, header_lines, delimiter)``; ``delimiter`` is
            ``None`` when fields are separated by whitespace
        """
        first_line = file.readline()
        file.seek(0)
        if delimiter is None:
            for delimiter in "\t,; ":
                if delimiter in first_line:
                    break
            else:
                delimiter = None
        if delimiter == " ":
            # None makes split() (and genfromtxt) treat any run of
            # whitespace as a separator.
            delimiter = None
        # Strip the fields so that the line terminator left on the last one
        # does not hide a missing-value marker.
        atoms = [atom.strip() for atom in first_line.split(delimiter)]
        try:
            for atom in set(atoms) - TxtFormat.MISSING_VALUES:
                float(atom)
        except ValueError:
            names = atoms
            header_lines = 1
        else:
            header_lines = 0
            names = ["Var{:04}".format(i + 1) for i in range(len(atoms))]
        domain = Domain([ContinuousVariable.make(name) for name in names])
        return domain, header_lines, delimiter

    def read_file(self, filename, cls=None):
        """Read the file `filename` into a table of continuous variables.

        :param filename: path of the file to read
        :type filename: str
        :param cls: class providing ``from_numpy``; defaults to ``Table``
        :return: the table that was read
        """
        from ..data import Table

        if cls is None:
            cls = Table
        with open(filename, "rt") as file:
            domain, header_lines, delimiter = self.read_header(file)
        with open(filename, "rb") as file:
            # genfromtxt applies a comma-separated string of markers to all
            # columns; any other non-sequence object would be turned into a
            # single marker with str().
            arr = np.genfromtxt(
                file, delimiter=delimiter,
                skip_header=header_lines,
                missing_values=",".join(sorted(self.MISSING_VALUES)))
        table = cls.from_numpy(domain, arr)
        return table

    @classmethod
    def csv_saver(cls, filename, data, delimiter='\t'):
        """Write `data` to `filename` with the :mod:`csv` module.

        The first row holds the variable names. For tab-delimited output,
        a row of type names and a row of flags follow before the data.

        :param filename: path of the file to write
        :type filename: str
        :param data: iterable of rows exposing ``domain``
        :param delimiter: field delimiter
        :type delimiter: str
        """
        # The csv module does its own newline handling and asks for
        # newline="".
        with open(filename, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=delimiter)
            all_vars = data.domain.variables + data.domain.metas
            writer.writerow([v.name for v in all_vars])  # write variable names
            if delimiter == '\t':
                flags = ([''] * len(data.domain.attributes)) + \
                        (['class'] * len(data.domain.class_vars)) + \
                        (['m'] * len(data.domain.metas))

                for i, var in enumerate(all_vars):
                    attrs = ["{0!s}={1!s}".format(*item).replace(" ", "\\ ")
                             for item in var.attributes.items()]
                    if attrs:
                        flags[i] += (" " if flags[i] else "") + (" ".join(attrs))

                writer.writerow([type(v).__name__.replace("Variable", "").lower()
                                 for v in all_vars])  # write variable types
                writer.writerow(flags)  # write flags
            for ex in data:  # write examples
                writer.writerow(ex)

    @classmethod
    def write_file(cls, filename, data):
        """Save `data` to `filename` as comma-separated values.

        :param filename: path of the file to write
        :type filename: str
        :param data: the data to be saved
        """
        cls.csv_saver(filename, data, ',')

    def write(self, filename, data):
        """Instance-level alias for :meth:`write_file`."""
        self.write_file(filename, data)
||
375 | |||
376 | |||
@FileFormats.register("Basket file", ".basket")
class BasketFormat:
    """Reader for basket files, parsed by ``Orange.data._io``."""

    @classmethod
    def read_file(cls, filename, storage_class=None):
        """Read the basket file `filename`.

        :param filename: path of the file to read
        :type filename: str
        :param storage_class: class providing ``from_numpy``; defaults to
            ``Table``
        :return: the result of ``storage_class.from_numpy``
        """
        from Orange.data import _io

        if storage_class is None:
            from ..data import Table as storage_class

        def constr_vars(inds):
            # Build variables ordered by their column index. Returns None
            # for an empty mapping.
            if inds:
                return [ContinuousVariable(x.decode("utf-8")) for _, x in
                        sorted((ind, name) for name, ind in inds.items())]

        X, Y, metas, attr_indices, class_indices, meta_indices = \
            _io.sparse_read_float(filename.encode(sys.getdefaultencoding()))

        attrs = constr_vars(attr_indices)
        classes = constr_vars(class_indices)
        meta_attrs = constr_vars(meta_indices)
        domain = Domain(attrs, classes, meta_attrs)
        # Each data part is passed only when there are variables for it.
        # The variable list must come first in each `and`, so that it is
        # the one tested for truth and the data array is the one passed.
        return storage_class.from_numpy(
            domain, attrs and X, classes and Y, meta_attrs and metas)
||
400 | |||
401 | |||
402 | @FileFormats.register("Excel file", ".xlsx") |
||
403 | class ExcelFormat: |
||
404 | non_escaped_spaces = re.compile(r"(?<!\\) +") |
||
405 | |||
406 | def __init__(self): |
||
407 | self.attribute_columns = [] |
||
408 | self.classvar_columns = [] |
||
409 | self.meta_columns = [] |
||
410 | self.weight_column = -1 |
||
411 | self.basket_column = -1 |
||
412 | |||
413 | self.n_columns = self.first_data_row = 0 |
||
414 | |||
def open_workbook(self, f):
    """Open a workbook and return the requested worksheet.

    `f` may be a file name of the form ``"path:sheet"``. The first two
    characters are ignored when looking for the colon (presumably to skip
    a Windows drive letter). Without a sheet name the active sheet is
    used. The sheet's highest column is stored in ``self.n_columns``.

    :param f: workbook file name (optionally with ``:sheet``) or an
        object accepted by ``openpyxl.load_workbook``
    :return: the selected worksheet
    """
    # Imported lazily because loading openpyxl takes some time.
    from openpyxl import load_workbook

    sheet_name = None
    if isinstance(f, str) and ":" in f[2:]:
        f, sheet_name = f.rsplit(":", 1)
    workbook = load_workbook(f, use_iterators=True,
                             read_only=True, data_only=True)
    if sheet_name:
        worksheet = workbook.get_sheet_by_name(sheet_name)
    else:
        worksheet = workbook.get_active_sheet()
    self.n_columns = worksheet.get_highest_column()
    return worksheet
||
427 | |||
428 | # noinspection PyBroadException |
||
def read_header_3(self, worksheet):
    """Try to read a three-row header: names, types and flags.

    On success the ``(column, converter)`` lists and the weight/basket
    column indices on the instance are updated and ``first_data_row`` is
    set to 4.

    :param worksheet: worksheet providing ``get_squared_range``
    :return: the domain, or ``False`` if the first three rows do not look
        like such a header
    :raises ValueError: if a column is marked as class together with
        weight or meta
    """
    cols = self.n_columns
    try:
        names, types, flags = [
            [cell.value.strip() if cell.value is not None else ""
             for cell in row]
            for row in worksheet.get_squared_range(1, 1, cols, 3)]
    except Exception:
        # E.g. a non-string cell (no .strip()) or fewer than three rows:
        # this is not a three-row header. KeyboardInterrupt and SystemExit
        # are deliberately not swallowed.
        return False
    if not (all(tpe in ("", "c", "d", "s", "continuous", "discrete",
                        "string", "w", "weight") or " " in tpe
                for tpe in types) and
            all(flg in ("", "i", "ignore", "m", "meta", "w", "weight",
                        "b", "basket", "class") or "=" in flg
                for flg in flags)):
        return False
    attributes = []
    class_vars = []
    metas = []
    for col, (name, tpe, flag) in enumerate(zip(names, types, flags)):
        flag = self.non_escaped_spaces.split(flag)
        if "i" in flag or "ignore" in flag:
            continue
        if "b" in flag or "basket" in flag:
            self.basket_column = col
            continue
        is_class = "class" in flag
        is_meta = "m" in flag or "meta" in flag or tpe in ["s", "string"]
        is_weight = "w" in flag or "weight" in flag \
            or tpe in ["w", "weight"]
        # "key=value" flags become entries in the variable's attributes.
        attrs = [part.split("=", 1) for part in flag if "=" in part]
        if is_weight:
            if is_class:
                raise ValueError("Variable {} (column {}) is marked as "
                                 "class and weight".format(name, col + 1))
            self.weight_column = col
            continue
        # Weight-typed columns were handled above, so they need no branch
        # here.
        if tpe in ["c", "continuous"]:
            var = ContinuousVariable.make(name)
        elif tpe in ["d", "discrete"]:
            var = DiscreteVariable.make(name)
            var.fix_order = True
        elif tpe in ["s", "string"]:
            var = StringVariable.make(name)
        else:
            # Anything else is a space-separated list of discrete values.
            values = [v.replace("\\ ", " ")
                      for v in self.non_escaped_spaces.split(tpe)]
            var = DiscreteVariable.make(name, values, True)
        var.attributes.update(attrs)
        if is_class:
            if is_meta:
                # Columns are reported 1-based, as in the message above.
                raise ValueError(
                    "Variable {} (column {}) is marked as "
                    "class and meta attribute".format(name, col + 1))
            class_vars.append(var)
            self.classvar_columns.append((col, var.val_from_str_add))
        elif is_meta:
            metas.append(var)
            self.meta_columns.append((col, var.val_from_str_add))
        else:
            attributes.append(var)
            self.attribute_columns.append((col, var.val_from_str_add))
    self.first_data_row = 4
    return Domain(attributes, class_vars, metas)
||
495 | |||
496 | # noinspection PyBroadException |
||
def read_header_0(self, worksheet):
    """Try to treat the sheet as headerless, purely numeric data.

    This succeeds if every non-empty cell of the first row converts to
    ``float``. Columns are then named ``Var0001``, ``Var0002``, ..., all
    of them become attributes, and ``first_data_row`` is set to 1.

    :param worksheet: worksheet providing ``get_squared_range``
    :return: the domain, or ``False`` if the first row is not numeric
    """
    try:
        first_row = next(
            worksheet.get_squared_range(1, 1, self.n_columns, 3))
        for cell in first_row:
            if cell.value is not None:
                float(cell.value)
    except Exception:
        # Any failure to fetch or convert the row means it is not plain
        # numeric data. KeyboardInterrupt and SystemExit are deliberately
        # not swallowed.
        return False
    self.first_data_row = 1
    attrs = [ContinuousVariable.make("Var{:04}".format(i + 1))
             for i in range(self.n_columns)]
    self.attribute_columns = [(i, var.val_from_str_add)
                              for i, var in enumerate(attrs)]
    return Domain(attrs)
||
510 | |||
def read_header_1(self, worksheet):
    """Try to read a single-row header of the form ``flags#name``.

    The part before ``#`` is a string of one-letter flags: ``i`` ignore,
    ``b`` basket, ``c`` class, ``m`` or ``s`` meta, ``w`` or ``W`` weight,
    and ``C``, ``D``, ``S`` for continuous, discrete and string. Without
    a type flag, the type is taken from the second row: a numeric cell
    gives a continuous variable; otherwise a column with more than 20
    distinct values becomes a string meta and any other column becomes
    discrete. If no class is marked, the last attribute becomes the
    class. On success ``first_data_row`` is set to 2.

    :param worksheet: worksheet with the header in its first row
    :return: the domain, or ``False`` if the sheet has fewer than two
        rows or two columns
    :raises ValueError: if a column is marked as class together with
        weight or meta
    """
    import openpyxl.cell.cell

    if worksheet.get_highest_column() < 2 or \
            worksheet.get_highest_row() < 2:
        return False
    cols = self.n_columns
    # str() keeps non-string header cells (e.g. numbers) from crashing
    # on .strip().
    names = [str(cell.value).strip() if cell.value is not None else ""
             for cell in next(worksheet.get_squared_range(1, 1, cols, 3))]
    row2 = list(next(worksheet.get_squared_range(1, 2, cols, 3)))
    attributes = []
    class_vars = []
    metas = []
    for col, name in enumerate(names):
        if "#" in name:
            flags, name = name.split("#", 1)
        else:
            flags = ""
        if "i" in flags:
            continue
        if "b" in flags:
            self.basket_column = col
            continue
        is_class = "c" in flags
        is_meta = "m" in flags or "s" in flags
        is_weight = "W" in flags or "w" in flags
        if is_weight:
            if is_class:
                # The column letter is used here too, as in the
                # class/meta message below.
                raise ValueError(
                    "Variable {} (column {}) is marked as "
                    "class and weight".format(
                        name,
                        openpyxl.cell.cell.get_column_letter(col + 1)))
            self.weight_column = col
            continue
        # Weight columns were handled above, so they need no branch here.
        if "C" in flags:
            var = ContinuousVariable.make(name)
        elif "D" in flags:
            var = DiscreteVariable.make(name)
            var.fix_order = True
        elif "S" in flags:
            var = StringVariable.make(name)
        elif row2[col].data_type == "n":
            var = ContinuousVariable.make(name)
        else:
            # NOTE(review): the header cell is included in this count.
            if len(set(row[col].value for row in worksheet.rows)) > 20:
                var = StringVariable.make(name)
                is_meta = True
            else:
                var = DiscreteVariable.make(name)
                var.fix_order = True
        if is_class:
            if is_meta:
                raise ValueError(
                    "Variable {} (column {}) is marked as "
                    "class and meta attribute".format(
                        name,
                        openpyxl.cell.cell.get_column_letter(col + 1)))
            class_vars.append(var)
            self.classvar_columns.append((col, var.val_from_str_add))
        elif is_meta:
            metas.append(var)
            self.meta_columns.append((col, var.val_from_str_add))
        else:
            attributes.append(var)
            self.attribute_columns.append((col, var.val_from_str_add))
    if attributes and not class_vars:
        # No class was marked: use the last attribute as the class.
        class_vars.append(attributes.pop(-1))
        self.classvar_columns.append(self.attribute_columns.pop(-1))
    self.first_data_row = 2
    return Domain(attributes, class_vars, metas)
||
582 | |||
def read_header(self, worksheet):
    """Build the domain, trying the supported header layouts in turn.

    The three-row layout is tried first, then headerless numeric data,
    then the single-row layout. The first parser that does not return
    ``False`` wins.

    :param worksheet: worksheet to inspect
    :return: the domain returned by the first parser that accepts the
        sheet
    :raises ValueError: if every parser rejects the sheet
    """
    parsers = (self.read_header_3, self.read_header_0, self.read_header_1)
    for parse in parsers:
        domain = parse(worksheet)
        # Parsers signal rejection with the literal False. Test identity
        # rather than truthiness, so a valid result that happens to be
        # falsy is not discarded.
        if domain is not False:
            return domain
    raise ValueError("Invalid header")
||
590 | |||
591 | # noinspection PyPep8Naming,PyProtectedMember |
||
def read_data(self, worksheet, table):
    """
    Fill a pre-allocated table with the data rows of a worksheet.

    Rows before `self.first_data_row` are skipped, as are rows whose
    cells are all None. String cells are stripped before they are passed
    to the column's reader. If fewer rows were read than the table was
    allocated for, the table's arrays are shrunk in place and
    `table.n_rows` is updated.

    :param worksheet: object whose `rows` yields sequences of cells with
        a `value` attribute
    :param table: table whose `X`, `_Y`, `W` arrays are filled; `metas`
        (and `_Xsparse` when a basket column is present) are replaced
    """
    def clean(value):
        # Only strings carry stray whitespace; other cell types pass as is.
        return value.strip() if isinstance(value, str) else value

    X, Y = table.X, table._Y
    W = table.W if table.W.shape[-1] else None
    n_allocated = len(X)
    if self.basket_column >= 0:
        # TODO how many columns?!
        # The shape must be given as a single tuple.
        table._Xsparse = sparse.lil_matrix((n_allocated, 100))
    table.metas = metas = np.empty(
        (n_allocated, len(self.meta_columns)), dtype=object)

    # iter() makes this work whether `rows` is a generator or a sequence.
    sheet_rows = iter(worksheet.rows)
    for _ in range(1, self.first_data_row):
        next(sheet_rows, None)

    line_count = 0
    for row in sheet_rows:
        values = [cell.value for cell in row]
        if all(value is None for value in values):
            continue
        for i, (col, reader) in enumerate(self.attribute_columns):
            X[line_count, i] = reader(clean(values[col]))
        for i, (col, reader) in enumerate(self.classvar_columns):
            Y[line_count, i] = reader(clean(values[col]))
        if W is not None:
            W[line_count] = float(values[self.weight_column])
        for i, (col, reader) in enumerate(self.meta_columns):
            metas[line_count, i] = reader(clean(values[col]))
        line_count += 1

    if line_count != n_allocated:
        # ndarray.resize refuses to work while other references to the
        # array exist, so drop the local aliases first.
        del X, Y, W, metas
        table.X.resize(line_count, len(table.domain.attributes))
        table._Y.resize(line_count, len(table.domain.class_vars))
        if table.W.ndim == 1:
            table.W.resize(line_count)
        else:
            table.W.resize((line_count, 0))
        table.metas.resize((line_count, len(self.meta_columns)))
        table.n_rows = line_count
||
635 | |||
636 | # noinspection PyUnresolvedReferences |
||
637 | @staticmethod |
||
def reorder_values_array(arr, variables):
    """
    Re-encode the columns of `arr` whose variables need their values
    reordered, then clear the variables' `fix_order` flag.

    A column is re-encoded when its variable has a truthy `fix_order`
    attribute, fewer than 1000 values, and `var.ordered_values(var.values)`
    differs from `var.values`. The variable's `values` are replaced by the
    new order.

    :param arr: 2d array, modified in place; column i belongs to
        variables[i]
    :param variables: variables describing the columns of `arr`
    """
    for col, var in enumerate(variables):
        if getattr(var, "fix_order", False) and len(var.values) < 1000:
            new_order = var.ordered_values(var.values)
            if new_order != var.values:
                # Shift the old indices out of the range of the new ones
                # so that successive replacements cannot collide.
                arr[:, col] += 1000
                for i, val in enumerate(var.values):
                    bn.replace(arr[:, col], 1000 + i, new_order.index(val))
                var.values = new_order
        # The flag is single-use. Removing it must not fail for variables
        # that never had it (or that only inherit it from their class).
        try:
            delattr(var, "fix_order")
        except AttributeError:
            pass
||
649 | |||
650 | # noinspection PyProtectedMember |
||
def reorder_values(self, table):
    """
    Apply `reorder_values_array` to each part of the table.

    The attribute, class and meta arrays are processed in that order,
    each together with the matching variables from `table.domain`.
    """
    parts = (("X", "attributes"), ("_Y", "class_vars"), ("metas", "metas"))
    for array_name, variables_name in parts:
        self.reorder_values_array(
            getattr(table, array_name),
            getattr(table.domain, variables_name))
||
655 | |||
def read_file(self, file, cls=None):
    """
    Read a worksheet into a table.

    :param file: passed to `self.open_workbook` to obtain the worksheet
    :param cls: class providing `from_domain`; defaults to
        `Orange.data.Table`
    :return: the table after `read_data` and `reorder_values` ran on it
    """
    if cls is None:
        # Imported only when needed.
        from Orange.data import Table
        cls = Table

    worksheet = self.open_workbook(file)
    # Must run before the row count is computed: it is read_header that
    # establishes self.first_data_row for this worksheet.
    domain = self.read_header(worksheet)

    # Newer openpyxl exposes the row count as `max_row`; older releases
    # only provide get_highest_row().
    highest_row = getattr(worksheet, "max_row", None)
    if highest_row is None:
        highest_row = worksheet.get_highest_row()
    # Never request a negative number of rows for a header-only sheet.
    n_rows = max(0, highest_row - self.first_data_row + 1)

    table = cls.from_domain(domain, n_rows, self.weight_column >= 0)
    self.read_data(worksheet, table)
    self.reorder_values(table)
    return table
||
670 | |||
671 | |||
@FileFormats.register("Pickled table", ".pickle")
class PickleFormat:
    """File format that stores and loads objects with the pickle module."""

    @classmethod
    def read_file(cls, file, _=None):
        """
        Load and return the object pickled in `file`.

        The second argument is accepted and ignored.
        """
        # NOTE(review): unpickling can execute arbitrary code; only use
        # this on files from a trusted source.
        with open(file, "rb") as stream:
            loaded = pickle.load(stream)
        return loaded

    @classmethod
    def write_file(cls, filename, table):
        """Pickle `table` into the file named `filename`."""
        with open(filename, "wb") as stream:
            pickle.dump(table, stream)

    def write(self, filename, table):
        """Instance-level entry point; delegates to `write_file`."""
        self.write_file(filename, table)
||
686 | |||
687 | |||
@FileFormats.register("Dot Tree File", ".dot")
class DotFormat:
    """File format that exports a tree to a Graphviz .dot file."""

    @classmethod
    def write_graph(cls, filename, graph):
        """Export `graph` to `filename` with sklearn's export_graphviz."""
        # sklearn is imported here so it is only needed when exporting.
        from sklearn import tree

        tree.export_graphviz(graph, out_file=filename)

    def write(self, filename, tree):
        """
        Write a tree to `filename`.

        :param tree: the tree itself, or a dict holding it under the key
            'tree'
        """
        # isinstance also accepts dict subclasses, which an exact type
        # comparison would pass on to the exporter unopened.
        if isinstance(tree, dict):
            tree = tree['tree']
        self.write_graph(filename, tree)
||
700 | |||
701 | |||
@FileFormats.register("Fixed width textfile", ".fixed")
class FixedWidthFormat(TabDelimFormat):
    """
    FixedWidthFormat reads tables from files where the columns have a
    fixed width. The cells are space-padded to the left.
    See datasets/glass.fixed and tests/test_fixedwidth_reader.py

    The first three lines of a file hold the column names, the column
    types and the column flags; the remaining lines hold the data.

    It is possible to determine the exact cell location of a specific
    table cell within the file because of the fixed width columns.
    This allows the FixedWidthFormat to be used with the LazyFile
    widget to 'read' extremely large files.

    TODO:
      - Add read_row() without reading entire file.
      - Allow spaces in column names and cell values.
      - Ensure compatibility with all tables in the tests directory.
      - Do metas and class properly.
    """

    # Description of one column: header texts, character span in a line
    # and position among the columns.
    ColumnInfo = namedtuple(
        'ColumnInfo',
        ['name', 'tpe', 'flag', 'start', 'end', 'width', 'index'],
    )

    def read_ends_columns(self, filename):
        """
        Describe the columns of the file from its three header lines.

        Column boundaries are derived from the types line (the second
        line): a column ends where its type token ends and starts where
        the previous column ended. Names and flags are the stripped
        slices of the first and third line over the same span.

        :return: list of ColumnInfo, one per token of the types line
        """
        with open(filename) as f:
            l_names = f.readline()
            l_types = f.readline()
            l_flags = f.readline()
        types = l_types.split()
        # Surround the line with spaces so that every token, including the
        # first and the last, can be searched for as " token ". Because of
        # the leading space, an index found in `padded` equals the index
        # at which the token starts in the original line.
        padded = " " + l_types.rstrip("\n") + " "
        ends = []
        for tpe in types:
            search_from = ends[-1] if ends else 0
            ends.append(padded.find(" " + tpe + " ", search_from) + len(tpe))
        starts = [0] + ends[:-1]
        return [
            self.ColumnInfo(
                name=l_names[start:end].strip(),
                flag=l_flags[start:end].strip(),
                tpe=tpe,
                start=start,
                end=end,
                width=end - start,
                index=index,
            )
            for index, (start, end, tpe) in enumerate(zip(starts, ends, types))
        ]

    def read_header(self, filename):
        """
        Reads the header of the fixed width file and returns the
        Domain of the table.

        As a side effect this sets `n_columns`, `attribute_columns`,
        `classvar_columns`, `meta_columns`, `weight_column` and
        `basket_column` on the reader; the column lists hold pairs of
        (column index, the variable's `val_from_str_add`).

        :raises ValueError: if a column is marked as class and weight, or
            as class and meta

        TODO:
          - Use read_ends_columns() to determine the width of the
            columns and use that to parse the lines, because this
            will allow the use of spaces in column names.
        """
        # Names, types and flags all come from the same column list, so
        # they always have the same length and need no padding or checks.
        columns = self.read_ends_columns(filename)
        self.n_columns = len(columns)
        attributes = []
        class_vars = []
        metas = []
        self.attribute_columns = []
        self.classvar_columns = []
        self.meta_columns = []
        self.weight_column = -1
        self.basket_column = -1
        for col, info in enumerate(columns):
            name = info.name
            tpe = info.tpe.strip()
            flag = info.flag.split()
            if "i" in flag or "ignore" in flag:
                continue
            if "b" in flag or "basket" in flag:
                self.basket_column = col
                continue
            is_class = "class" in flag
            is_meta = "m" in flag or "meta" in flag or tpe in ["s", "string"]
            is_weight = ("w" in flag or "weight" in flag
                         or tpe in ["w", "weight"])
            if is_weight:
                if is_class:
                    raise ValueError("Variable {} (column {}) is marked as "
                                     "class and weight".format(name, col))
                self.weight_column = col
                continue
            # Weight types never reach this point, so only the variable
            # types remain to be distinguished.
            if tpe in ["c", "continuous"]:
                var = ContinuousVariable.make(name)
            elif tpe in ["d", "discrete"]:
                var = DiscreteVariable.make(name)
            elif tpe in ["s", "string"]:
                var = StringVariable.make(name)
            else:
                # Any other type text is a list of discrete values.
                values = [v.replace("\\ ", " ")
                          for v in self.non_escaped_spaces.split(tpe)]
                var = DiscreteVariable.make(name, values, True)
            var.fix_order = (isinstance(var, DiscreteVariable)
                             and not var.values)
            if is_class:
                if is_meta:
                    raise ValueError(
                        "Variable {} (column {}) is marked as "
                        "class and meta attribute".format(name, col))
                class_vars.append(var)
                self.classvar_columns.append((col, var.val_from_str_add))
            elif is_meta:
                metas.append(var)
                self.meta_columns.append((col, var.val_from_str_add))
            else:
                attributes.append(var)
                self.attribute_columns.append((col, var.val_from_str_add))
        return Domain(attributes, class_vars, metas)

    def count_lines(self, filename):
        """
        Counts the number of data lines in the file. This can be done
        without reading the entire file because the file
        has fixed width columns.

        The count is the file size divided by the length of the first
        line, rounded up, minus the three header lines; it is never
        negative. Rounding up keeps a last line without a trailing
        newline from being lost.
        """
        len_file = os.stat(filename).st_size
        # Binary mode: the line length must be in bytes, like the file
        # size, whatever the newline convention or encoding.
        with open(filename, "rb") as f:
            len_line = len(f.readline())
        if not len_line:
            return 0
        n_lines = -(-len_file // len_line)
        return max(0, n_lines - 3)

    def read_cell(self, filename, index_row, name_attribute):
        """
        Reads one specific cell value without reading the entire file.

        :param index_row: zero-based index of the data row
        :param name_attribute: name of the column, as given in the header
        :return: the cell parsed by the reader of its column, or None if
            the column has no reader (e.g. it is ignored)
        :raises IndexError: if no column is named `name_attribute`

        TODO:
          - Cleanup this function.
          - Test with discrete and class attributes.
          - Cache the header information.
        """
        info_columns = self.read_ends_columns(filename)
        # Called for its side effect: it (re)builds the column reader
        # lists used below.
        self.read_header(filename)
        matches = [ic for ic in info_columns if ic.name == name_attribute]
        if not matches:
            raise IndexError("No column named {!r}".format(name_attribute))
        col = matches[0]
        len_line = sum(ic.width for ic in info_columns) + 1  # for \n
        with open(filename) as f:
            f.seek((3 + index_row) * len_line + col.start)
            value = f.read(col.width)
        text = value.strip()
        value_n = None
        # Parse the string in the correct format. This is a kludge
        # based on code from read_data().
        readers = chain(self.attribute_columns, self.classvar_columns,
                        self.meta_columns)
        for coli, reader in readers:
            if coli == col.index:
                value_n = reader(text)
        return value_n

    def read_data(self, filename, table):
        """
        Read the data portion of the file into a pre-allocated table.

        Blank lines are skipped; lines with fewer cells than columns are
        padded with empty strings. If fewer rows were read than the table
        was allocated for, the table's arrays are shrunk in place and
        `table.n_rows` is updated.

        This function is based on the one in TabDelimFormat.

        :raises ValueError: if a line has more cells than there are columns

        TODO:
          - Use the actual known width of the columns instead
            of splitting on space, because that will allow spaces
            to be part of the cell values.
            That is, use read_ends_columns.
        """
        X, Y = table.X, table._Y
        W = table.W if table.W.shape[-1] else None
        n_allocated = len(X)
        padding = [""] * self.n_columns
        if self.basket_column >= 0:
            # TODO how many columns?!
            # The shape must be given as a single tuple.
            table._Xsparse = sparse.lil_matrix((n_allocated, 100))
        table.metas = metas = np.empty(
            (n_allocated, len(self.meta_columns)), dtype=object)
        line_count = 0
        with open(filename) as f:
            # Skip the names, types and flags lines.
            f.readline()
            f.readline()
            f.readline()
            for lne in f:
                # Only difference with TabDelimReader: cells are
                # separated by runs of whitespace, not by tabs.
                values = lne.split()
                if not values:
                    continue
                if len(values) > self.n_columns:
                    raise ValueError("Too many columns in line {}".
                                     format(4 + line_count))
                elif len(values) < self.n_columns:
                    values += padding
                for i, (col, reader) in enumerate(self.attribute_columns):
                    X[line_count, i] = reader(values[col].strip())
                for i, (col, reader) in enumerate(self.classvar_columns):
                    Y[line_count, i] = reader(values[col].strip())
                if W is not None:
                    W[line_count] = float(values[self.weight_column])
                for i, (col, reader) in enumerate(self.meta_columns):
                    metas[line_count, i] = reader(values[col].strip())
                line_count += 1
        if line_count != n_allocated:
            # ndarray.resize refuses to work while other references to
            # the array exist, so drop the local aliases first.
            del X, Y, W, metas
            table.X.resize(line_count, len(table.domain.attributes))
            # Resize the array that was filled above (_Y), not whatever
            # the public Y attribute returns.
            table._Y.resize(line_count, len(table.domain.class_vars))
            if table.W.ndim == 1:
                table.W.resize(line_count)
            else:
                table.W.resize((line_count, 0))
            table.metas.resize((line_count, len(self.meta_columns)))
            table.n_rows = line_count

    def read_file(self, filename, cls=None):
        """
        Read a file and return it as a table.

        The distinction between read_file and _read_file cannot
        be made because we cannot get the length of a stream etc.

        :param filename: path of the fixed width file
        :param cls: class providing `from_domain`; defaults to Table
        """
        if cls is None:
            # Imported only when needed.
            from ..data import Table
            cls = Table
        domain = self.read_header(filename)
        n_examples = self.count_lines(filename)
        table = cls.from_domain(domain, n_examples, self.weight_column >= 0)
        self.read_data(filename, table)
        self.reorder_values(table)
        return table
||
968 |
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing
__init__.py
files in your module folders. Make sure that you place one file in each sub-folder.