Passed
Push — 2.x ( a6588e...513289 )
by Ramon
09:37
created

InstrumentCSVResultsFileParser.splitline()   A

Complexity

Conditions 1

Size

Total Lines 8
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 8
rs 10
c 0
b 0
f 0
cc 1
nop 3
1
# -*- coding: utf-8 -*-
2
#
3
# This file is part of SENAITE.CORE.
4
#
5
# SENAITE.CORE is free software: you can redistribute it and/or modify it under
6
# the terms of the GNU General Public License as published by the Free Software
7
# Foundation, version 2.
8
#
9
# This program is distributed in the hope that it will be useful, but WITHOUT
10
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12
# details.
13
#
14
# You should have received a copy of the GNU General Public License along with
15
# this program; if not, write to the Free Software Foundation, Inc., 51
16
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17
#
18
# Copyright 2018-2025 by its authors.
19
# Some rights reserved, see README and LICENSE.
20
21
import codecs
22
import csv
23
24
from bika.lims import deprecated
25
from senaite.core.exportimport.instruments.logger import Logger
26
from six.moves import StringIO
27
from zope.deprecation import deprecate
28
29
30
class InstrumentResultsFileParser(Logger):
    """Base parser for instrument results files

    Child classes implement parse() and store the data they read by calling
    _addRawResult(); the collected data is exposed by getRawResults().
    """

    def __init__(self, infile, mimetype):
        Logger.__init__(self)
        self._infile = infile
        self._header = {}
        self._rawresults = {}
        self._mimetype = mimetype
        self._numline = 0

    def getInputFile(self):
        """Returns the results input file
        """
        return self._infile

    def parse(self):
        """Parses the input file and populates the rawresults dict.

        See getRawResults() method for more info about rawresults format

        Returns True if the file has been parsed successfully.

        It is highly recommended to use the _addRawResult method when adding
        raw results.

        IMPORTANT: To be implemented by child classes
        """
        raise NotImplementedError

    @deprecate("Please use getRawResults directly")
    def resume(self):
        """Resumes the parse process

        Called by the Results Importer after parse() call

        Returns False (and logs a warning) if no raw results were collected,
        True otherwise.
        """
        if not self.getRawResults():
            self.warn("No results found")
            return False
        return True

    def getAttachmentFileType(self):
        """Returns the file type name that will be used when creating the
        AttachmentType used by the importer for saving the results file as
        an attachment in each Analysis matched.
        By default returns self.getFileMimeType()
        """
        return self.getFileMimeType()

    def getFileMimeType(self):
        """Returns the results file type
        """
        return self._mimetype

    def getHeader(self):
        """Returns a dictionary with custom key, values
        """
        return self._header

    def _addRawResult(self, resid, values=None, override=False):
        """Adds a set of raw results for an object with id=resid

        resid is usually an Analysis Request ID or Worksheet's Reference
        Analysis ID. The values are a dictionary in which the keys are
        analysis service keywords and the values, another dictionary with
        the key,value results.
        The column 'DefaultResult' must be provided, because it is used to
        map to the column from which the default result must be retrieved.

        If override is True, or no results exist yet for resid, the results
        stored for resid are replaced by a list that only contains values.
        Otherwise, values is appended to the results already stored.

        Example:
        resid  = 'DU13162-001-R1'
        values = {
            'D2': {'DefaultResult': 'Final Conc',
                   'Remarks':       '',
                   'Resp':          '5816',
                   'ISTD Resp':     '274638',
                   'Resp Ratio':    '0.0212',
                   'Final Conc':    '0.9145',
                   'Exp Conc':      '1.9531',
                   'Accuracy':      '98.19' },

            'D3': {'DefaultResult': 'Final Conc',
                   'Remarks':       '',
                   'Resp':          '5816',
                   'ISTD Resp':     '274638',
                   'Resp Ratio':    '0.0212',
                   'Final Conc':    '0.9145',
                   'Exp Conc':      '1.9531',
                   'Accuracy':      '98.19' }
            }
        """
        if values is None:
            # Create a fresh dict on each call: a dict used as the default
            # argument would be the very same object stored for every resid
            values = {}
        if override or resid not in self._rawresults:
            self._rawresults[resid] = [values]
        else:
            self._rawresults[resid].append(values)

    def _emptyRawResults(self):
        """Remove all grabbed raw results
        """
        self._rawresults = {}

    def getObjectsTotalCount(self):
        """The total number of objects (ARs, ReferenceSamples, etc.) parsed
        """
        return len(self.getRawResults())

    def getResultsTotalCount(self):
        """The total number of analysis results parsed
        """
        return sum(len(rows) for rows in self.getRawResults().values())

    def getAnalysesTotalCount(self):
        """The total number of different analyses parsed
        """
        return len(self.getAnalysisKeywords())

    def getAnalysisKeywords(self):
        """Return found analysis service keywords

        Returns a list without duplicates, in no particular order.
        """
        keywords = set()
        for rows in self.getRawResults().values():
            for row in rows:
                # update() accepts any iterable of keys, so this works for
                # both the list (py2) and the view (py3) returned by keys()
                keywords.update(row.keys())
        return list(keywords)

    def getRawResults(self):
        """Returns a dictionary containing the parsed results data

        Each dict key is the results row ID (usually AR ID or Worksheet's
        Reference Sample ID). Each item is a list of dictionaries, in which
        the key is the AS Keyword.

        Inside the AS dict, the column 'DefaultResult' must be provided, that
        maps to the column from which the default result must be retrieved.

        If 'Remarks' column is found, its value will be set in Analysis
        Remarks field when using the default Importer.

        Example:

            raw_results['DU13162-001-R1'] = [{

                'D2': {'DefaultResult': 'Final Conc',
                        'Remarks':       '',
                        'Resp':          '5816',
                        'ISTD Resp':     '274638',
                        'Resp Ratio':    '0.0212',
                        'Final Conc':    '0.9145',
                        'Exp Conc':      '1.9531',
                        'Accuracy':      '98.19' },

                'D3': {'DefaultResult': 'Final Conc',
                        'Remarks':       '',
                        'Resp':          '5816',
                        'ISTD Resp':     '274638',
                        'Resp Ratio':    '0.0212',
                        'Final Conc':    '0.9145',
                        'Exp Conc':      '1.9531',
                        'Accuracy':      '98.19' }}]

            in which:
            - 'DU13162-001-R1' is the Analysis Request ID,
            - 'D2' column is an analysis service keyword,
            - 'DefaultResult' column maps to the column with default result
            - 'Remarks' column with Remarks results for that Analysis
            - The rest of the dict columns are results (or additional info)
              that can be set to the analysis if needed (the default importer
              will look for them if the analysis has Interim fields).

            In the case of reference samples:
            Control/Blank:
            raw_results['QC13-0001-0002'] = {...}

            Duplicate of sample DU13162-009 (from AR DU13162-009-R1)
            raw_results['QC-DU13162-009-002'] = {...}

        """
        return self._rawresults
211
212
213
class InstrumentCSVResultsFileParser(InstrumentResultsFileParser):
    """Parser for CSV files

    Reads the input file line by line and delegates the handling of each
    non-empty line to _parseline(), to be implemented by child classes.
    """

    def __init__(self, infile, encoding=None):
        InstrumentResultsFileParser.__init__(self, infile, 'CSV')
        # Some Instruments can generate files with different encodings, so we
        # may need this parameter
        self._encoding = encoding

    def parse(self):
        """Parses the input file, calling _parseline() for each non-empty line

        Returns False as soon as _parseline() reports a critical error (-1),
        True when the end of the file is reached.
        """
        infile = self.getInputFile()
        self.log("Parsing file ${file_name}",
                 mapping={"file_name": infile.filename})

        # We test in import functions if the file was uploaded
        opened = None
        try:
            if self._encoding:
                opened = codecs.open(infile.name, "r",
                                     encoding=self._encoding)
            else:
                opened = open(infile.name, "rU")
            source = opened
        except AttributeError:
            # infile has no name: read from the object itself
            source = infile
        except IOError:
            # infile.name cannot be opened: read from the wrapped file object
            source = infile.file

        try:
            lines = source.readlines()
        finally:
            # Only close the handle opened here; the rest belong to the caller
            if opened is not None:
                opened.close()

        jump = 0
        for line in lines:
            self._numline += 1
            if jump > 0:
                # Jump some lines
                jump -= 1
                continue

            line = line.strip()
            if not line:
                continue

            # A _parseline that returns nothing means "read the next line"
            jump = self._parseline(line) or 0
            if jump == -1:
                # Something went wrong. Finish. Checked right away, so an
                # error in the last line of the file is reported as well
                self.err("File processing finished due to critical errors")
                return False

        self.log(
            "End of file reached successfully: ${total_objects} objects, "
            "${total_analyses} analyses, ${total_results} results",
            mapping={"total_objects": self.getObjectsTotalCount(),
                     "total_analyses": self.getAnalysesTotalCount(),
                     "total_results": self.getResultsTotalCount()}
        )
        return True

    def splitline(self, line, delimiter=","):
        """Parse a single CSV line

        :param line: text of the line to split
        :param delimiter: character that separates the values in the line
        :return: list of the values found, with surrounding whitespace
            stripped. An empty list if the line is empty
        """
        # use CSV library to correctly split quoted values
        reader = csv.reader(StringIO(line), delimiter=delimiter)
        # The reader yields no row at all for an empty line: default to no
        # tokens instead of letting StopIteration leak to the caller
        parsed_line = next(reader, [])
        return [token.strip() for token in parsed_line]

    @deprecated(comment="Please use splitline instead",
                replacement="splitline")
    def splitLine(self, line):
        """Obsolete: use self.splitline instead
        """
        return self.splitline(line)

    def _parseline(self, line):
        """Parses a line from the input CSV file and populates rawresults
        (look at getRawResults comment)

        Returns -1 if a critical error is found and the parser must end.
        Otherwise, returns the number of lines to be jumped in next read. If
        0, the parser reads the next line as usual.

        IMPORTANT: To be implemented by child classes
        """
        raise NotImplementedError
291
292
293
class InstrumentTXTResultsFileParser(InstrumentResultsFileParser):
    """Parser for TXT files

    Reads the input file line by line and delegates the handling of each
    non-empty line to _parseline(), to be implemented by child classes.
    """

    def __init__(self, infile, separator, encoding=None):
        InstrumentResultsFileParser.__init__(self, infile, 'TXT')
        # String used by splitLine to break a line into tokens
        self._separator = separator
        # Some Instruments can generate files with different encodings, so we
        # may need this parameter
        self._encoding = encoding

    def parse(self):
        """Parses the input file, calling _parseline() for each non-empty line

        Returns False as soon as _parseline() reports a critical error (-1),
        True when the end of the file is reached.
        """
        infile = self.getInputFile()
        self.log("Parsing file ${file_name}",
                 mapping={"file_name": infile.filename})

        jump = 0
        # Lines come already stripped from read_file
        for line in self.read_file(infile):
            self._numline += 1
            if jump > 0:
                # Jump some lines
                jump -= 1
                continue

            if not line:
                continue

            # A _parseline that returns nothing means "read the next line"
            jump = self._parseline(line) or 0
            if jump == -1:
                # Something went wrong. Finish. Checked right away, so an
                # error in the last line of the file is reported as well
                self.err("File processing finished due to critical errors")
                return False

        self.log(
            "End of file reached successfully: ${total_objects} objects, "
            "${total_analyses} analyses, ${total_results} results",
            mapping={"total_objects": self.getObjectsTotalCount(),
                     "total_analyses": self.getAnalysesTotalCount(),
                     "total_results": self.getResultsTotalCount()}
        )
        return True

    def read_file(self, infile):
        """Given an input file read its contents, strip whitespace from the
        beginning and end of each line and return a list of the preprocessed
        lines read.

        :param infile: file that contains the data to be read
        :return: list of the read lines with stripped whitespace
        """
        try:
            encoding = self._encoding if self._encoding else None
            mode = 'r' if self._encoding else 'rU'
            with codecs.open(infile.name, mode, encoding=encoding) as f:
                lines = f.readlines()
        except AttributeError:
            # infile has no name: read from the object itself
            lines = infile.readlines()
        return [line.strip() for line in lines]

    def splitLine(self, line):
        """Splits the line by the separator set for this parser

        :param line: text of the line to split
        :return: list of the tokens found, with surrounding whitespace
            stripped
        """
        return [token.strip() for token in line.split(self._separator)]

    def _parseline(self, line):
        """Parses a line from the input TXT file and populates rawresults
        (look at getRawResults comment)

        Returns -1 if a critical error is found and the parser must end.
        Otherwise, returns the number of lines to be jumped in next read. If
        0, the parser reads the next line as usual.

        IMPORTANT: To be implemented by child classes
        """
        raise NotImplementedError
366