senaite.core.exportimport.instruments.parser.InstrumentCSVResultsFileParser.splitline() - Code Metrics - Inspection of "Support date and datetime on result entry (#2690)" - senaite/senaite.core - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — 2.x ( a6588e...513289 )

by Ramon

created 2025-03-19 13:23 UTC

InstrumentCSVResultsFileParser.splitline() A

↳ Parent: senaite.core.exportimport.instruments.parser

Complexity

Conditions

Size

Total Lines	8
Code Lines	5

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	5
dl	0
loc	8
rs	10
c	0
b	0
f	0
cc	1
nop	3

# -*- coding: utf-8 -*-
#
# This file is part of SENAITE.CORE.
#
# SENAITE.CORE is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Copyright 2018-2025 by its authors.
# Some rights reserved, see README and LICENSE.

import codecs
import csv

from bika.lims import deprecated
from senaite.core.exportimport.instruments.logger import Logger
from six.moves import StringIO
from zope.deprecation import deprecate


class InstrumentResultsFileParser(Logger):
    """Base parser

    Keeps the input file, its mime type, a header dict and the raw results
    gathered while parsing. Child classes must implement parse().
    """

    def __init__(self, infile, mimetype):
        """Stores the input file and mime type and resets the parser state
        """
        Logger.__init__(self)
        self._infile = infile
        self._header = {}
        self._rawresults = {}
        self._mimetype = mimetype
        # Number of the line being processed; incremented by child parsers
        self._numline = 0

    def getInputFile(self):
        """ Returns the results input file
        """
        return self._infile

    def parse(self):
        """Parses the input file and populates the rawresults dict.

        See getRawResults() method for more info about rawresults format

        Returns True if the file has been parsed successfully.

        It is highly recommended to use the _addRawResult method when adding
        raw results.

        IMPORTANT: To be implemented by child classes
        """
        raise NotImplementedError

    @deprecate("Please use getRawResults directly")
    def resume(self):
        """Resumes the parse process

        Called by the Results Importer after parse() call

        Returns False (and logs a warning) if no raw results were collected,
        True otherwise.
        """
        if not self.getRawResults():
            self.warn("No results found")
            return False
        return True

    def getAttachmentFileType(self):
        """ Returns the file type name that will be used when creating the
            AttachmentType used by the importer for saving the results file as
            an attachment in each Analysis matched.
            By default returns self.getFileMimeType()
        """
        return self.getFileMimeType()

    def getFileMimeType(self):
        """ Returns the results file type
        """
        return self._mimetype

    def getHeader(self):
        """ Returns a dictionary with custom key, values
        """
        return self._header

    def _addRawResult(self, resid, values=None, override=False):
        """ Adds a set of raw results for an object with id=resid
            resid is usually an Analysis Request ID or Worksheet's Reference
            Analysis ID. The values are a dictionary in which the keys are
            analysis service keywords and the values, another dictionary with
            the key,value results.
            The column 'DefaultResult' must be provided, because it is used to
            map to the column from which the default result must be retrieved.

            If values is omitted (None), a new empty dict is stored.
            If override is True, any results previously stored for resid are
            replaced; otherwise the values are appended to the existing ones.

            Example:
            resid  = 'DU13162-001-R1'
            values = {
                'D2': {'DefaultResult': 'Final Conc',
                       'Remarks':       '',
                       'Resp':          '5816',
                       'ISTD Resp':     '274638',
                       'Resp Ratio':    '0.0212',
                       'Final Conc':    '0.9145',
                       'Exp Conc':      '1.9531',
                       'Accuracy':      '98.19' },

                'D3': {'DefaultResult': 'Final Conc',
                       'Remarks':       '',
                       'Resp':          '5816',
                       'ISTD Resp':     '274638',
                       'Resp Ratio':    '0.0212',
                       'Final Conc':    '0.9145',
                       'Exp Conc':      '1.9531',
                       'Accuracy':      '98.19' }
                }
        """
        # A literal {} default would be a single dict shared by every call
        # that omits `values`, and that shared dict would end up stored in
        # the raw results of several objects. Create a fresh one per call.
        if values is None:
            values = {}
        if override or resid not in self._rawresults:
            self._rawresults[resid] = [values]
        else:
            self._rawresults[resid].append(values)

    def _emptyRawResults(self):
        """Remove all grabbed raw results
        """
        self._rawresults = {}

    def getObjectsTotalCount(self):
        """The total number of objects (ARs, ReferenceSamples, etc.) parsed
        """
        return len(self.getRawResults())

    def getResultsTotalCount(self):
        """The total number of analysis results parsed
        """
        return sum(len(rows) for rows in self.getRawResults().values())

    def getAnalysesTotalCount(self):
        """ The total number of different analyses parsed
        """
        return len(self.getAnalysisKeywords())

    def getAnalysisKeywords(self):
        """Return found analysis service keywords

        Returns a list with the distinct keys found across all the raw
        result rows, in no particular order.
        """
        # Collect into a set; concatenating a list with dict.keys() fails on
        # Python 3, where keys() is a view and not a list
        keywords = set()
        for rows in self.getRawResults().values():
            for row in rows:
                keywords.update(row)
        return list(keywords)

    def getRawResults(self):
        """Returns a dictionary containing the parsed results data

        Each dict key is the results row ID (usually AR ID or Worksheet's
        Reference Sample ID). Each item is a list of dictionaries, in which
        the key is the AS Keyword.

        Inside the AS dict, the column 'DefaultResult' must be provided, that
        maps to the column from which the default result must be retrieved.

        If a 'Remarks' column is found, its value will be set in the Analysis
        Remarks field when using the default Importer.

        Example:

            raw_results['DU13162-001-R1'] = [{

                'D2': {'DefaultResult': 'Final Conc',
                        'Remarks':       '',
                        'Resp':          '5816',
                        'ISTD Resp':     '274638',
                        'Resp Ratio':    '0.0212',
                        'Final Conc':    '0.9145',
                        'Exp Conc':      '1.9531',
                        'Accuracy':      '98.19' },

                'D3': {'DefaultResult': 'Final Conc',
                        'Remarks':       '',
                        'Resp':          '5816',
                        'ISTD Resp':     '274638',
                        'Resp Ratio':    '0.0212',
                        'Final Conc':    '0.9145',
                        'Exp Conc':      '1.9531',
                        'Accuracy':      '98.19' }}]

            in which:
            - 'DU13162-001-R1' is the Analysis Request ID,
            - 'D2' column is an analysis service keyword,
            - 'DefaultResult' column maps to the column with default result
            - 'Remarks' column with Remarks results for that Analysis
            - The rest of the dict columns are results (or additional info)
              that can be set to the analysis if needed (the default importer
              will look for them if the analysis has Interim fields).

            In the case of reference samples:
            Control/Blank:
            raw_results['QC13-0001-0002'] = {...}

            Duplicate of sample DU13162-009 (from AR DU13162-009-R1)
            raw_results['QC-DU13162-009-002'] = {...}

        """
        return self._rawresults


class InstrumentCSVResultsFileParser(InstrumentResultsFileParser):
    """Parser for CSV files
    """

    def __init__(self, infile, encoding=None):
        InstrumentResultsFileParser.__init__(self, infile, 'CSV')
        # Some Instruments can generate files with different encodings, so we
        # may need this parameter
        self._encoding = encoding

    def parse(self):
        """Reads the input file line by line and delegates to _parseline

        Blank lines are skipped and the remaining lines are stripped before
        being handed to _parseline. The value returned by _parseline tells
        how many of the following lines must be jumped, or -1 to abort.

        Returns False if _parseline reported a critical error (-1), True
        once the end of the file has been reached.
        """
        infile = self.getInputFile()
        self.log("Parsing file ${file_name}",
                 mapping={"file_name": infile.filename})

        # We test in import functions if the file was uploaded
        opened = None
        try:
            if self._encoding:
                opened = codecs.open(infile.name, "r",
                                     encoding=self._encoding)
            else:
                opened = open(infile.name, "rU")
            f = opened
        except AttributeError:
            # infile has no "name": read from the object itself
            f = infile
        except IOError:
            # infile.name cannot be opened: read from the wrapped file
            f = infile.file

        try:
            lines = f.readlines()
        finally:
            # Only close the handle opened here; a file object handed over
            # by the caller is left untouched
            if opened is not None:
                opened.close()

        jump = 0
        for line in lines:
            self._numline += 1
            if jump > 0:
                # Jump some lines
                jump -= 1
                continue

            line = line.strip()
            if not line:
                continue

            jump = self._parseline(line)
            if jump == -1:
                # Something went wrong. Finish. Checked right after parsing
                # the line, so an error in the last line is detected too
                self.err("File processing finished due to critical errors")
                return False

        self.log(
            "End of file reached successfully: ${total_objects} objects, "
            "${total_analyses} analyses, ${total_results} results",
            mapping={"total_objects": self.getObjectsTotalCount(),
                     "total_analyses": self.getAnalysesTotalCount(),
                     "total_results": self.getResultsTotalCount()}
        )
        return True

    def splitline(self, line, delimiter=","):
        """Parse a single CSV line

        Returns the list of tokens of the line, each one stripped from
        leading and trailing whitespace. Returns an empty list if the line
        is empty.
        """
        # use CSV library to correctly split quoted values
        fb = StringIO(line)
        reader = csv.reader(fb, delimiter=delimiter)
        # The reader yields nothing for an empty line; default to no tokens
        # instead of leaking a StopIteration to the caller
        parsed_line = next(reader, [])
        return [token.strip() for token in parsed_line]

    @deprecated(comment="Please use splitline instead",
                replacement="splitline")
    def splitLine(self, line):
        """Obsolete: use self.splitline instead
        """
        return self.splitline(line)

    def _parseline(self, line):
        """ Parses a line from the input CSV file and populates rawresults
            (look at getRawResults comment)
            returns -1 if critical error found and parser must end
            returns the number of lines to be jumped in next read. If 0, the
            parser reads the next line as usual

            IMPORTANT: To be implemented by child classes
        """
        raise NotImplementedError


class InstrumentTXTResultsFileParser(InstrumentResultsFileParser):
    """Parser for TXT files
    """

    def __init__(self, infile, separator, encoding=None):
        InstrumentResultsFileParser.__init__(self, infile, 'TXT')
        # Token delimiter used by splitLine
        self._separator = separator
        # Some Instruments can generate files with different encodings, so we
        # may need this parameter
        self._encoding = encoding

    def parse(self):
        """Reads the input file line by line and delegates to _parseline

        Lines are read (and stripped) by read_file; empty lines are skipped.
        The value returned by _parseline tells how many of the following
        lines must be jumped, or -1 to abort.

        Returns False if _parseline reported a critical error (-1), True
        once the end of the file has been reached.
        """
        infile = self.getInputFile()
        self.log("Parsing file ${file_name}",
                 mapping={"file_name": infile.filename})

        jump = 0
        for line in self.read_file(infile):
            self._numline += 1
            if jump > 0:
                # Jump some lines
                jump -= 1
                continue

            if not line:
                continue

            jump = self._parseline(line)
            if jump == -1:
                # Something went wrong. Finish. Checked right after parsing
                # the line, so an error in the last line is detected too
                self.err("File processing finished due to critical errors")
                return False

        self.log(
            "End of file reached successfully: ${total_objects} objects, "
            "${total_analyses} analyses, ${total_results} results",
            mapping={"total_objects": self.getObjectsTotalCount(),
                     "total_analyses": self.getAnalysesTotalCount(),
                     "total_results": self.getResultsTotalCount()}
        )
        return True

    def read_file(self, infile):
        """Given an input file read its contents, strip whitespace from the
         beginning and end of each line and return a list of the preprocessed
         lines read.

        :param infile: file that contains the data to be read
        :return: list of the read lines with stripped whitespace
        """
        encoding = self._encoding or None
        mode = 'r' if self._encoding else 'rU'
        try:
            with codecs.open(infile.name, mode, encoding=encoding) as f:
                lines = f.readlines()
        except AttributeError:
            # infile has no "name": read from the object itself
            lines = infile.readlines()
        except IOError:
            # infile.name cannot be opened: read from the wrapped file, as
            # the CSV parser does. Re-raise if there is nothing to fall
            # back to, so the original error is not masked
            fileobj = getattr(infile, "file", None)
            if fileobj is None:
                raise
            lines = fileobj.readlines()
        return [line.strip() for line in lines]

    def splitLine(self, line):
        """Splits the line by the separator set on init and returns the
        list of tokens, each one stripped from surrounding whitespace
        """
        return [token.strip() for token in line.split(self._separator)]

    def _parseline(self, line):
        """ Parses a line from the input TXT file and populates rawresults
            (look at getRawResults comment)
            returns -1 if critical error found and parser must end
            returns the number of lines to be jumped in next read. If 0, the
            parser reads the next line as usual

            IMPORTANT: To be implemented by child classes
        """
        raise NotImplementedError


1			# -- coding: utf-8 --
2			#
3			# This file is part of SENAITE.CORE.
4			#
5			# SENAITE.CORE is free software: you can redistribute it and/or modify it under
6			# the terms of the GNU General Public License as published by the Free Software
7			# Foundation, version 2.
8			#
9			# This program is distributed in the hope that it will be useful, but WITHOUT
10			# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11			# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12			# details.
13			#
14			# You should have received a copy of the GNU General Public License along with
15			# this program; if not, write to the Free Software Foundation, Inc., 51
16			# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17			#
18			# Copyright 2018-2025 by it's authors.
19			# Some rights reserved, see README and LICENSE.
20
21			import codecs
22			import csv
23
24			from bika.lims import deprecated
25			from senaite.core.exportimport.instruments.logger import Logger
26			from six.moves import StringIO
27			from zope.deprecation import deprecate
28
29
30			class InstrumentResultsFileParser(Logger):
31			"""Base parser
32			"""
33
34			def __init__(self, infile, mimetype):
35			Logger.__init__(self)
36			self._infile = infile
37			self._header = {}
38			self._rawresults = {}
39			self._mimetype = mimetype
40			self._numline = 0
41
42			def getInputFile(self):
43			""" Returns the results input file
44			"""
45			return self._infile
46
47			def parse(self):
48			"""Parses the input file and populates the rawresults dict.
49
50			See getRawResults() method for more info about rawresults format
51
52			Returns True if the file has been parsed successfully.
53
54			Is highly recommended to use _addRawResult method when adding raw
55			results.
56
57			IMPORTANT: To be implemented by child classes
58			"""
59			raise NotImplementedError
60
61			@deprecate("Please use getRawResults directly")
62			def resume(self):
63			"""Resumes the parse process
64
65			Called by the Results Importer after parse() call
66			"""
67			if len(self.getRawResults()) == 0:
68			self.warn("No results found")
69			return False
70			return True
71
72			def getAttachmentFileType(self):
73			""" Returns the file type name that will be used when creating the
74			AttachmentType used by the importer for saving the results file as
75			an attachment in each Analysis matched.
76			By default returns self.getFileMimeType()
77			"""
78			return self.getFileMimeType()
79
80			def getFileMimeType(self):
81			""" Returns the results file type
82			"""
83			return self._mimetype
84
85			def getHeader(self):
86			""" Returns a dictionary with custom key, values
87			"""
88			return self._header
89
90			def _addRawResult(self, resid, values={}, override=False):
91			""" Adds a set of raw results for an object with id=resid
92			resid is usually an Analysis Request ID or Worksheet's Reference
93			Analysis ID. The values are a dictionary in which the keys are
94			analysis service keywords and the values, another dictionary with
95			the key,value results.
96			The column 'DefaultResult' must be provided, because is used to map
97			to the column from which the default result must be retrieved.
98
99			Example:
100			resid = 'DU13162-001-R1'
101			values = {
102			'D2': {'DefaultResult': 'Final Conc',
103			'Remarks': '',
104			'Resp': '5816',
105			'ISTD Resp': '274638',
106			'Resp Ratio': '0.0212',
107			'Final Conc': '0.9145',
108			'Exp Conc': '1.9531',
109			'Accuracy': '98.19' },
110
111			'D3': {'DefaultResult': 'Final Conc',
112			'Remarks': '',
113			'Resp': '5816',
114			'ISTD Resp': '274638',
115			'Resp Ratio': '0.0212',
116			'Final Conc': '0.9145',
117			'Exp Conc': '1.9531',
118			'Accuracy': '98.19' }
119			}
120			"""
121			if override or resid not in self._rawresults.keys():
122			self._rawresults[resid] = [values]
123			else:
124			self._rawresults[resid].append(values)
125
126			def _emptyRawResults(self):
127			"""Remove all grabbed raw results
128			"""
129			self._rawresults = {}
130
131			def getObjectsTotalCount(self):
132			"""The total number of objects (ARs, ReferenceSamples, etc.) parsed
133			"""
134			return len(self.getRawResults())
135
136			def getResultsTotalCount(self):
137			"""The total number of analysis results parsed
138			"""
139			count = 0
140			for val in self.getRawResults().values():
141			count += len(val)
142			return count
143
144			def getAnalysesTotalCount(self):
145			""" The total number of different analyses parsed
146			"""
147			return len(self.getAnalysisKeywords())
148
149			def getAnalysisKeywords(self):
150			"""Return found analysis service keywords
151			"""
152			analyses = []
153			for rows in self.getRawResults().values():
154			for row in rows:
155			analyses = list(set(analyses + row.keys()))
156			return analyses
157
158			def getRawResults(self):
159			"""Returns a dictionary containing the parsed results data
160
161			Each dict key is the results row ID (usually AR ID or Worksheet's
162			Reference Sample ID). Each item is another dictionary, in which the key
163			is a the AS Keyword.
164
165			Inside the AS dict, the column 'DefaultResult' must be provided, that
166			maps to the column from which the default result must be retrieved.
167
168			If 'Remarks' column is found, it value will be set in Analysis Remarks
169			field when using the deault Importer.
170
171			Example:
172
173			raw_results['DU13162-001-R1'] = [{
174
175			'D2': {'DefaultResult': 'Final Conc',
176			'Remarks': '',
177			'Resp': '5816',
178			'ISTD Resp': '274638',
179			'Resp Ratio': '0.0212',
180			'Final Conc': '0.9145',
181			'Exp Conc': '1.9531',
182			'Accuracy': '98.19' },
183
184			'D3': {'DefaultResult': 'Final Conc',
185			'Remarks': '',
186			'Resp': '5816',
187			'ISTD Resp': '274638',
188			'Resp Ratio': '0.0212',
189			'Final Conc': '0.9145',
190			'Exp Conc': '1.9531',
191			'Accuracy': '98.19' }]
192
193			in which:
194			- 'DU13162-001-R1' is the Analysis Request ID,
195			- 'D2' column is an analysis service keyword,
196			- 'DefaultResult' column maps to the column with default result
197			- 'Remarks' column with Remarks results for that Analysis
198			- The rest of the dict columns are results (or additional info)
199			that can be set to the analysis if needed (the default importer
200			will look for them if the analysis has Interim fields).
201
202			In the case of reference samples:
203			Control/Blank:
204			raw_results['QC13-0001-0002'] = {...}
205
206			Duplicate of sample DU13162-009 (from AR DU13162-009-R1)
207			raw_results['QC-DU13162-009-002'] = {...}
208
209			"""
210			return self._rawresults
211
212
213			class InstrumentCSVResultsFileParser(InstrumentResultsFileParser):
214			"""Parser for CSV files
215			"""
216
217			def __init__(self, infile, encoding=None):
218			InstrumentResultsFileParser.__init__(self, infile, 'CSV')
219			# Some Instruments can generate files with different encodings, so we
220			# may need this parameter
221			self._encoding = encoding
222
223			def parse(self):
224			infile = self.getInputFile()
225			self.log("Parsing file ${file_name}",
226			mapping={"file_name": infile.filename})
227			jump = 0
228			# We test in import functions if the file was uploaded
229			try:
230			if self._encoding:
231			f = codecs.open(infile.name, "r", encoding=self._encoding)
232			else:
233			f = open(infile.name, "rU")
234			except AttributeError:
235			f = infile
236			except IOError:
237			f = infile.file
238
239			for line in f.readlines():
240			self._numline += 1
241			if jump == -1:
242			# Something went wrong. Finish
243			self.err("File processing finished due to critical errors")
244			return False
245			if jump > 0:
246			# Jump some lines
247			jump -= 1
248			continue
249
250			if not line or not line.strip():
251			continue
252
253			line = line.strip()
254			jump = 0
255			if line:
256			jump = self._parseline(line)
257
258			self.log(
259			"End of file reached successfully: ${total_objects} objects, "
260			"${total_analyses} analyses, ${total_results} results",
261			mapping={"total_objects": self.getObjectsTotalCount(),
262			"total_analyses": self.getAnalysesTotalCount(),
263			"total_results": self.getResultsTotalCount()}
264			)
265			return True
266
267			def splitline(self, line, delimiter=","):
268			"""Parse a single CSV line
269			"""
270			# use CSV library to correctly split quoted values
271			fb = StringIO(line)
272			reader = csv.reader(fb, delimiter=delimiter)
273			parsed_line = next(reader)
274			return [token.strip() for token in parsed_line]
275
276			@deprecated(comment="Please use splitline instead",
277			replacement="splitline")
278			def splitLine(self, line):
279			"""Obsolete: use self.splitline instead
280			"""
281			return self.splitline(line)
282
283			def _parseline(self, line):
284			""" Parses a line from the input CSV file and populates rawresults
285			(look at getRawResults comment)
286			returns -1 if critical error found and parser must end
287			returns the number of lines to be jumped in next read. If 0, the
288			parser reads the next line as usual
289			"""
290			raise NotImplementedError
291
292
293			class InstrumentTXTResultsFileParser(InstrumentResultsFileParser):
294			"""Parser for TXT files
295			"""
296
297			def __init__(self, infile, separator, encoding=None,):
298			InstrumentResultsFileParser.__init__(self, infile, 'TXT')
299			# Some Instruments can generate files with different encodings, so we
300			# may need this parameter
301			self._separator = separator
302			self._encoding = encoding
303
304			def parse(self):
305			infile = self.getInputFile()
306			self.log("Parsing file ${file_name}", mapping={"file_name": infile.filename})
307			jump = 0
308			lines = self.read_file(infile)
309			for line in lines:
310			self._numline += 1
311			if jump == -1:
312			# Something went wrong. Finish
313			self.err("File processing finished due to critical errors")
314			return False
315			if jump > 0:
316			# Jump some lines
317			jump -= 1
318			continue
319
320			if not line:
321			continue
322
323			jump = 0
324			if line:
325			jump = self._parseline(line)
326
327			self.log(
328			"End of file reached successfully: ${total_objects} objects, "
329			"${total_analyses} analyses, ${total_results} results",
330			mapping={"total_objects": self.getObjectsTotalCount(),
331			"total_analyses": self.getAnalysesTotalCount(),
332			"total_results": self.getResultsTotalCount()}
333			)
334			return True
335
336			def read_file(self, infile):
337			"""Given an input file read its contents, strip whitespace from the
338			beginning and end of each line and return a list of the preprocessed
339			lines read.
340
341			:param infile: file that contains the data to be read
342			:return: list of the read lines with stripped whitespace
343			"""
344			try:
345			encoding = self._encoding if self._encoding else None
346			mode = 'r' if self._encoding else 'rU'
347			with codecs.open(infile.name, mode, encoding=encoding) as f:
348			lines = f.readlines()
349			except AttributeError:
350			lines = infile.readlines()
351			lines = [line.strip() for line in lines]
352			return lines
353
354			def splitLine(self, line):
355			sline = line.split(self._separator)
356			return [token.strip() for token in sline]
357
358			def _parseline(self, line):
359			""" Parses a line from the input CSV file and populates rawresults
360			(look at getRawResults comment)
361			returns -1 if critical error found and parser must end
362			returns the number of lines to be jumped in next read. If 0, the
363			parser reads the next line as usual
364			"""
365			raise NotImplementedError
366

senaite / senaite.core

Push — 2.x ( a6588e...513289 )

InstrumentCSVResultsFileParser.splitline() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like