Completed
Pull Request — master (#448)
by
unknown
02:51
created

ResultsProcessor.__init__()   A

Complexity

Conditions 1

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %
Metric Value
dl 0
loc 3
rs 10
cc 1
1
import _mssql
2
from tempfile import NamedTemporaryFile
3
import csv
4
import os
5
import sys
6
7
__all__ = [
8
    'ResultsProcessor'
9
]
10
11
12
# noinspection PyUnusedLocal
13
# noinspection PyMethodMayBeStaticInspection
14
class ResultsProcessor(object):
15
    """
16
    Utility for processing action response, e.g. changing returned results or passing data via disk
17
    """
18
19
    NO_DATA = 2
20
21
    def __init__(self, config, logger):
22
        self.config = config
23
        self.logger = logger
24
25
    def execute_scalar(self, response, cursor):
26
        """
27
        Returns the scalar response.
28
        """
29
        if not response:
30
            sys.exit(self.NO_DATA)
31
        return response
32
33
    def execute_row(self, response, cursor):
34
        """
35
        Returns a mapping of column name to value for a row.
36
        """
37
        row = self._filter_numbered_columns(response or {})
38
        if not row:
39
            sys.exit(self.NO_DATA)
40
        return row
41
42
    def execute_insert(self, response, cursor):
43
        """
44
        Returns identity value of the inserted row.
45
        """
46
        return cursor.identity
47
48
    def execute_non_query(self, response, cursor):
49
        """
50
        Returns the number of rows affected by the non-query.
51
        """
52
        if cursor.rows_affected == 0:
53
            sys.exit(self.NO_DATA)
54
        return cursor.rows_affected
55
56
    def execute_query(self, response, cursor):
57
        """
58
        Writes results to CSV for downstream processing. Each result set is written to its own file.
59
        Returns a list of all file names in order of result set for use by downstream actions.
60
61
        Checks `output_csv` section in `config.yaml` to determine where to write CSV files. You can
62
        specify the output `directory` as well as the file `prefix` and `suffix`.
63
64
        Tries writing to `$TMPDIR`, `$TEMP`, and `$TMP` in order before falling back to platform-
65
        specific locations. See https://docs.python.org/2/library/tempfile.html#tempfile.tempdir.
66
        """
67
        output_files = []
68
        while True:
69
            with self._get_output_file() as csv_file:
70
                # Since pythonrunner runs as root, only root can read and write CSV file by default
71
                # Let's chmod it so downstream processes which run as stanley can read and write
72
                os.chmod(csv_file.name, 0o666)  # race condition w/ open() OK since increasing perms
73
                try:
74
                    # pylint: disable=no-member
75
                    # Grab the first row so we can read the headers and write them to the CSV
76
                    first_row = self._filter_numbered_columns(
77
                        next(cursor.get_iterator(_mssql.ROW_FORMAT_DICT)))
78
                except StopIteration:
79
                    # the last result set will always be empty, so remove the file created for it
80
                    os.unlink(csv_file.name)
81
                    # we're done, no more result sets
82
                    break
83
                output_files.append(csv_file.name)
84
                writer = csv.DictWriter(csv_file, fieldnames=first_row.keys())
85
                writer.writeheader()
86
                writer.writerow(first_row)
87
                for row in cursor:
88
                    writer.writerow(self._filter_numbered_columns(row))
89
        if not output_files:
90
            self.logger.info("Query returned no results, failing")
91
            sys.exit(self.NO_DATA)
92
        return {"output_files": output_files}
93
94
    def _filter_numbered_columns(self, row):
95
        """only return columns by name, not column number"""
96
        return {k: v for k, v in row.iteritems() if not isinstance(k, (int, long))}
0 ignored issues
show
Comprehensibility Best Practice introduced by
Undefined variable 'long'
Loading history...
97
98
    def _get_output_file(self, prefix='mssql-query.', suffix='.csv'):
99
        output_config = self.config.get('output_csv', {})
100
        return NamedTemporaryFile(
101
            dir=output_config.get('directory', None),
102
            prefix=output_config.get('output_prefix', prefix),
103
            suffix=output_config.get('output_suffix', suffix),
104
            delete=False)
105