Test Failed
Pull Request — master (#593)
by
unknown
13:45
created

EvaluationPlaneHandler._post_impl()   D

Complexity

Conditions 13

Size

Total Lines 82
Code Lines 58

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 43
CRAP Score 13.3099

Importance

Changes 0
Metric Value
eloc 58
dl 0
loc 82
ccs 43
cts 49
cp 0.8776
rs 4.2
c 0
b 0
f 0
cc 13
nop 1
crap 13.3099

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

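Commonly applied refactorings include Extract Method: move a cohesive part of the long method into a new, well-named method and call it from the original.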

Complexity

Complex classes like tabpy.tabpy_server.handlers.evaluation_plane_handler.EvaluationPlaneHandler._post_impl() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1 1
import pandas
2 1
import pyarrow
3 1
import uuid
4 1
5 1
from tabpy.tabpy_server.handlers import BaseHandler, arrow_client
6 1
import json
7 1
import simplejson
8 1
import logging
9 1
from tabpy.tabpy_server.common.util import format_exception
10
import requests
11
from tornado import gen
12 1
from datetime import timedelta
13 1
from tabpy.tabpy_server.handlers.util import AuthErrorStates
14 1
15 1
class RestrictedTabPy:
    """Small client object that is passed to user scripts as ``tabpy``.

    It exposes a single operation, :meth:`query`, which POSTs to the
    ``/query/<name>`` endpoint on localhost using the connection settings
    given at construction time.
    """

    def __init__(self, protocol, port, logger, timeout, headers):
        """Remember the connection settings used later by :meth:`query`.

        Parameters
        ----------
        protocol : URL scheme placed in front of ``://localhost``.
        port : port placed after ``localhost:``.
        logger : object exposing ``log(level, message)``.
        timeout : value forwarded as ``timeout`` to ``requests.post``.
        headers : headers forwarded with every query request.
        """
        self.protocol, self.port = protocol, port
        self.logger = logger
        self.timeout = timeout
        self.headers = headers

    def query(self, name, *args, **kwargs):
        """POST the given arguments to ``/query/<name>`` and return the JSON reply.

        Positional arguments win: keyword arguments are only sent when no
        positional arguments were supplied.
        """
        endpoint = f"{self.protocol}://localhost:{self.port}/query/{name}"
        self.logger.log(logging.DEBUG, f"Querying {endpoint}...")

        # Same payload shape as before: {"data": <args tuple or kwargs dict>}.
        payload = json.dumps({"data": args if args else kwargs})

        reply = requests.post(
            url=endpoint,
            data=payload,
            headers=self.headers,
            timeout=self.timeout,
            verify=False,
        )
        return reply.json()
33
34
35
class EvaluationPlaneDisabledHandler(BaseHandler):
    """
    Answers every POST with a 404 error stating that ad-hoc scripts have
    been disabled.
    """

    def initialize(self, executor, app):
        """Initialize the base handler with ``app`` and keep the executor."""
        super().initialize(app)
        self.executor = executor

    @gen.coroutine
    def post(self):
        """Reject the request: ad-hoc script evaluation is switched off."""
        message = (
            "Ad-hoc scripts have been disabled on this analytics extension, "
            "please contact your administrator."
        )
        self.error_out(404, message)
48
49
50 1
class EvaluationPlaneHandler(BaseHandler):
    """
    EvaluationPlaneHandler is responsible for running arbitrary python scripts.

    The script found in the POST body is wrapped into a ``_user_script``
    function, submitted to ``self.executor`` and its result is written back
    to the client as JSON. When the request carries ``dataPath`` the input is
    fetched from, and the result uploaded to, an Arrow Flight server instead.
    """

    def initialize(self, executor, app):
        """Initialize the base handler, keep the executor and build the
        timeout error message from ``self.eval_timeout``."""
        super(EvaluationPlaneHandler, self).initialize(app)
        self.executor = executor
        self._error_message_timeout = (
            f"User defined script timed out. "
            f"Timeout is set to {self.eval_timeout} s."
        )

    @staticmethod
    def _build_function_source(user_code, arguments_str):
        """Wrap ``user_code`` into the source of a ``_user_script`` function.

        Every line of the user script is indented by one space so it becomes
        the function body; ``arguments_str`` is appended verbatim after the
        leading ``tabpy`` parameter.
        """
        header = f"def _user_script(tabpy{arguments_str}):\n"
        body = "".join(f" {line}\n" for line in user_code.splitlines())
        return header + body

    @gen.coroutine
    def _post_impl(self):
        """Validate the request, run the user script and write the response.

        Responds with 400 when the script is missing or the parameters are
        malformed, and with 408 when the evaluation times out. Any other
        exception propagates to the caller (see ``post``).
        """
        body = json.loads(self.request.body.decode("utf-8"))
        self.logger.log(logging.DEBUG, f"Processing POST request '{body}'...")
        if "script" not in body:
            self.error_out(400, "Script is empty.")
            return

        # Transforming user script into a proper function.
        user_code = body["script"]
        arguments = None
        arguments_str = ""
        if "dataPath" in body:
            # Arrow Flight scenario: the input is fetched by path and handed
            # to the script as its single argument _arg1.
            arrow_data = self.get_arrow_data(body["dataPath"])
            if arrow_data is not None:
                arguments = {"_arg1": arrow_data}
        elif "data" in body:
            # Backward compatibility: arguments are inlined in the request.
            arguments = body["data"]

        if arguments is not None:
            if not isinstance(arguments, dict):
                self.error_out(
                    400, "Script parameters need to be provided as a dictionary."
                )
                return
            # The keys must be exactly _arg1 .. _argN with no gaps.
            args_in = sorted(arguments.keys())
            expected = sorted(f"_arg{i}" for i in range(1, len(arguments) + 1))
            if expected != args_in:
                self.error_out(
                    400,
                    "Variables names should follow "
                    "the format _arg1, _arg2, _argN",
                )
                return
            arguments_str = ", " + ", ".join(args_in)

        function_to_evaluate = self._build_function_source(
            user_code, arguments_str
        )
        self.logger.log(
            logging.INFO, f"function to evaluate={function_to_evaluate}"
        )

        try:
            result = yield self._call_subprocess(function_to_evaluate, arguments)
        except (
            gen.TimeoutError,
            requests.exceptions.ConnectTimeout,
            requests.exceptions.ReadTimeout,
        ):
            self.logger.log(logging.ERROR, self._error_message_timeout)
            self.error_out(408, self._error_message_timeout)
            return

        if result is not None:
            if "dataPath" in body:
                # Arrow Flight scenario: upload the result under a fresh id
                # and reply with that id instead of the data itself.
                output_data_id = str(uuid.uuid4())
                self.upload_arrow_data(result, output_data_id, {
                    'removeOnDelete': 'True',
                    'linkedIDs': body["dataPath"]
                })
                result = {'outputDataPath': output_data_id}
                self.logger.log(
                    logging.WARNING, f'outputDataPath={output_data_id}'
                )
            elif isinstance(result, pandas.DataFrame):
                result = result.to_dict(orient='list')
            self.write(simplejson.dumps(result, ignore_nan=True))
        else:
            self.write("null")
        self.finish()

    def _arrow_flight_client(self):
        """Create a Flight client for the hard-coded local Arrow server."""
        scheme = "grpc+tcp"
        host = "localhost"
        port = 5005
        # NOTE(review): the client is never closed explicitly - confirm
        # whether the arrow_client helpers take care of that.
        return pyarrow.flight.FlightClient(f"{scheme}://{host}:{port}")

    def get_arrow_data(self, filename):
        """Fetch the data stored under ``filename`` from the Arrow server."""
        client = self._arrow_flight_client()
        return arrow_client.get_flight_by_path(filename, client)

    def upload_arrow_data(self, data, filename, metadata):
        """Upload ``data`` with ``metadata`` to the Arrow server as ``filename``."""
        client = self._arrow_flight_client()
        return arrow_client.upload_data(client, data, filename, metadata)

    @gen.coroutine
    def post(self):
        """Handle POST: check authentication, then evaluate the script.

        Exceptions raised during evaluation are reported as a 500 response,
        except for ``KeyError: 'response'`` which is reported as a 404
        saying that the queried endpoint did not respond.
        """
        if self.should_fail_with_auth_error() != AuthErrorStates.NONE:
            self.fail_with_auth_error()
            return

        self._add_CORS_header()
        try:
            yield self._post_impl()
        except Exception as e:
            import traceback
            # Send the traceback to the handler's logger instead of stdout.
            self.logger.log(logging.ERROR, traceback.format_exc())
            err_msg = f"{e.__class__.__name__} : {str(e)}"
            if err_msg != "KeyError : 'response'":
                err_msg = format_exception(e, "POST /evaluate")
                self.error_out(500, "Error processing script", info=err_msg)
            else:
                self.error_out(
                    404,
                    "Error processing script",
                    info="The endpoint you're "
                    "trying to query did not respond. Please make sure the "
                    "endpoint exists and the correct set of arguments are "
                    "provided.",
                )

    @gen.coroutine
    def _call_subprocess(self, function_to_evaluate, arguments):
        """Define ``_user_script`` from its source and run it on the executor.

        ``arguments`` is either ``None`` (script takes no data arguments) or
        a dict whose items are passed to the script as keyword arguments.
        Raises ``gen.TimeoutError`` when the script does not finish within
        ``self.eval_timeout`` seconds.
        """
        restricted_tabpy = RestrictedTabPy(
            self.protocol, self.port, self.logger, self.eval_timeout, self.request.headers
        )
        # SECURITY: this executes code taken from the request body inside
        # the server process; the endpoint must only be reachable by trusted
        # clients.
        # Exec does not run the function, so it does not block.
        exec(function_to_evaluate, globals())

        # ``**None`` is a TypeError, so fall back to an empty mapping when
        # the request supplied no arguments.
        keyword_arguments = arguments if arguments is not None else {}

        # 'noqa' comments below tell flake8 to ignore undefined _user_script
        # name - the name is actually defined with user script being wrapped
        # in _user_script function (constructed as a string) and then executed
        # with exec() call above.
        future = self.executor.submit(_user_script,  # noqa: F821
                                      restricted_tabpy,
                                      **keyword_arguments)

        ret = yield gen.with_timeout(timedelta(seconds=self.eval_timeout), future)
        raise gen.Return(ret)
208