Passed
Push — develop ( 97ab08...cc204f )
by Plexxi
05:16 queued 02:34
created

FilesController.__init__()   A

Complexity

Conditions 1

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
c 1
b 0
f 0
dl 0
loc 3
rs 10
1
# Licensed to the StackStorm, Inc ('StackStorm') under one or more
2
# contributor license agreements.  See the NOTICE file distributed with
3
# this work for additional information regarding copyright ownership.
4
# The ASF licenses this file to You under the Apache License, Version 2.0
5
# (the "License"); you may not use this file except in compliance with
6
# the License.  You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
16
import codecs
17
import mimetypes
18
import os
19
20
import six
21
import pecan
22
from pecan import abort, expose, request, response
23
from pecan.rest import RestController
24
from wsgiref.handlers import format_date_time
25
26
from st2api.controllers.v1.packs import BasePacksController
27
from st2common.exceptions.db import StackStormDBObjectNotFoundError
28
from st2common import log as logging
29
from st2common.models.api.base import jsexpose
30
from st2common.models.api.pack import PackAPI
31
from st2common.persistence.pack import Pack
32
from st2common.content.utils import get_pack_file_abs_path
33
from st2common.rbac.types import PermissionType
34
from st2common.rbac.utils import assert_request_user_has_resource_db_permission
35
from st2common.rbac.decorators import request_user_has_resource_db_permission
36
37
__all__ = [
    'FilesController',
    'FileController'
]

# Python 2 / 3 compatible alias for the stdlib HTTP client module (status code constants).
http_client = six.moves.http_client

LOG = logging.getLogger(__name__)

# Length (in bytes) of the UTF-8 byte order mark, used when stripping it from file content.
BOM_LEN = len(codecs.BOM_UTF8)

# Maximum file size in bytes. If the file on disk is larger than this value, we don't include it
# in the response. This prevents DDoS / exhaustion attacks.
MAX_FILE_SIZE = (500 * 1000)

# File paths in the file controller for which RBAC checks are not performed
WHITELISTED_FILE_PATHS = [
    'icon.png'
]
58
59
60
class BaseFileController(BasePacksController):
    """
    Common base class for the pack file view controllers.

    It wires the controller to the pack model / persistence classes and offers small helpers
    for stat-ing files, reading them from disk and stripping a leading UTF-8 BOM.
    """

    model = PackAPI
    access = Pack

    supported_filters = {}
    query_options = {}

    @jsexpose()
    def get_all(self, **kwargs):
        """
        Listing is not available on the file views - every request is answered with 404.
        """
        return abort(404)

    def _get_file_size(self, file_path):
        """
        Return the size of the file in bytes, or None if the file could not be stat-ed.
        """
        stats = self._get_file_stats(file_path=file_path)
        return stats[0]

    def _get_file_stats(self, file_path):
        """
        Return a ``(size, mtime)`` tuple for the provided path.

        ``(None, None)`` is returned when ``os.stat`` fails with ``OSError``.
        """
        try:
            stat_result = os.stat(file_path)
        except OSError:
            return (None, None)
        else:
            return stat_result.st_size, stat_result.st_mtime

    def _get_file_content(self, file_path):
        """
        Read the whole file in binary mode and return the raw content.
        """
        with codecs.open(file_path, 'rb') as file_obj:
            return file_obj.read()

    def _process_file_content(self, content):
        """
        Return the content with a leading UTF-8 BOM removed (if one is present).

        Note: If we don't do that, files view explodes with "UnicodeDecodeError: ... invalid start
        byte" because the json.dump doesn't know how to handle BOM character.
        """
        if not content.startswith(codecs.BOM_UTF8):
            # Nothing to strip, hand the content back untouched
            return content

        return content[BOM_LEN:]
99
100
101
class FilesController(BaseFileController):
    """
    Controller which allows user to retrieve content of all the files inside the pack.
    """

    def __init__(self):
        super(FilesController, self).__init__()
        # NOTE(review): presumably looked up by the RBAC decorator applied to get_one to resolve
        # the pack DB object - confirm against request_user_has_resource_db_permission.
        self.get_one_db_method = self._get_by_ref_or_id

    @request_user_has_resource_db_permission(permission_type=PermissionType.PACK_VIEW)
    @jsexpose(arg_types=[str], status_code=http_client.OK)
    def get_one(self, ref_or_id):
        """
            Outputs the content of all the files inside the pack.

            Files which are missing on disk, are larger than MAX_FILE_SIZE or look like binary
            files are left out of the result.

            Handles requests:
                GET /packs/views/files/<pack_ref_or_id>

            :param ref_or_id: Pack reference or database id.

            :return: List of dictionaries with ``file_path`` and ``content`` keys.

            :raises StackStormDBObjectNotFoundError: If the pack doesn't exist.
        """
        pack_db = self._get_by_ref_or_id(ref_or_id=ref_or_id)

        if not pack_db:
            msg = 'Pack with ref_or_id "%s" does not exist' % (ref_or_id)
            raise StackStormDBObjectNotFoundError(msg)

        pack_ref = pack_db.ref
        pack_files = pack_db.files

        result = []
        for file_path in pack_files:
            normalized_file_path = get_pack_file_abs_path(pack_ref=pack_ref, file_path=file_path)
            if not normalized_file_path or not os.path.isfile(normalized_file_path):
                # Ignore references to files which don't exist on disk
                continue

            file_size = self._get_file_size(file_path=normalized_file_path)
            if file_size is None:
                # stat failed even though the isfile check above passed (e.g. the file has been
                # removed in the mean time). Treat it the same as a file which is missing on
                # disk instead of failing the whole request while trying to read it.
                LOG.debug('Skipping file "%s" which can\'t be accessed' % (normalized_file_path))
                continue

            if file_size > MAX_FILE_SIZE:
                LOG.debug('Skipping file "%s" which size exceeds max file size (%s bytes)' %
                          (normalized_file_path, MAX_FILE_SIZE))
                continue

            content = self._get_file_content(file_path=normalized_file_path)

            include_file = self._include_file(file_path=file_path, content=content)
            if not include_file:
                LOG.debug('Skipping binary file "%s"' % (normalized_file_path))
                continue

            item = {
                'file_path': file_path,
                'content': content
            }
            result.append(item)
        return result

    def _include_file(self, file_path, content):
        """
        Method which returns True if the following file content should be included in the response.

        Right now we exclude any file with UTF8 BOM character in it - those are most likely binary
        files such as icon, etc. Files with a null byte are excluded for the same reason.

        Only the first 1024 bytes of the content are inspected.

        :param file_path: Path of the file (currently not used for the decision).
        :param content: Raw file content as read in binary mode.

        :rtype: ``bool``
        """
        head = content[:1024]

        if codecs.BOM_UTF8 in head:
            return False

        # Note: The content is read in binary mode so we need to compare against a bytes literal.
        # Under Python 3 a text literal ("\0") would raise TypeError, under Python 2 both are
        # equivalent.
        if b'\0' in head:
            # Found null byte, most likely a binary file
            return False

        return True
170
171
172
class FileController(BaseFileController):
    """
    Controller which allows user to retrieve content of a specific file in a pack.
    """

    @expose()
    def get_one(self, ref_or_id, *file_path_components):
        """
            Outputs the content of a specific file in a pack.

            Handles requests:
                GET /packs/views/file/<pack_ref_or_id>/<file path>

            :param ref_or_id: Pack reference or database id.
            :param file_path_components: Components of the file path relative to the pack.

            :return: The pecan response object. Either with the file content as the body or with
                     the "not modified" status when the conditional request headers match.

            :raises StackStormDBObjectNotFoundError: If the pack or the file doesn't exist.
            :raises ValueError: If the file path is missing or the file exceeds MAX_FILE_SIZE.
        """
        pack_db = self._get_by_ref_or_id(ref_or_id=ref_or_id)

        if not pack_db:
            msg = 'Pack with ref_or_id "%s" does not exist' % (ref_or_id)
            raise StackStormDBObjectNotFoundError(msg)

        if not file_path_components:
            raise ValueError('Missing file path')

        file_path = os.path.join(*file_path_components)
        pack_ref = pack_db.ref

        # Note: Until list filtering is in place we don't require RBAC check for icon file
        if file_path not in WHITELISTED_FILE_PATHS:
            assert_request_user_has_resource_db_permission(
                request=pecan.request,
                resource_db=pack_db,
                permission_type=PermissionType.PACK_VIEW)

        normalized_file_path = get_pack_file_abs_path(pack_ref=pack_ref, file_path=file_path)
        if not normalized_file_path or not os.path.isfile(normalized_file_path):
            # Ignore references to files which don't exist on disk
            raise StackStormDBObjectNotFoundError('File "%s" not found' % (file_path))

        file_size, file_mtime = self._get_file_stats(file_path=normalized_file_path)

        if file_mtime is None:
            # stat failed even though the isfile check above passed (e.g. the file has been
            # removed in the mean time). Without this check format_date_time(None) would fall
            # back to the current time and the ETag would be "None", so we would emit cache
            # headers (or even a 304) which have nothing to do with the file.
            raise StackStormDBObjectNotFoundError('File "%s" not found' % (file_path))

        if not self._is_file_changed(file_mtime):
            # Client already has the current version, only send the cache headers back
            self._add_cache_headers(file_mtime)
            response.status = http_client.NOT_MODIFIED
            return response

        if file_size is not None and file_size > MAX_FILE_SIZE:
            msg = ('File %s exceeds maximum allowed file size (%s bytes)' %
                   (file_path, MAX_FILE_SIZE))
            raise ValueError(msg)

        # Fall back to a generic binary type when the type can't be guessed from the file name
        content_type = mimetypes.guess_type(normalized_file_path)[0] or 'application/octet-stream'

        self._add_cache_headers(file_mtime)
        response.headers['Content-Type'] = content_type
        response.body = self._get_file_content(file_path=normalized_file_path)
        return response

    def _is_file_changed(self, file_mtime):
        """
        Return True if the file should be sent to the client, False if the conditional request
        headers indicate that the client already has the current version.

        If-None-Match takes precedence over If-Modified-Since when both headers are provided.

        :param file_mtime: Modification time of the file as returned by os.stat.

        :rtype: ``bool``
        """
        if_none_match = request.headers.get('If-None-Match', None)
        if_modified_since = request.headers.get('If-Modified-Since', None)

        # For if_none_match check against what would be the ETAG value
        if if_none_match:
            return repr(file_mtime) != if_none_match

        # For if_modified_since check against file_mtime
        if if_modified_since:
            return if_modified_since != format_date_time(file_mtime)

        # Neither header is provided therefore assume file is changed.
        return True

    def _add_cache_headers(self, file_mtime):
        """
        Set the Last-Modified and ETag response headers based on the file modification time.

        :param file_mtime: Modification time of the file as returned by os.stat.
        """
        # Add both Last-Modified and ETag headers as per recommendations in RFC2616
        response.headers['Last-Modified'] = format_date_time(file_mtime)
        response.headers['ETag'] = repr(file_mtime)
245
246
247
class PackViewsController(RestController):
    """
    Controller which groups the pack file views under the "files" and "file" sub-paths.
    """

    # Content of all the files inside a pack
    files = FilesController()

    # Content of a single file inside a pack
    file = FileController()
250