Completed
Push — master ( c1ccae...04cc9f )
by Manas
01:08 queued 52s
created

parse_datetime()   C

Complexity

Conditions 7

Size

Total Lines 29

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 7
dl 0
loc 29
rs 5.5
1
#!/usr/bin/python
2
# flake8: noqa
3
# pylint: skip-file
4
"""
5
This is a Python client for the Lastline Analyst API.
6
7
The :py:class:`AnalysisClient` class implements
8
the client side of the Lastline Analyst API methods.
9
It can be imported into Python client code
10
that uses the API.
11
12
The client is available at
13
https://analysis.lastline.com/docs/llapi_client/analysis_apiclient.py.
14
15
Requirements
16
+++++++++++++++++++
17
18
The Analysis API client requires:
19
20
- python 2.6 or 2.7.
21
- The python requests module.
22
- The python pycurl module.
23
- To use the client as a python shell, the ipython module.
24
25
Required python modules can be installed
26
using tools such as easy_install or pip, e.g.::
27
28
    easy_install requests
29
    pip install ipython
30
31
Analysis Client Shell
32
+++++++++++++++++++++++
33
34
Running the analysis API client from the command line,
35
it provides a shell for manually sending requests
36
to the Lastline Analyst API. This can be used to
37
try out the API by analyzing files or URLs.
38
39
This is an IPython shell, so you can take
40
advantage of tab auto-completion and other
41
convenient features of IPython.
42
43
Once the shell is started, the current context
44
contains an 'analysis' object. This is an :py:class:`AnalysisClient`,
45
which can be used to access the functionality
46
of the Lastline Analysis API.
47
48
To start the shell, invoke::
49
50
    python analysis_apiclient.py API_KEY API_TOKEN
51
52
replacing API_KEY and API_TOKEN with your API credentials.
53
54
By default, the client connects to an API instance running in the Lastline cloud
55
at https://analysis.lastline.com . To connect to a different instance, for
56
example when using a Lastline On-Premise installation, please use the
57
*--api-url* parameter to point to the URL of the On-Premise API. For example, to
58
connect to a Lastline Analyst On-Premise running at *analyst.lastline.local*,
59
use::
60
61
    python analysis_apiclient.py --api-url https://analyst.lastline.local/ API_KEY API_TOKEN
62
63
"""
64
import collections
65
import datetime
66
import sys
67
import time
68
69
# Import the modules the client depends on. When run as a script, a missing
# module is reported with installation hints instead of a traceback.
try:
    import json
    import StringIO
    import requests
    if __name__ == "__main__":
        # only the interactive shell needs these two modules
        import optparse
        import IPython
except ImportError as e:
    if __name__ == "__main__":
        # Adjacent string literals are used here on purpose: a backslash
        # continuation inside a string literal would embed the indentation
        # of the following source line in the printed message.
        print >> sys.stderr, \
            "A module required for running the analysis API example " \
            "shell was not found:"
        print >> sys.stderr, "\t'%s'" % str(e)
        print >> sys.stderr, "Please install the missing module."
        print >> sys.stderr, "For this, you can use tools such as easy_install or pip:"
        print >> sys.stderr, "\t easy_install <MODULE_NAME>"
        print >> sys.stderr, "\t pip install <MODULE_NAME>"
        sys.exit(1)
    else:
        # imported as a library: let the caller deal with the ImportError
        raise
89
90
try:
91
    from llapi_client import get_proxies_from_config
92
except ImportError:
93
    # Non-Lastline environment. Reading from config is not supported/needed.
94
    get_proxies_from_config = None
95
96
# Warn (without failing) when the installed requests module is not one of the
# officially supported 2.2.x releases.
try:
    requests_version = requests.__version__
    if not requests_version.startswith('2.2'):
        raise Exception()
except Exception:
    # Format the warning with the version that was actually detected; fall
    # back to '?' only if the module exposes no version at all.
    print >> sys.stderr, "Warning: Your version of requests (%s) might not " \
                         "be compatible with this module." % \
                         getattr(requests, '__version__', '?')
    print >> sys.stderr, "Officially supported are versions 2.2.x"
    # the module-level value is '?' for every unsupported/unknown version
    requests_version = '?'
105
106
107
# Error codes returned by the analysis API.
# These values are copied from Lastline utility code (llapi) to make them
# available to users of client code. Please keep them in sync!
ANALYSIS_API_FILE_NOT_AVAILABLE = 101
ANALYSIS_API_UNKNOWN_RESOURCE_TYPE = 102
ANALYSIS_API_UNKNOWN_ANALYSIS_TYPE = 103
ANALYSIS_API_INVALID_CREDENTIALS = 104
ANALYSIS_API_INVALID_UUID = 105
ANALYSIS_API_NO_RESULT_FOUND = 106
ANALYSIS_API_TEMPORARILY_UNAVAILABLE = 107
ANALYSIS_API_PERMISSION_DENIED = 108
ANALYSIS_API_FILE_TOO_LARGE = 109
ANALYSIS_API_INVALID_DOMAIN = 110
ANALYSIS_API_INVALID_D_METADATA = 112
ANALYSIS_API_INVALID_FILE_TYPE = 113
ANALYSIS_API_INVALID_ARTIFACT_UUID = 114
ANALYSIS_API_SUBMISSION_LIMIT_EXCEEDED = 115
ANALYSIS_API_INVALID_HASH_ALGORITHM = 116
ANALYSIS_API_INVALID_URL = 117
ANALYSIS_API_INVALID_REPORT_VERSION = 118
ANALYSIS_API_FILE_EXTRACTION_FAILED = 119
127
128
129
class Error(Exception):
    """
    Base exception class for this module.
    """
133
134
135
class InvalidSubApiType(Error):
    """
    Exception for invalid sub API operations.

    The analysis API consists of a number of views (sub APIs), listed in
    `AnalysisClientBase.SUB_APIS`. Operations involving parts other than
    these will raise this exception.

    :param sub_api_type: name of the sub API that was rejected
    """
    def __init__(self, sub_api_type):
        Error.__init__(self)
        self.sub_api_type = sub_api_type

    def __str__(self):
        # the list of valid names is looked up when the message is built
        valid = ','.join(AnalysisClientBase.SUB_APIS)
        return "Invalid sub API '%s', expecting one of (%s)" % \
            (self.sub_api_type, valid)
151
152
153
class InvalidFormat(Error):
    """
    Invalid format requested.

    :param requested_format: name of the format that was rejected
    """
    def __init__(self, requested_format):
        Error.__init__(self)
        self.format = requested_format

    def __str__(self):
        # the list of valid formats is looked up when the message is built
        valid = ','.join(AnalysisClientBase.FORMATS)
        return "Requested Invalid Format '%s', expecting one of (%s)" % \
            (self.format, valid)
164
165
166
class CommunicationError(Error):
    """
    Contacting Malscape failed.

    :param msg: optional message; used as the exception text when given
    :param error: optional underlying error; used as the exception text
        when no `msg` is given, and returned by `internal_error()`
    """
    def __init__(self, msg=None, error=None):
        Error.__init__(self, msg or error or '')
        self.__error = error

    def internal_error(self):
        """
        Return the `error` value passed to the constructor (may be None).
        """
        return self.__error
176
177
178
class InvalidAnalysisAPIResponse(Error):
    """
    An AnalysisAPI response was not in the expected format.
    """
182
183
184
class AnalysisAPIError(Error):
    """
    Analysis API returned an error.

    The `error_code` member of this exception
    is the :ref:`error code returned by the API<error_codes>`.

    :param msg: error message
    :param error_code: API error code; a false value (e.g. None or 0)
        is left out of the string representation
    """
    def __init__(self, msg, error_code):
        Error.__init__(self)
        self.msg = msg
        self.error_code = error_code

    def __str__(self):
        if not self.error_code:
            return "Analysis API error: %s" % self.msg
        return "Analysis API error (%s): %s" % (self.error_code, self.msg)
200
201
202
class RequestError(AnalysisAPIError):
    """
    Exception class to group errors that are permanent request errors when
    following the "malscape protocol". These errors indicate a problem with the
    request sent to the server - if you repeat the same request, you cannot
    expect a different error.

    This group excludes temporary errors, such as authentication problems.
    """
211
212
213
class SubmissionInvalidError(RequestError):
    """
    Exception class to group errors that are permanent submission errors. See
    `RequestError` for details.
    """
218
219
220
class FileNotAvailableError(AnalysisAPIError):
    """
    API error whose code defaults to ANALYSIS_API_FILE_NOT_AVAILABLE.
    """
    def __init__(self, msg, error_code=ANALYSIS_API_FILE_NOT_AVAILABLE):
        AnalysisAPIError.__init__(self, msg, error_code)
223
224
225
class InvalidCredentialsError(AnalysisAPIError):
    """
    API error whose code defaults to ANALYSIS_API_INVALID_CREDENTIALS.
    """
    def __init__(self, msg, error_code=ANALYSIS_API_INVALID_CREDENTIALS):
        AnalysisAPIError.__init__(self, msg, error_code)
228
229
230
class InvalidUUIDError(RequestError):
    """
    Request error whose code defaults to ANALYSIS_API_INVALID_UUID.
    """
    def __init__(self, msg, error_code=ANALYSIS_API_INVALID_UUID):
        RequestError.__init__(self, msg, error_code)
233
234
235
class NoResultFoundError(AnalysisAPIError):
    """
    API error whose code defaults to ANALYSIS_API_NO_RESULT_FOUND.
    """
    def __init__(self, msg, error_code=ANALYSIS_API_NO_RESULT_FOUND):
        AnalysisAPIError.__init__(self, msg, error_code)
238
239
240
class TemporarilyUnavailableError(AnalysisAPIError):
    """
    API error whose code defaults to ANALYSIS_API_TEMPORARILY_UNAVAILABLE.
    """
    def __init__(self, msg, error_code=ANALYSIS_API_TEMPORARILY_UNAVAILABLE):
        AnalysisAPIError.__init__(self, msg, error_code)
243
244
245
class PermissionDeniedError(AnalysisAPIError):
    """
    API error whose code defaults to ANALYSIS_API_PERMISSION_DENIED.
    """
    def __init__(self, msg, error_code=ANALYSIS_API_PERMISSION_DENIED):
        AnalysisAPIError.__init__(self, msg, error_code)
248
249
250
class FileTooLargeError(SubmissionInvalidError):
    """
    Submission error whose code defaults to ANALYSIS_API_FILE_TOO_LARGE.
    """
    def __init__(self, msg, error_code=ANALYSIS_API_FILE_TOO_LARGE):
        SubmissionInvalidError.__init__(self, msg, error_code)
253
254
255
class InvalidFileTypeError(SubmissionInvalidError):
    """
    Submission error whose code defaults to ANALYSIS_API_INVALID_FILE_TYPE.
    """
    def __init__(self, msg, error_code=ANALYSIS_API_INVALID_FILE_TYPE):
        SubmissionInvalidError.__init__(self, msg, error_code)
258
259
260
class InvalidMetadataError(SubmissionInvalidError):
    """
    Submission error whose code defaults to ANALYSIS_API_INVALID_D_METADATA.
    """
    def __init__(self, msg, error_code=ANALYSIS_API_INVALID_D_METADATA):
        SubmissionInvalidError.__init__(self, msg, error_code)
263
264
265
class InvalidArtifactError(RequestError):
    """
    Request error whose code defaults to ANALYSIS_API_INVALID_ARTIFACT_UUID.
    """
    def __init__(self, msg, error_code=ANALYSIS_API_INVALID_ARTIFACT_UUID):
        RequestError.__init__(self, msg, error_code)
268
269
270
class SubmissionLimitExceededError(AnalysisAPIError):
    """
    API error whose code defaults to ANALYSIS_API_SUBMISSION_LIMIT_EXCEEDED.
    """
    def __init__(self, msg, error_code=ANALYSIS_API_SUBMISSION_LIMIT_EXCEEDED):
        AnalysisAPIError.__init__(self, msg, error_code)
273
274
275
class InvalidHashAlgorithmError(RequestError):
    """
    Request error whose code defaults to ANALYSIS_API_INVALID_HASH_ALGORITHM.
    """
    def __init__(self, msg, error_code=ANALYSIS_API_INVALID_HASH_ALGORITHM):
        RequestError.__init__(self, msg, error_code)
278
279
280
class InvalidURLError(SubmissionInvalidError):
    """
    Submission error whose code defaults to ANALYSIS_API_INVALID_URL.
    """
    def __init__(self, msg, error_code=ANALYSIS_API_INVALID_URL):
        SubmissionInvalidError.__init__(self, msg, error_code)
283
284
285
class InvalidReportVersionError(RequestError):
    """
    Request error whose code defaults to ANALYSIS_API_INVALID_REPORT_VERSION.
    """
    def __init__(self, msg, error_code=ANALYSIS_API_INVALID_REPORT_VERSION):
        RequestError.__init__(self, msg, error_code)
288
289
290
class FileExtractionFailedError(SubmissionInvalidError):
    """
    Submission error whose code defaults to ANALYSIS_API_FILE_EXTRACTION_FAILED.
    """
    def __init__(self, msg, error_code=ANALYSIS_API_FILE_EXTRACTION_FAILED):
        SubmissionInvalidError.__init__(self, msg, error_code)
293
294
295
#################
296
# client
297
#################
298
299
# Fields of a completed-task record: the task identifier and its score.
__COMPLETED_TASK_FIELDS = [
    "task_uuid",
    "score"
]
CompletedTask = collections.namedtuple("CompletedTask", __COMPLETED_TASK_FIELDS)
304
305
306
def get_time():
    """
    Trivial wrapper around time.time to make testing easier.

    :return: the value of `time.time()`
    """
    return time.time()
311
312
313
def purge_none(d):
    """
    Purge None entries from a dictionary.

    The dictionary is modified in place and also returned.

    :param d: dictionary to clean up
    :return: the same dictionary object, without the None-valued entries
    """
    # Iterate over a snapshot of the keys: deleting entries while iterating
    # a live key view raises a RuntimeError.
    for k in list(d.keys()):
        if d[k] is None:
            del d[k]
    return d
321
322
323
def parse_datetime(d):
    """
    Parse a datetime as formatted in one of the following formats:

    date: '%Y-%m-%d'
    datetime: '%Y-%m-%d %H:%M:%S'
    datetime with microseconds: '%Y-%m-%d %H:%M:%S.%f'

    Can also handle a datetime.date or datetime.datetime object,
    (or anything that has year, month and day attributes)
    and converts it to datetime.datetime

    :param d: string in one of the formats above, or a date-like object
    :return: datetime.datetime
    :raise: ValueError if a string matches none of the formats
    """
    if isinstance(d, datetime.datetime):
        # already the target type: return it unchanged so that the
        # time-of-day is not discarded
        return d

    if hasattr(d, "year") and hasattr(d, "month") and hasattr(d, "day"):
        # date-like object: midnight of that day
        return datetime.datetime(d.year, d.month, d.day)

    # try the most specific format first
    formats = (
        AnalysisClientBase.DATETIME_MSEC_FMT,
        AnalysisClientBase.DATETIME_FMT,
        AnalysisClientBase.DATE_FMT,
    )
    for fmt in formats:
        try:
            return datetime.datetime.strptime(d, fmt)
        except ValueError:
            continue

    raise ValueError("Date '%s' does not match format '%s'" % (
                     d, "%Y-%m-%d[ %H:%M:%S[.%f]]'"))
352
353
354
class TaskCompletion(object):
355
    """
356
    Helper class to get score for all completed tasks
357
358
    :param analysis_client: analysis_apiclient.AnalysisClientBase
359
360
    Sample usage:
361
362
    tc = TaskCompletion(my_analysis_client)
363
    for completed_task in tc.get_completed(start,end):
364
        print completed_task.task_uuid, completed_task.score
365
366
    """
367
    def __init__(self, analysis_client):
368
        self.__analysis_client = analysis_client
369
370
    def get_completed(self, after, before):
371
        """
372
        Return scores of tasks completed in the specified time range.
373
374
        This takes care of using the analysis API's pagination
375
        to make sure it gets all tasks.
376
377
        :param after: datetime.datetime
378
        :param before: datetime.datetime
379
380
        :yield: sequence of `CompletedTask`
381
382
        :raise: InvalidAnalysisAPIResponse if response
383
            does not have the format we expect
384
        """
385
        try:
386
            while True:
387
                result = self.__analysis_client.get_completed(
388
                    after=after,
389
                    before=before,
390
                    include_score=True)
391
392
                data = result["data"]
393
                tasks = data["tasks"]
394
                if not tasks:
395
                    break
396
397
                for task_uuid, score  in tasks.iteritems():
398
                    yield CompletedTask(task_uuid=task_uuid,
399
                                        score=score)
400
401
                more = int(data["more_results_available"])
402
                if not more:
403
                    break
404
405
                last_ts = parse_datetime(data["before"])
406
                if last_ts >= before:
407
                    break
408
409
                after = last_ts
410
411
        except (KeyError, ValueError, TypeError, AttributeError):
412
            # attributeError needed in case iteritems is missing (not a dict)
413
            # let's give it the trace of the original exception, so we know
414
            # what the specific problem is!
415
            trace = sys.exc_info()[2]
416
            raise InvalidAnalysisAPIResponse("Unable to parse response to get_completed()"), None, trace
417
418
419
class AnalysisClientBase(object):
420
    """
421
    A client for the Lastline analysis API.
422
423
    This is an abstract base class: concrete
424
    subclasses just need to implement the _api_request
425
    method to actually send the API request to the server.
426
427
    :param base_url: URL where the lastline analysis API is located. (required)
428
    :param logger: if provided, should be a python logging.Logger object
429
        or object with similar interface.
430
    """
431
    SUB_APIS = ('analysis', 'management', 'research')
432
433
    DATETIME_FMT = '%Y-%m-%d %H:%M:%S'
434
    DATETIME_MSEC_FMT = DATETIME_FMT + '.%f'
435
    DATE_FMT = '%Y-%m-%d'
436
437
    FORMATS = ["json", "xml", "pdf", "rtf"]
438
439
    REQUEST_PERFDATA = False
440
441
    ERRORS = {
442
        ANALYSIS_API_FILE_NOT_AVAILABLE: FileNotAvailableError,
443
        ANALYSIS_API_INVALID_CREDENTIALS: InvalidCredentialsError,
444
        ANALYSIS_API_INVALID_UUID: InvalidUUIDError,
445
        ANALYSIS_API_NO_RESULT_FOUND: NoResultFoundError,
446
        ANALYSIS_API_TEMPORARILY_UNAVAILABLE: TemporarilyUnavailableError,
447
        ANALYSIS_API_PERMISSION_DENIED: PermissionDeniedError,
448
        ANALYSIS_API_FILE_TOO_LARGE: FileTooLargeError,
449
        ANALYSIS_API_INVALID_FILE_TYPE: InvalidFileTypeError,
450
        ANALYSIS_API_INVALID_DOMAIN: InvalidMetadataError,
451
        ANALYSIS_API_INVALID_D_METADATA: InvalidMetadataError,
452
        ANALYSIS_API_INVALID_ARTIFACT_UUID: InvalidArtifactError,
453
        ANALYSIS_API_SUBMISSION_LIMIT_EXCEEDED: SubmissionLimitExceededError,
454
        ANALYSIS_API_INVALID_HASH_ALGORITHM: InvalidHashAlgorithmError,
455
        ANALYSIS_API_INVALID_URL: InvalidURLError,
456
        ANALYSIS_API_INVALID_REPORT_VERSION: InvalidReportVersionError,
457
        ANALYSIS_API_FILE_EXTRACTION_FAILED: FileExtractionFailedError,
458
      }
459
460
    def __init__(self, base_url, logger=None, config=None):
461
        self.__logger = logger
462
        self.__base_url = base_url
463
        self.__config = config
464
465
    def _logger(self):
466
        return self.__logger
467
468
    def __build_url(self, sub_api, parts, requested_format="json"):
469
        if sub_api not in AnalysisClientBase.SUB_APIS:
470
            raise InvalidSubApiType(sub_api)
471
        if requested_format not in AnalysisClientBase.FORMATS:
472
            raise InvalidFormat(requested_format)
473
        num_parts = 2 + len(parts)
474
        pattern = "/".join(["%s"] * num_parts) + ".%s"
475
        params = [self.__base_url, sub_api] + parts + [requested_format]
476
        return pattern % tuple(params)
477
478
    def __build_file_download_url(self, sub_api, parts):
479
        """
480
        Generate a URL to a direct file download
481
        """
482
        if sub_api not in AnalysisClientBase.SUB_APIS:
483
            raise InvalidSubApiType(sub_api)
484
        num_parts = 2 + len(parts)
485
        pattern = "/".join(["%s"] * num_parts)
486
        params = [self.__base_url, sub_api] + parts
487
        return pattern % tuple(params)
488
489
    def _check_file_like(self, f, param_name):
490
        if not hasattr(f, 'read'):
491
            raise AttributeError("The %s parameter is not a file-like " \
492
                                 "object" % param_name)
493
494
    def submit_exe_hash(self,
495
                        md5=None,
496
                        sha1=None,
497
                        download_ip=None,
498
                        download_port=None,
499
                        download_url=None,
500
                        download_host=None,
501
                        download_path=None,
502
                        download_agent=None,
503
                        download_referer=None,
504
                        download_request=None,
505
                        full_report_score=None,
506
                        bypass_cache=None,
507
                        raw=False,
508
                        verify=True):
509
        """
510
        Submit a file by hash.
511
512
        Deprecated version of submit_file_hash() - see below
513
        """
514
        return self.submit_file_hash(md5, sha1,
515
                        download_ip=download_ip,
516
                        download_port=download_port,
517
                        download_url=download_url,
518
                        download_host=download_host,
519
                        download_path=download_path,
520
                        download_agent=download_agent,
521
                        download_referer=download_referer,
522
                        download_request=download_request,
523
                        full_report_score=full_report_score,
524
                        bypass_cache=bypass_cache,
525
                        raw=raw,
526
                        verify=verify)
527
528
    def submit_file_hash(self,
                        md5=None,
                        sha1=None,
                        download_ip=None,
                        download_port=None,
                        download_url=None,
                        download_host=None,
                        download_path=None,
                        download_agent=None,
                        download_referer=None,
                        download_request=None,
                        full_report_score=None,
                        bypass_cache=None,
                        backend=None,
                        require_file_analysis=True,
                        mime_type=None,
                        analysis_timeout=None,
                        analysis_env=None,
                        allow_network_traffic=None,
                        filename=None,
                        keep_file_dumps=None,
                        keep_memory_dumps=None,
                        keep_behavior_log=None,
                        push_to_portal_account=None,
                        raw=False,
                        verify=True,
                        server_ip=None,
                        server_port=None,
                        server_host=None,
                        client_ip=None,
                        client_port=None,
                        is_download=True,
                        protocol="http",
                        apk_package_name=None,
                        report_version=None):
        """
        Submit a file by hash.

        Either an md5 or a sha1 parameter must be provided.
        If both are provided, they should be consistent.

        For return values and error codes please
        see :py:meth:`malscape.api.views.analysis.submit_file`.

        If there is an error and `raw` is not set,
        a :py:class:`AnalysisAPIError` exception will be raised.

        :param md5: md5 hash of file.
        :param sha1: sha1 hash of file.
        :param download_ip: DEPRECATED! Use server_ip instead.
        :param download_port: DEPRECATED! Use server_port instead.
        :param download_url: DEPRECATED! replaced by the download_host
            and download_path parameters
        :param download_host: DEPRECATED! Use server_host instead.
        :param download_path: host path from which the submitted file
            was originally downloaded, as a string of bytes (not unicode)
        :param download_agent: HTTP user-agent header that was used
            when the submitted file was originally downloaded,
            as a string of bytes (not unicode)
        :param download_referer: HTTP referer header that was used
            when the submitted file was originally downloaded,
            as a string of bytes (not unicode)
        :param download_request: full HTTP request with
            which the submitted file was originally downloaded,
            as a string of bytes (not unicode)
        :param full_report_score: if set, this value (between -1 and 101)
            determines starting at which scores a full report is returned.
            -1 and 101 indicate "never return full report";
            0 indicates "return full report at all times"
        :param bypass_cache: if True, the API will not serve a cached
            result. NOTE: This requires special privileges.
        :param require_file_analysis: if True, the submission requires an
            analysis run to be started. If False, the API will attempt to
            base a decision solely on static information such as
            download source reputation and hash lookups. Requires special
            permissions
        :param mime_type: the mime-type of the file; This value should be
            set when require_file_analysis is True to enforce getting the
            most information available
        :param analysis_timeout: timeout in seconds after which to terminate
            analysis. The analysis engine might decide to extend this timeout
            if necessary. If all analysis subjects terminate before this timeout
            analysis might be shorter
        :param analysis_env: environment in which to run analysis. This includes
            the operating system as well as version of tools such as Microsoft
            Office. Example usage:
            - windows7:office2003, or
            - windowsxp
            By default, analysis will run on all available operating systems
            using the most applicable tools.
        :param allow_network_traffic: if False, all network connections will be
            redirected to a honeypot. Requires special permissions.
        :param filename: filename to use during analysis. If none is passed,
            the analysis engine will pick an appropriate name automatically.
            An easy way to pass this value is to use 'file_stream.name' for most
            file-like objects
        :param keep_file_dumps: if True, all files generated during
            analysis will be kept for post-processing. NOTE: This can generate
            large volumes of data and is not recommended. Requires special
            permissions
        :param keep_memory_dumps: if True, all buffers allocated during
            analysis will be kept for post-processing. NOTE: This can generate
            *very* large volumes of data and is not recommended. Requires
            special permissions
        :param keep_behavior_log: if True, the raw behavior log extracted during
            analysis will be kept for post-processing. NOTE: This can generate
            *very very* large volumes of data and is not recommended. Requires
            special permissions
        :param push_to_portal_account: if set, a successful submission will be
            pushed to the web-portal using the specified account
        :param backend: DEPRECATED! Don't use
        :param verify: if False, disable SSL-certificate verification
        :param raw: if True, return the raw json results of the API query
        :param server_ip: ASCII dotted-quad representation of the IP address of
            the server-side endpoint.
        :param server_port: integer representation of the port number
            of the server-side endpoint of the flow tuple.
        :param server_host: hostname of the server-side endpoint of
            the connection, as a string of bytes (not unicode).
        :param client_ip: ASCII dotted-quad representation of the IP address of
            the client-side endpoint.
        :param client_port: integer representation of the port number
            of the client-side endpoint of the flow tuple.
        :param is_download: Boolean; True if the transfer happened in the
            server -> client direction, False otherwise (client -> server).
        :param protocol: app-layer protocol in which the file got
            transferred. Short ASCII string.
        :param apk_package_name: package name for APK files. Don't specify
            manually.
        :param report_version: Version name of the Report that will be returned
                               (optional);
        """
        def optional_flag(value):
            # These options require special permissions, so they are only
            # sent (as 1/0) when the caller specified them; None means unset.
            if value is None:
                return None
            return 1 if value else 0

        def optional_stream(value):
            # wrap a string into a file-like object; None means unset
            if value is None:
                return None
            return StringIO.StringIO(value)

        if self.__logger and backend:
            self.__logger.warning("Ignoring deprecated parameter 'backend'")

        url = self.__build_url("analysis", ["submit", "file"])

        # entries whose value is None are dropped before the request is sent
        params = purge_none({
            "md5": md5,
            "sha1": sha1,
            "full_report_score": full_report_score,
            "bypass_cache": 1 if bypass_cache else None,
            "require_file_analysis": 1 if require_file_analysis else 0,
            "mime_type": mime_type,
            "download_ip": download_ip,
            "download_port": download_port,
            # analysis-specific options:
            "analysis_timeout": analysis_timeout or None,
            "analysis_env": analysis_env,
            "allow_network_traffic": optional_flag(allow_network_traffic),
            "filename": filename,
            "keep_file_dumps": optional_flag(keep_file_dumps),
            "keep_memory_dumps": optional_flag(keep_memory_dumps),
            "keep_behavior_log": optional_flag(keep_behavior_log),
            "push_to_portal_account": push_to_portal_account or None,
            "server_ip": server_ip,
            "server_port": server_port,
            "server_host": server_host,
            "client_ip": client_ip,
            "client_port": client_port,
            "is_download": is_download,
            "protocol": protocol,
            "apk_package_name": apk_package_name,
            "report_version": report_version,
        })
        # these values are passed to _api_request as file-like objects
        files = purge_none({
            "download_url": optional_stream(download_url),
            "download_host": optional_stream(download_host),
            "download_path": optional_stream(download_path),
            "download_agent": optional_stream(download_agent),
            "download_referer": optional_stream(download_referer),
            "download_request": optional_stream(download_request),
            "server_host": optional_stream(server_host),
        })
        return self._api_request(url, params, files=files, post=True,
                                 raw=raw, verify=verify)
722
723
    def submit_exe_file(self,
                        file_stream,
                        download_ip=None,
                        download_port=None,
                        download_url=None,
                        download_host=None,
                        download_path=None,
                        download_agent=None,
                        download_referer=None,
                        download_request=None,
                        full_report_score=None,
                        bypass_cache=None,
                        delete_after_analysis=False,
                        raw=False,
                        verify=True):
        """
        Upload a file for analysis (deprecated alias).

        Every argument is handed unchanged to :py:meth:`submit_file`;
        refer to that method for the meaning of each parameter and for the
        return value. New code should call :py:meth:`submit_file` directly.
        """
        # Collect the optional arguments once so the delegation stays short.
        forwarded = dict(
            download_ip=download_ip,
            download_port=download_port,
            download_url=download_url,
            download_host=download_host,
            download_path=download_path,
            download_agent=download_agent,
            download_referer=download_referer,
            download_request=download_request,
            full_report_score=full_report_score,
            bypass_cache=bypass_cache,
            delete_after_analysis=delete_after_analysis,
            raw=raw,
            verify=verify,
        )
        return self.submit_file(file_stream, **forwarded)
757
758
    def submit_file(self, file_stream,
                    download_ip=None,
                    download_port=None,
                    download_url=None,
                    download_host=None,
                    download_path=None,
                    download_agent=None,
                    download_referer=None,
                    download_request=None,
                    full_report_score=None,
                    bypass_cache=None,
                    delete_after_analysis=None,
                    backend=None,
                    analysis_timeout=None,
                    analysis_env=None,
                    allow_network_traffic=None,
                    filename=None,
                    keep_file_dumps=None,
                    keep_memory_dumps=None,
                    keep_behavior_log=None,
                    push_to_portal_account=None,
                    raw=False,
                    verify=True,
                    server_ip=None,
                    server_port=None,
                    server_host=None,
                    client_ip=None,
                    client_port=None,
                    is_download=True,
                    protocol="http",
                    apk_package_name=None,
                    password=None,
                    report_version=None):
        """
        Upload a file and submit it for analysis.

        Return values and error codes are described in
        :py:meth:`malscape.api.views.analysis.submit_file`.

        If there is an error and `raw` is not set,
        a :py:class:`AnalysisAPIError` exception will be raised.

        :param file_stream: file-like object holding the data to upload.
        :param download_ip: DEPRECATED! Use server_ip instead.
        :param download_port: DEPRECATED! Use server_port instead.
        :param download_url: DEPRECATED! replaced by the download_host
            and download_path parameters
        :param download_host: DEPRECATED! Use server_host instead.
        :param download_path: host path the file was originally downloaded
            from, as a string of bytes (not unicode)
        :param download_agent: HTTP user-agent header used for the original
            download, as a string of bytes (not unicode)
        :param download_referer: HTTP referer header used for the original
            download, as a string of bytes (not unicode)
        :param download_request: full HTTP request of the original download,
            as a string of bytes (not unicode)
        :param full_report_score: if set, this value (between -1 and 101)
            determines starting at which scores a full report is returned.
            -1 and 101 indicate "never return full report";
            0 indicates "return full report at all times"
        :param bypass_cache: if True, the API will not serve a cached
            result. NOTE: This requires special privileges.
        :param delete_after_analysis: if True, the backend will delete the
            file after analysis is done (and no one previously submitted
            this file with this flag set)
        :param backend: DEPRECATED! Don't use. A warning is logged when a
            logger is configured and the value is ignored.
        :param analysis_timeout: timeout in seconds after which to terminate
            analysis. The analysis engine might decide to extend this timeout
            if necessary. If all analysis subjects terminate before this
            timeout, analysis might be shorter
        :param analysis_env: environment in which to run analysis. This
            includes the operating system as well as version of tools such
            as Microsoft Office. Example usage:
            - windows7:office2003, or
            - windowsxp
            By default, analysis will run on all available operating systems
            using the most applicable tools.
        :param allow_network_traffic: if False, all network connections will
            be redirected to a honeypot. Requires special permissions.
        :param filename: filename to use during analysis. If none is passed,
            the analysis engine will pick an appropriate name automatically.
            An easy way to pass this value is to use 'file_stream.name' for
            most file-like objects
        :param keep_file_dumps: if True, all files generated during
            analysis will be kept for post-processing. NOTE: This can
            generate large volumes of data and is not recommended. Requires
            special permissions
        :param keep_memory_dumps: if True, all buffers allocated during
            analysis will be kept for post-processing. NOTE: This can
            generate large volumes of data and is not recommended. Requires
            special permissions
        :param keep_behavior_log: if True, the raw behavior log extracted
            during analysis will be kept for post-processing. NOTE: This can
            generate *very very* large volumes of data and is not
            recommended. Requires special permissions
        :param push_to_portal_account: if set, a successful submission will
            be pushed to the web-portal using the specified username
        :param raw: if True, return the raw JSON results of the API query
        :param verify: if False, disable SSL-certificate verification
        :param server_ip: ASCII dotted-quad representation of the IP address
            of the server-side endpoint.
        :param server_port: integer representation of the port number
            of the server-side endpoint of the flow tuple.
        :param server_host: hostname of the server-side endpoint of
            the connection, as a string of bytes (not unicode).
        :param client_ip: ASCII dotted-quad representation of the IP address
            of the client-side endpoint.
        :param client_port: integer representation of the port number
            of the client-side endpoint of the flow tuple.
        :param is_download: Boolean; True if the transfer happened in the
            server -> client direction, False otherwise (client -> server).
        :param protocol: app-layer protocol in which the file got
            transferred. Short ASCII string.
        :param apk_package_name: package name for APK files. Don't specify
            manually.
        :param password: password used to unpack encrypted archives
        :param report_version: Version name of the Report that will be
            returned (optional);
        """
        if self.__logger and backend:
            self.__logger.warning("Ignoring deprecated parameter 'backend'")

        self._check_file_like(file_stream, "file_stream")
        url = self.__build_url("analysis", ["submit", "file"])

        def _optional_flag(value):
            # Tri-state switch: None means "not specified" and has to stay
            # None so that it is purged from the request; these options
            # require special permissions and must not be sent implicitly.
            if value is None:
                return None
            return 1 if value else 0

        def _as_stream(value):
            # Wrap a string into a file-like object; None stays None so the
            # entry is purged from the upload.
            if value is None:
                return None
            return StringIO.StringIO(value)

        params = purge_none({
            "bypass_cache": 1 if bypass_cache else None,
            "full_report_score": full_report_score,
            "delete_after_analysis": 1 if delete_after_analysis else 0,
            "download_ip": download_ip,
            "download_port": download_port,
            # analysis-specific options:
            "analysis_timeout": analysis_timeout if analysis_timeout else None,
            "analysis_env": analysis_env,
            "allow_network_traffic": _optional_flag(allow_network_traffic),
            "filename": filename,
            "keep_file_dumps": _optional_flag(keep_file_dumps),
            "keep_memory_dumps": _optional_flag(keep_memory_dumps),
            "keep_behavior_log": _optional_flag(keep_behavior_log),
            "push_to_portal_account": (push_to_portal_account
                                       if push_to_portal_account else None),
            "server_ip": server_ip,
            "server_port": server_port,
            "server_host": server_host,
            "client_ip": client_ip,
            "client_port": client_port,
            "is_download": is_download,
            "protocol": protocol,
            "apk_package_name": apk_package_name,
            "password": password,
            "report_version": report_version,
        })

        # With an explicit filename we hand python-requests a
        # (name, stream) pair for the multipart/form-data part, so it does
        # not try to guess the name from attributes of the stream.
        if filename:
            named_stream = (filename, file_stream)
        else:
            named_stream = file_stream

        files = purge_none({
            "file": named_stream,
            "download_url": _as_stream(download_url),
            "download_host": _as_stream(download_host),
            "download_path": _as_stream(download_path),
            "download_agent": _as_stream(download_agent),
            "download_referer": _as_stream(download_referer),
            "download_request": _as_stream(download_request),
            "server_host": _as_stream(server_host),
        })
        return self._api_request(url, params, files=files, post=True,
                                 raw=raw, verify=verify)
950
951
952
    def submit_file_metadata(self, md5, sha1,
                                   download_ip,
                                   download_port,
                                   download_host=None,
                                   download_path=None,
                                   download_agent=None,
                                   download_referer=None,
                                   download_request=None,
                                   raw=False,
                                   verify=True):
        """
        Submit metadata regarding a file download.

        Both the md5 and the sha1 parameter must be provided.

        If there is an error and `raw` is not set,
        a :py:class:`AnalysisAPIError` exception will be raised.

        :param md5: md5 hash of the downloaded file.
        :param sha1: sha1 hash of the downloaded file.
        :param download_ip: ASCII dotted-quad representation of the IP address
            from which the file has been downloaded
        :param download_port: integer representation of the port number
            from which the file has been downloaded
        :param download_host: host from which the submitted file
            was originally downloaded, as a string of bytes (not unicode)
        :param download_path: host path from which the submitted file
            was originally downloaded, as a string of bytes (not unicode)
        :param download_agent: HTTP user-agent header that was used
            when the submitted file was originally downloaded,
            as a string of bytes (not unicode)
        :param download_referer: HTTP referer header that was used
            when the submitted file was originally downloaded,
            as a string of bytes (not unicode)
        :param download_request: full HTTP request with
            which the submitted file was originally downloaded,
            as a string of bytes (not unicode)
        :param raw: if True, return the raw json results of the API query
        :param verify: if False, disable SSL-certificate verification
        """
        url = self.__build_url("analysis", ["submit", "download"])

        def _as_stream(value):
            # Wrap a string into a file-like object; None stays None so the
            # entry is dropped by purge_none() below.
            if value is None:
                return None
            return StringIO.StringIO(value)

        # Use the dictionary *returned* by purge_none(), like every other
        # submit method does, instead of relying on in-place mutation.
        params = purge_none({
            "md5": md5,
            "sha1": sha1,
            "download_ip": download_ip,
            "download_port": download_port,
        })
        files = purge_none({
            "download_host": _as_stream(download_host),
            "download_path": _as_stream(download_path),
            "download_agent": _as_stream(download_agent),
            "download_referer": _as_stream(download_referer),
            "download_request": _as_stream(download_request),
        })
        return self._api_request(url, params, files=files, post=True,
                                 raw=raw, verify=verify)
1018
1019
1020
    def submit_url(self,
                   url,
                   referer=None,
                   full_report_score=None,
                   bypass_cache=None,
                   backend=None,
                   analysis_timeout=None,
                   push_to_portal_account=None,
                   raw=False,
                   verify=True,
                   user_agent=None,
                   report_version=None):
        """
        Submit a URL for analysis.

        Return values and error codes are described in
        :py:meth:`malscape.api.views.analysis.submit_url`.

        If there is an error and `raw` is not set,
        a :py:class:`AnalysisAPIError` exception will be raised.

        :param url: url to analyze
        :param referer: referer header to use for analysis
        :param full_report_score: if set, this value (between -1 and 101)
            determines starting at which scores a full report is returned.
            -1 and 101 indicate "never return full report";
            0 indicates "return full report at all times"
        :param bypass_cache: if True, the API will not serve a cached
            result. NOTE: This requires special privileges.
        :param backend: DEPRECATED! Don't use. A warning is logged when a
            logger is configured and the value is ignored.
        :param analysis_timeout: timeout in seconds after which to terminate
            analysis. The analysis engine might decide to extend this timeout
            if necessary. If all analysis subjects terminate before this
            timeout, analysis might be shorter
        :param push_to_portal_account: if set, a successful submission will
            be pushed to the web-portal using the specified account
        :param raw: if True, return the raw JSON results of the API query
        :param verify: if False, disable SSL-certificate verification
        :param user_agent: user agent header to use for analysis
        :param report_version: Version name of the Report that will be
            returned (optional);
        """
        if self.__logger and backend:
            self.__logger.warning("Ignoring deprecated parameter 'backend'")

        api_url = self.__build_url("analysis", ["submit", "url"])

        # Falsy optional values are normalized to None so that purge_none()
        # removes them from the request.
        request_fields = {
            "url": url,
            "referer": referer,
            "full_report_score": full_report_score,
            "bypass_cache": 1 if bypass_cache else None,
            "analysis_timeout": analysis_timeout if analysis_timeout else None,
            "push_to_portal_account": (push_to_portal_account
                                       if push_to_portal_account else None),
            "user_agent": user_agent if user_agent else None,
            "report_version": report_version,
        }
        params = purge_none(request_fields)
        return self._api_request(api_url, params, post=True,
                                 raw=raw, verify=verify)
1078
1079
    def get_result(self,
                   uuid,
                   report_uuid=None,
                   full_report_score=None,
                   include_scoring_components=None,
                   raw=False,
                   requested_format="json",
                   verify=True,
                   report_version=None):
        """
        Fetch the results of a previously submitted analysis task.

        Return values and error codes are described in
        :py:meth:`malscape.api.views.analysis.get_results`.

        If there is an error and `raw` is not set,
        a :py:class:`AnalysisAPIError` exception will be raised.

        :param uuid: the unique identifier of the submitted task,
            as returned in the task_uuid field of submit methods.
        :param report_uuid: if set, include this report in the result.
        :param full_report_score: if set, this value (between -1 and 101)
            determines starting at which scores a full report is returned.
            -1 and 101 indicate "never return full report";
            0 indicates "return full report at all times"
        :param include_scoring_components: if True, the result will contain
            details of all components contributing to the overall score.
            Requires special permissions
        :param raw: if True, return the raw JSON/XML results of the API query.
        :param requested_format: JSON, XML, PDF, or RTF.
            If format is not JSON, this implies `raw`.
        :param verify: if False, disable SSL-certificate verification
        :param report_version: Version of the report to be returned
            (optional)
        """
        # NOTE: the endpoint is called 'get'; it is kept under that name for
        # backwards-compatibility.
        url = self.__build_url('analysis', ['get'],
                               requested_format=requested_format)

        scoring_flag = 1 if include_scoring_components else 0
        params = purge_none({
            'uuid': uuid,
            'report_uuid': report_uuid,
            'full_report_score': full_report_score,
            'include_scoring_components': scoring_flag,
            'report_version': report_version,
        })

        # Only JSON responses are parsed; every other format is returned as-is.
        wants_raw = True if requested_format.lower() != 'json' else raw
        return self._api_request(url, params,
                                 raw=wants_raw,
                                 requested_format=requested_format,
                                 post=True,
                                 verify=verify)
1133
1134
    def get_result_summary(self, uuid, raw=False,
                           requested_format="json",
                           score_only=False,
                           verify=True):
        """
        Fetch the result summary of a previously submitted analysis task.

        Return values and error codes are described in
        :py:meth:`malscape.api.views.analysis.get_result`.

        If there is an error and `raw` is not set,
        a :py:class:`AnalysisAPIError` exception will be raised.

        :param uuid: the unique identifier of the submitted task,
            as returned in the task_uuid field of submit methods.
        :param raw: if True, return the raw JSON/XML results of the API query.
        :param requested_format: JSON or XML. If format is not JSON, this
            implies `raw`.
        :param score_only: if True, return even less data (only score and
            threat/threat-class classification).
        :param verify: if False, disable SSL-certificate verification
        """
        endpoint = self.__build_url("analysis", ["get_result"],
                                    requested_format=requested_format)
        query = dict(uuid=uuid, score_only=1 if score_only else 0)

        # Only JSON responses are parsed; every other format is returned as-is.
        wants_raw = True if requested_format.lower() != "json" else raw
        return self._api_request(endpoint, query,
                                 raw=wants_raw,
                                 requested_format=requested_format,
                                 post=True,
                                 verify=verify)
1169
1170
    def get_result_artifact(self, uuid, report_uuid, artifact_name,
                            raw=False, verify=True):
        """
        Get artifact generated by an analysis result for a previously
        submitted analysis task.

        :param uuid: the unique identifier of the submitted task,
            as returned in the task_uuid field of submit methods.
        :param report_uuid: the unique report identifier returned as part of
            the dictionary returned by get_result()
        :param artifact_name: the name of the artifact as mentioned in the
            given report in the dictionary returned by get_result()
        :param raw: if True, return the raw JSON/XML results of the API query.
        :param verify: if False, disable SSL-certificate verification
        :returns: a `StringIO.StringIO` object wrapping the downloaded data
        :raises InvalidArtifactError: if the response is empty, or the server
            answered with status 404 or 410
        :raises InvalidUUIDError: if the server answered with status 412
        :raises PermissionDeniedError: if the server answered with status 401
        :raises CommunicationError: for any other communication failure
        """
        url = self.__build_file_download_url("analysis",
                                             ["get_result_artifact"])
        params = {
            'uuid': uuid,
            'artifact_uuid': "%s:%s" % (report_uuid, artifact_name),
        }

        # NOTE: This API request is completely different because it
        # returns real HTTP status-codes (and errors) directly
        try:
            result = self._api_request(url, params, requested_format='raw',
                                       raw=raw, post=True, verify=verify)
            if not result:
                raise InvalidArtifactError()

        except CommunicationError as exc:
            internal_error = str(exc.internal_error())
            if internal_error == '410':
                raise InvalidArtifactError("The artifact is no longer "
                                           "available")
            if internal_error == '404':
                raise InvalidArtifactError("The artifact could not be found")
            if internal_error == '412':
                raise InvalidUUIDError()
            if internal_error == '401':
                raise PermissionDeniedError()

            # we have nothing more specific to say -- raise the
            # original CommunicationError
            raise

        return StringIO.StringIO(result)
1221
1222
    def query_task_artifact(self, uuid, artifact_name, raw=False, verify=True):
        """
        Ask the API whether a given task artifact can be downloaded.

        :param uuid: the unique identifier of the submitted task,
            as returned in the task_uuid field of submit methods.
        :param artifact_name: the name of the artifact
        :param raw: if True, return the raw JSON/XML results of the API query.
        :param verify: if False, disable SSL-certificate verification
        """
        endpoint = self.__build_url("analysis", ["query_task_artifact"])
        query = {
            'uuid': uuid,
            'artifact_name': artifact_name,
        }
        return self._api_request(endpoint, purge_none(query),
                                 raw=raw, verify=verify)
1237
1238
    def completed(self, after, before=None, raw=False, verify=True):
        """
        Deprecated alias of :py:meth:`get_completed`.

        Forwards `after`, `before`, `raw` and `verify` unchanged; use
        'get_completed()' in new code.
        """
        options = dict(before=before, verify=verify, raw=raw)
        return self.get_completed(after, **options)
1244
1245
    def get_completed(self, after, before=None, raw=False, verify=True,
                      include_score=False):
        """
        Get the list of uuids of tasks that were completed
        within a given time frame.

        The main use-case for this method is to periodically
        request a list of uuids completed since the last
        time this method was invoked, and then fetch
        each result with `get_result()`.

        Date parameters to this method can be:
         - date string: '%Y-%m-%d'
         - datetime string: '%Y-%m-%d %H:%M:%S'
         - datetime.datetime object

        All times are in UTC.

        Return values and error codes are described in
        :py:meth:`malscape.api.views.analysis.get_completed`.

        If there is an error and `raw` is not set,
        a :py:class:`AnalysisAPIError` exception will be raised.

        :param after: Request tasks completed after this time.
        :param before: Request tasks completed before this time.
        :param raw: if True, return the raw JSON results of the API query.
        :param verify: if False, disable SSL-certificate verification
        :param include_score: If True, the response contains scores together
            with the task-UUIDs that have completed
        """
        # NOTE: the endpoint is called 'completed'; it is kept under that
        # name for backwards-compatibility.
        url = self.__build_url("analysis", ["completed"])

        def _to_wire(moment):
            # Anything offering strftime() (e.g. datetime objects) is
            # rendered with the client's datetime format; strings and None
            # pass through untouched.
            if hasattr(moment, "strftime"):
                return moment.strftime(AnalysisClientBase.DATETIME_FMT)
            return moment

        before = _to_wire(before)
        after = _to_wire(after)

        params = purge_none({
            'before': before,
            'after': after,
            'include_score': 1 if include_score else 0,
        })
        return self._api_request(url, params, raw=raw, post=True, verify=verify)
1288
1289
    def get_progress(self, uuid, raw=False):
        """
        Fetch a progress estimate for a previously submitted analysis task.

        Return values and error codes are described in
        :py:meth:`malscape.api.views.analysis.get_results`.

        If there is an error and `raw` is not set,
        a :py:class:`AnalysisAPIError` exception will be raised.

        :param uuid: the unique identifier of the submitted task,
            as returned in the task_uuid field of submit methods.
        :param raw: if True, return the raw JSON/XML results of the API query.
        """
        endpoint = self.__build_url('analysis', ['get_progress'])
        query = dict(uuid=uuid)
        return self._api_request(endpoint, query, raw=raw, post=True)
1307
1308
    def query_file_hash(self, hash_value=None, algorithm=None, block_size=None,
                        md5=None, sha1=None, mmh3=None, raw=False):
        """
        Search for existing analysis results with the given file-hash.

        The hash is given either as a `hash_value`/`algorithm` pair or via
        exactly one of the `md5`, `sha1` or `mmh3` shortcuts.

        :param hash_value: The (partial) file-hash.
        :param algorithm: One of MD5/SHA1/MMH3
        :param block_size: Size of the block (at file start) used for
            generating the hash-value. By default (or if 0), the entire file
            is assumed.
        :param md5: Helper to quickly set `hash_value` and `algorithm`
        :param sha1: Helper to quickly set `hash_value` and `algorithm`
        :param mmh3: Helper to quickly set `hash_value` and `algorithm`
        :param raw: if True, return the raw JSON/XML results of the API query.
        :raises TypeError: if more than one way of passing the hash is used,
            or if hash value or algorithm is missing
        """
        # Gather the shortcut arguments that were actually supplied.
        shortcuts = [(name, digest)
                     for name, digest in (('md5', md5),
                                          ('sha1', sha1),
                                          ('mmh3', mmh3))
                     if digest]
        if shortcuts:
            # A shortcut excludes the explicit pair as well as any other
            # shortcut.
            if hash_value or algorithm or len(shortcuts) > 1:
                raise TypeError("Conflicting values passed for hash/algorithm")
            algorithm, hash_value = shortcuts[0]
        elif not (hash_value and algorithm):
            raise TypeError("Missing values for hash_value/algorithm")

        url = self.__build_url('analysis', ['query/file_hash'])
        params = purge_none({
            'hash_value': hash_value,
            'hash_algorithm': algorithm,
            'hash_block_size': block_size,
        })
        return self._api_request(url, params, raw=raw, post=True)
1348
1349
    def is_blocked_file_hash(self, hash_value=None, algorithm=None,
                             block_size=None, md5=None, sha1=None, mmh3=None,
                             raw=False):
        """
        Check if the given file-hash belongs to a malicious file and we have
        gathered enough information to block based on this (partial) hash.

        The hash is given either as a `hash_value`/`algorithm` pair or via
        exactly one of the `md5`, `sha1` or `mmh3` shortcuts.

        :param hash_value: The (partial) file-hash.
        :param algorithm: One of MD5/SHA1/MMH3
        :param block_size: Size of the block (at file start) used for
            generating the hash-value. By default (or if 0), the entire file
            is assumed.
        :param md5: Helper to quickly set `hash_value` and `algorithm`
        :param sha1: Helper to quickly set `hash_value` and `algorithm`
        :param mmh3: Helper to quickly set `hash_value` and `algorithm`
        :param raw: if True, return the raw JSON/XML results of the API query.
        :raises TypeError: if more than one way of passing the hash is used,
            or if hash value or algorithm is missing
        """
        # Gather the shortcut arguments that were actually supplied.
        supplied = [(label, digest)
                    for label, digest in (('md5', md5),
                                          ('sha1', sha1),
                                          ('mmh3', mmh3))
                    if digest]
        if supplied:
            # A shortcut excludes the explicit pair as well as any other
            # shortcut.
            if hash_value or algorithm or len(supplied) > 1:
                raise TypeError("Conflicting values passed for hash/algorithm")
            algorithm, hash_value = supplied[0]
        elif not (hash_value and algorithm):
            raise TypeError("Missing values for hash_value/algorithm")

        url = self.__build_url('analysis', ['query/is_blocked_file_hash'])
        params = purge_none({
            'hash_value': hash_value,
            'hash_algorithm': algorithm,
            'hash_block_size': block_size,
        })
        return self._api_request(url, params, raw=raw, post=True)
1390
1391
    def query_analysis_engine_tasks(self, analysis_engine_task_uuids,
                                    analysis_engine='analyst', raw=False):
        """
        Look up completed tasks by the UUIDs of their analysis engine tasks.

        Given a set of task UUIDs from an analysis engine (such as Analyst
        Scheduler or Anubis), find completed tasks that contain these
        analysis engine tasks.

        For return values and error codes please
        see :py:meth:`malscape.api.views.analysis.query_analysis_engine_tasks`.

        If there is an error and `raw` is not set,
        a :py:class:`AnalysisAPIError` exception will be raised.

        :param analysis_engine_task_uuids: List of analysis engine task UUIDs to
            search. They are sent to the API as one comma-separated string.
        :param analysis_engine: The analysis engine the task refers to.
        :param raw: if True, return the raw JSON results of the API query.
        """
        endpoint = self.__build_url('analysis', ['query/analysis_engine_tasks'])
        uuid_csv = ','.join(analysis_engine_task_uuids)
        payload = purge_none({
            'analysis_engine_task_uuids': uuid_csv,
            'analysis_engine': analysis_engine,
        })
        return self._api_request(endpoint, payload, post=True, raw=raw)
1415
1416
    def analyze_sandbox_result(self, analysis_task_uuid,
                               analysis_engine='anubis',
                               full_report_score=None,
                               bypass_cache=False,
                               raw=False):
        """
        Provide a task UUID from an analysis engine (such as Analyst Scheduler
        or Anubis) and trigger scoring of the activity captured by the analysis
        report.

        Similar to submitting by exe hash (md5/sha1) but we can enforce
        the precise analysis result (if there are multiple) that we want
        to score

        For return values and error codes please
        see :py:meth:`malscape.api.views.analysis.analyze_sandbox_result`.

        If there is an error and `raw` is not set,
        a :py:class:`AnalysisAPIError` exception will be raised.

        :param analysis_task_uuid: The sandbox task UUID to analyze/import.
        :param analysis_engine: The sandbox the task refers to.
        :param full_report_score: if set, this value (between -1 and 101)
            determines starting at which scores a full report is returned.
            -1 and 101 indicate "never return full report";
            0 indicates "return full report at all times"
        :param bypass_cache: if True, the API will not serve a cached
            result. NOTE: This requires special privileges.
        :param raw: if True, return the raw JSON results of the API query.
        """
        url = self.__build_url('analysis', ['analyze_sandbox_result'])
        # Use the dictionary returned by purge_none(), like every other method
        # in this client, instead of relying on it mutating its argument.
        params = purge_none({
            'analysis_task_uuid': analysis_task_uuid,
            'analysis_engine': analysis_engine,
            'full_report_score': full_report_score,
            # only send the flag when cache bypassing is requested
            'bypass_cache': 1 if bypass_cache else None,
        })
        return self._api_request(url, params, raw=raw)
1455
1456
    def _api_request(self,
1457
                     url,
1458
                     params=None,
1459
                     files=None,
1460
                     timeout=None,
1461
                     post=False,
1462
                     raw=False,
1463
                     requested_format="json",
1464
                     verify=True):
1465
        """
1466
        Send an API request and return the results.
1467
1468
        :param url: API URL to fetch.
1469
        :param params: GET or POST parameters.
1470
        :param files: files to upload with request.
1471
        :param timeout: request timeout in seconds.
1472
        :param post: use HTTP POST instead of GET
1473
        :param raw: return the raw json results of API query
1474
        :param requested_foramt: JSON or XML. If format is not JSON, this implies `raw`.
1475
        """
1476
        raise NotImplementedError("%s does not implement api_request()" % self.__class__.__name__)
1477
1478
    def _process_response_page(self, page, raw, requested_format):
1479
        """
1480
        Helper for formatting/processing api response before returning it.
1481
        """
1482
        if raw or requested_format.lower() != "json":
1483
            return page
1484
1485
        #why does pylint think result is a bool??
1486
        #pylint: disable=E1103
1487
        result = json.loads(page)
1488
        success = result['success']
1489
        if success:
1490
            return result
1491
        else:
1492
            error_code = result.get('error_code', None)
1493
            # raise the most specific error we can
1494
            exception_class = AnalysisClientBase.ERRORS.get(error_code) or \
1495
                              AnalysisAPIError
1496
            raise exception_class(result['error'], error_code)
1497
1498
    def rescore_task(self, uuid=None, md5=None, sha1=None,
                     min_score=0, max_score=100,
                     threat=None, threat_class=None,
                     force_local=False, raw=False):
        """
        Enforce re-scoring of a specific task or multiple tasks based on the
        submitted file. Requires specific permissions.

        At least one of uuid/md5 must be provided. If sha1 is given, it must
        match with the md5 that was provided. Existing manual-score threat/
        threat-class information will not be overwritten unless an empty-
        string ('') is passed to this function.

        This API-call returns the task-UUIDs that were triggered for rescoring.

        NOTE: Even when a single task-UUID is passed, the API might decide to
        re-score all tasks for the same file!

        :param uuid: the unique identifier of the submitted task,
            as returned in the task_uuid field of submit methods.
        :param md5: the md5 hash of the submitted file.
        :param sha1: the sha1 hash of the submitted file.
        :param min_score: sent to the API as `min_score` (default 0).
            NOTE(review): presumably the lower score bound of tasks to
            rescore -- confirm against the API documentation.
        :param max_score: sent to the API as `max_score` (default 100).
            NOTE(review): presumably the upper score bound of tasks to
            rescore -- confirm against the API documentation.
        :param threat: manual-score threat information; omitted from the
            request when None, pass '' to overwrite existing information.
        :param threat_class: manual-score threat-class information; omitted
            from the request when None, pass '' to overwrite existing
            information.
        :param force_local: if True, enforce that the manual score is applied
            only locally. This is the default for on-premise instances and
            cannot be enforced there. Requires special permissions.
        :param raw: if True, return the raw JSON/XML results of the API query.
        :raises AssertionError: if neither `uuid` nor `md5` is provided.
        """
        if not (uuid or md5):
            # Raised explicitly: an `assert` statement would be stripped when
            # Python runs with -O, silently disabling this validation.
            raise AssertionError("Please provide task-uuid/md5")
        url = self.__build_url('management', ['rescore'])
        params = purge_none({
            'uuid': uuid,
            'md5': md5,
            'sha1': sha1,
            'min_score': min_score,
            'max_score': max_score,
            'threat': threat,
            'threat_class': threat_class,
            # use the default if no force is set
            'force_local': 1 if force_local else None,
        })
        return self._api_request(url, params, raw=raw, post=True)
1539
1540
    def rescore_scanner(self, scanner, after, before,
                        min_score=0, max_score=100,
                        min_scanner_score=0, max_scanner_score=100,
                        max_version=None, test_flag=None, force=False,
                        raw=False):
        """
        Find tasks that triggered a certain scanner and mark them for
        reprocessing.

        This API-call returns the task-UUIDs that were triggered for rescoring.

        :param scanner: Name of the scanner.
        :param after: Reprocess tasks completed after this time. Objects
            offering `strftime` are formatted with
            `AnalysisClientBase.DATETIME_FMT`; other values are sent as given.
        :param before: Reprocess tasks completed before this time. Handled
            like `after`.
        :param min_score: Minimum score of tasks to reprocess.
        :param max_score: Maximum score of tasks to reprocess.
        :param min_scanner_score: Minimum score of scanner detection (on backend
            task) to reprocess.
        :param max_scanner_score: Maximum score of scanner detection (on backend
            task) to reprocess.
        :param max_version: Maximum version of scanner to reprocess.
        :param test_flag: If True, only affect backend-tasks where the scanner
            was in *test* mode; if False, only affect backend-tasks where the
            scanner was in *real* mode; otherwise affect all backend-tasks
            regardless of the *test* flag.
        :param force: By default, the API will refuse rescoring any scanners that
            affect more than 100 tasks. To rescore large amounts, distribute the
            work over multiple time-windows. This safety can be disabled by
            setting the *force* parameter to True.
        :param raw: if True, return the raw results of the API query.
        """
        # Turn datetime-like bounds into their textual representation.
        if hasattr(before, "strftime"):
            before = before.strftime(AnalysisClientBase.DATETIME_FMT)
        if hasattr(after, "strftime"):
            after = after.strftime(AnalysisClientBase.DATETIME_FMT)

        endpoint = self.__build_url('management', ['rescore_scanner'])
        payload = purge_none({
            'scanner': scanner,
            'after': after,
            'before': before,
            'min_score': min_score,
            'max_score': max_score,
            'min_scanner_score': min_scanner_score,
            'max_scanner_score': max_scanner_score,
            'max_version': max_version,
        })
        # The flags are added after purging: test_flag is tri-state and is
        # only transmitted when explicitly True/False.
        if test_flag is not None:
            payload['test_flag'] = 1 if test_flag else 0
        if force:
            payload['force'] = 1
        return self._api_request(endpoint, payload, raw=raw, post=True)
1591
1592
    def suppress_scanner(self, scanner, max_version, raw=False):
        """
        Mark a scanner as suppressed.

        :param scanner: Name of the scanner.
        :param max_version: Version of scanner up to which it is supposed to be
            suppressed. So, if the first scanner-version that should be used
            for scoring is X, provide (X-1).
        :param raw: if True, return the raw results of the API query.
        """
        endpoint = self.__build_url('management', ['suppress_scanner'])
        payload = purge_none({
            'scanner': scanner,
            'max_version': max_version,
        })
        return self._api_request(endpoint, payload, raw=raw, post=True)
1607
1608
    def create_ticket(self, uuid=None, md5=None, sha1=None,
                      min_score=0, max_score=100, summary=None, labels=None,
                      is_false_negative=False, is_false_positive=False,
                      is_from_customer=False, is_from_partner=False,
                      force=False, raw=False):
        """
        Request the creation of a ticket for a specific task or for the tasks
        of a submitted file. Requires specific permissions.

        At least one of uuid/md5/sha1 must be provided. If both file-hashes are
        provided, they must match the same file.

        :param uuid: the unique identifier of the submitted task,
            as returned in the task_uuid field of submit methods.
        :param md5: the md5 hash of the submitted file.
        :param sha1: the sha1 hash of the submitted file.
        :param force: if True, enforce the generation of a ticket, even if none
            of the task-analysis rules would have generated a ticket
        :param min_score: Limit generation of tickets to tasks above the given
            threshold
        :param max_score: Limit generation of tickets to tasks below the given
            threshold
        :param summary: Optional summary (title) to use for the ticket.
        :param labels: Optional set of labels to assign to a task
        :param is_false_negative: Helper parameter to add the standard FN label
        :param is_false_positive: Helper parameter to add the standard FP label
        :param is_from_customer: Helper parameter to add the standard
            from-customer label
        :param is_from_partner: Helper parameter to add the standard
            from-partner label
        :param raw: if True, return the raw JSON/XML results of the API query.
        :raises AssertionError: if none of `uuid`/`md5`/`sha1` is provided.
        """
        if not (uuid or md5 or sha1):
            # Raised explicitly: an `assert` statement would be stripped when
            # Python runs with -O, silently disabling this validation.
            raise AssertionError("Please provide task-uuid/md5/sha1")
        url = self.__build_url('management', ['create_ticket'])

        # Merge caller-provided labels with the standard helper labels.
        all_labels = set(labels) if labels else set()
        standard_labels = (
            (is_false_negative, 'false_negatives'),
            (is_false_positive, 'false_positives'),
            (is_from_customer, 'from-customer'),
            (is_from_partner, 'from-partner'),
        )
        for enabled, label in standard_labels:
            if enabled:
                all_labels.add(label)
        # Sort so that the transmitted list does not depend on set ordering;
        # no labels at all means the parameter is omitted from the request.
        labels_list = ','.join(sorted(all_labels)) if all_labels else None

        params = purge_none({
            'uuid': uuid,
            'md5': md5,
            'sha1': sha1,
            'min_score': min_score,
            'max_score': max_score,
            'force': 1 if force else 0,
            'summary': summary,
            'labels': labels_list,
        })
        return self._api_request(url, params, raw=raw, post=True)
1669
1670
    # pylint: disable=W0613
1671
    # raw, query_end, query_start are unused
1672
    def get_license_activity(self, query_start=None, query_end=None,
                             raw=False):
        """
        Fetch license activity information.

        DEPRECATED. DO NOT USE

        All parameters are accepted for signature compatibility only and are
        ignored.

        :raises AssertionError: always, because the API function is
            deprecated.
        """
        # Raised explicitly instead of `assert False`: assert statements are
        # removed under "python -O", which would turn this call into a silent
        # no-op returning None.
        raise AssertionError("Call to deprecated API function")
1680
    # pylint: enable=W0613
1681
1682
    def get_detections(self, report_uuid, raw=False):
        """
        Retrieve full internal scoring details. Requires special permissions

        :param report_uuid: Backend-report UUID as returned by `get_result`
        :param raw: if True, return the raw results of the API query.
        :returns: Dictionary with detailed detection information
        """
        endpoint = self.__build_url('research', ['get_detections'])
        payload = {'report_uuid': report_uuid}
        return self._api_request(endpoint, payload, raw=raw, post=True)
1692
1693
    def get_backend_scores(self, md5=None, sha1=None, raw=False):
        """
        Download detailed detection information for all backend results for a
        file.

        :param md5: MD5 of the file to query
        :param sha1: SHA1 of the file to query
        :param raw: if True, return the raw results of the API query.
        :returns: Dictionary with detailed detection information
        :raises AssertionError: if neither `md5` nor `sha1` is provided.
        """
        if not (md5 or sha1):
            # Raised explicitly: an `assert` statement would be stripped when
            # Python runs with -O, silently disabling this validation.
            raise AssertionError("Need to provide one of md5/sha1")
        url = self.__build_url('research', ['get_backend_scores'])
        params = purge_none({
            'file_md5': md5,
            'file_sha1': sha1,
        })
        return self._api_request(url, params, raw=raw, post=True)
1709
1710
1711
class AnalysisClient(AnalysisClientBase):
    """
    Client for the Analysis API.

    A client for the Analysis API that accesses the API through the web,
    using key and api token for authentication, and the python
    requests module for sending requests.

    :param base_url: URL where the lastline analysis API is located. (required)
    :param key: API key for the Lastline Analyst API (required)
    :param api_token: API token for the Lastline Analyst API (required)
    :param logger: if provided, should be a python logging.Logger object
        or object with similar interface.
    :param ca_bundle: if provided, location of Certification Authority bundle
        to use for authentication. This should not be required
        if certificates are properly setup on the system.
    :param verify_ssl: if True, verify SSL certificates. If False,
        verification is disabled for every request, overriding the
        per-call parameter.
    :param use_curl: deprecated and ignored; a warning is logged when it is
        set and a logger is available.
    :param timeout: default timeout (in seconds) to use for network requests.
        Set to None to disable timeouts
    :param proxies: dictionary with the per-protocol proxy to use
        (e.g. { 'http': 'localhost:3128', 'https': 'localhost:3128' })
    :param config: optional configuration object. It is handed to the base
        class and, when `proxies` is None, used to look up the proxy
        settings via `get_proxies_from_config`.
    """
    def __init__(self,
                 base_url,
                 key,
                 api_token,
                 logger=None,
                 ca_bundle=None,
                 verify_ssl=True,
                 use_curl=False,
                 timeout=60,
                 proxies=None,
                 config=None):
        AnalysisClientBase.__init__(self, base_url, logger, config)
        self.__key = key
        self.__api_token = api_token
        self.__ca_bundle = ca_bundle
        self.__verify_ssl = verify_ssl
        self.__logger = logger
        self.__timeout = timeout
        if use_curl and logger:
            logger.warning("Ingoring deprecated use_curl option")
        # explicitly passed proxies win over the ones from the configuration
        if proxies is None and config:
            self.__proxies = get_proxies_from_config(config)
        else:
            self.__proxies = proxies
        # one session shared by all requests of this client
        self.__session = requests.session()

    def set_key(self, key):
        """
        Replace the API key sent with subsequent requests.

        :param key: the new API key.
        """
        self.__key = key

    def _api_request(self,
                     url,
                     params=None,
                     files=None,
                     timeout=None,
                     post=False,
                     raw=False,
                     requested_format="json",
                     verify=True):
        """
        Send an API request through the requests session and return the
        processed result.

        :param url: API URL to fetch.
        :param params: GET or POST parameters. The mapping is copied, the
            caller's object is never modified.
        :param files: files to upload with request; implies HTTP POST.
        :param timeout: request timeout in seconds; a false value selects
            the client-wide default.
        :param post: use HTTP POST instead of GET
        :param raw: return the raw results of the API query
        :param requested_format: JSON or XML. If format is not JSON, this
            implies `raw`.
        :param verify: if False, skip SSL certificate verification for this
            request.
        :raises CommunicationError: if sending the request fails or the
            server answers with an HTTP error status.
        """
        if self._logger():
            self._logger().info("Requesting %s" % url)
        if requested_format.lower() != "json":
            raw = True
        # Work on a private copy so the credentials added below do not leak
        # into a dictionary owned by the caller.
        params = dict(params) if params else {}
        params["key"] = self.__key
        # NOTE: certain functions allow access without an api-token. Then,
        # a valid license-key is sufficient. We must not pass an invalid
        # or empty, however
        if self.__api_token:
            params["api_token"] = self.__api_token
        if self.REQUEST_PERFDATA:
            # we allow anyone setting this flag, but only admins will get
            # any data back
            params['perfdata'] = 1

        method = "GET"
        data = None
        if post or files:
            # parameters travel in the request body for POST requests
            method = "POST"
            data = params
            params = None

        if not self.__verify_ssl or not verify:
            verify_ca_bundle = False
        elif self.__ca_bundle:
            verify_ca_bundle = self.__ca_bundle
        else:
            verify_ca_bundle = True

        try:
            response = self.__session.request(
                method, url,
                params=params, data=data, files=files,
                timeout=timeout or self.__timeout,
                verify=verify_ca_bundle,
                proxies=self.__proxies)
            # raise if anything went wrong
            response.raise_for_status()
        except requests.RequestException as exc:
            if self.__logger:
                self.__logger.error("Error contacting Malscape API: %s", exc)
            # raise a wrapped exception
            raise CommunicationError(error=exc)

        # Get the response content, as a unicode string if the response is
        # textual, as a regular string otherwise.
        content_type = response.headers.get("content-type")
        if content_type and \
                (content_type.startswith("application/json") or
                 content_type.startswith("text/")):
            data = response.text
        else:
            data = response.content

        return self._process_response_page(data, raw, requested_format)
1829
1830
1831
def init_shell(banner):
    """
    Set up the iPython shell.

    The embeddable shell class is looked up in the locations used by the
    different IPython generations, newest first.

    :param banner: text displayed when the shell starts.
    :returns: a callable embedded IPython shell object.
    :raises ImportError: if no usable IPython installation is found.
    """
    #this import can fail, that's why it's in a try block!
    #pylint: disable=E0611
    #pylint: disable=F0401
    try:
        # IPython >= 1.0
        from IPython.terminal.embed import InteractiveShellEmbed #@UnresolvedImport
    except ImportError:
        try:
            # IPython 0.11 - 0.13
            from IPython.frontend.terminal.embed import InteractiveShellEmbed #@UnresolvedImport
        except ImportError:
            InteractiveShellEmbed = None
    #pylint: enable=E0611
    #pylint: enable=F0401

    if InteractiveShellEmbed is not None:
        return InteractiveShellEmbed(banner1=banner)

    # iPython <0.11 does have a Shell member; import it explicitly so this
    # branch does not depend on a module-level IPython import.
    import IPython.Shell #pylint: disable=E0611,F0401
    shell = IPython.Shell.IPShellEmbed() #pylint: disable=E1101
    shell.set_banner(banner)
    return shell
1847
1848
1849
# Text shown when the interactive shell starts.
BANNER = (
    "\n"
    "--------------------------------------\n"
    "Lastline Analyst API shell\n"
    "--------------------------------------\n"
    "\n"
    "The 'analysis' object is an AnalysisClient,\n"
    "which can be used to access the functionality\n"
    "of the lastline Analysis API.\n"
    "\n"
    "This is an IPython shell, so you can take\n"
    "advantage of tab auto-completion and other\n"
    "convenient features of IPython.\n"
)
# Default API endpoint used by main() unless --api-url is given.
URL = "https://analysis.lastline.com"
1863
def main(argv):
    """
    Command line entry point: start an interactive shell with a client.

    Expects exactly two positional arguments (access key and api token) and
    exposes the resulting :py:class:`AnalysisClient` as `analysis` inside
    the shell.

    :param argv: full argument vector, including the program name.
    :returns: 1 if the arguments are invalid (help is printed), 0 after the
        shell has been run.
    """
    usage_text = """
Run client for analysis api with the provided credentials

    %prog access_key api_token

"""
    option_parser = optparse.OptionParser(usage=usage_text)
    option_parser.add_option(
        "-u", "--api-url", dest="api_url",
        type="string", default=URL,
        help="send API requests to this URL (debugging purposes)")

    options, positional = option_parser.parse_args(argv[1:])
    if len(positional) != 2:
        option_parser.print_help()
        return 1

    client = AnalysisClient(options.api_url,
                            key=positional[0],
                            api_token=positional[1])
    namespace = {"analysis": client}

    shell = init_shell(BANNER)
    shell(local_ns=namespace, global_ns=namespace)

    return 0
1888
1889
1890
if __name__ == "__main__":
    # Run as a script: start the shell and use main()'s return value as the
    # process exit status.
    sys.exit(main(sys.argv))
1892