Passed
Push — master ( 688c39...a1688e )
by Randy
01:49
created

libs.analytics.Analytics.stop()   A

Complexity

Conditions 1

Size

Total Lines 7
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 3
dl 0
loc 7
rs 10
c 0
b 0
f 0
cc 1
nop 1
1
import ipaddress
2
import json
3
import sys
4
from configparser import ConfigParser
5
from hashlib import sha256
6
from threading import Thread, Lock
7
from urllib.parse import urlparse
8
9
import requests
10
import validators
11
from url_normalize import url_normalize
12
13
from .callback import WebServer
14
from .cron import Cron
15
from .data import Data
16
from .initialize import Initialize
17
from .survey import View, GoogleSafeBrowsing, PhishTank
18
from .tools import Tools
19
20
"""
21
    Copyright (c) 2020 Star Inc.(https://starinc.xyz)
22
23
    This Source Code Form is subject to the terms of the Mozilla Public
24
    License, v. 2.0. If a copy of the MPL was not distributed with this
25
    file, You can obtain one at http://mozilla.org/MPL/2.0/.
26
"""
27
28
29
class Analytics:
    """
    Core object of the service: it builds the data, cron and survey
    components, starts/stops the web server and answers URL trust queries.
    """

    # Loading Configs
    # `cfg` is shared at class level and is read in `__init__`; it is
    # presumably filled by `Initialize(self)` from `config` -- TODO confirm.
    cfg = {}
    config = ConfigParser()
    config.read("config.ini")

    # Seconds to wait for the analyzed site before giving up on the request.
    request_timeout = 10

    def __init__(self):
        Initialize(self)
        # Initialization
        self.data_control = Data(self)
        self.view_survey = View(self)
        self.cron_job = Cron(self)
        self.safe_browsing = GoogleSafeBrowsing(
            self.cfg["Google Safe Browsing"]["google_api_key"]
        )
        self.phishtank = PhishTank(
            self.cfg["PhishTank"]["username"],
            self.cfg["PhishTank"]["api_key"]
        )
        Tools.set_ready(False)

    def start(self, port: int = 2020):
        """
        Start web service
        :param port: integer of port to listen online
        :return:
        """
        # Function-scope import keeps this change self-contained.
        from time import sleep

        try:
            server = WebServer(self)
            self.cron_job.start()
            # Poll with a short sleep instead of spinning: a bare `pass`
            # loop burns a full CPU core while waiting for the ready flag.
            while not Tools.check_ready():
                sleep(0.1)
            print(
                Tools.get_time(),
                "[Start] Listening WebServer on port {}".format(port)
            )
            server.listen(port)
        except KeyboardInterrupt:
            self.stop()

    def stop(self):
        """
        Shutdown web service
        :return:
        """
        self.cron_job.stop()
        sys.exit(0)

    async def server_response(self, message: str) -> dict:
        """
        Check responses from web service
        :param message: string of JSON format
        :return: dict to response; status 401 for undecodable JSON,
                 400 for a payload that is not an object or has no
                 "version", 500 when handling the request failed
        """
        try:
            req_res = json.loads(message)
        except json.decoder.JSONDecodeError:
            return {"status": 401}

        # Valid JSON is not necessarily an object (e.g. "[1]" or "3");
        # only a dict can carry the "version"/"url" fields.
        if not isinstance(req_res, dict) or req_res.get("version") is None:
            return {"status": 400}

        try:
            return await self._server_response(req_res)
        except Exception:
            # `Exception` (not a bare except) so that task cancellation,
            # KeyboardInterrupt and SystemExit still propagate.
            error_report = Tools.error_report()
            Tools.logger(error_report)
            return {"status": 500}

    async def _server_response(self, data: dict) -> dict:
        """
        Handle responses from web service
        :param data: dict from message decoded
        :return: dict to response; status 400 for a non-numeric version,
                 505 for a version below 1, 401 for a missing/invalid URL,
                 otherwise the result of `analyze`
        """
        version = data.get("version")
        if not isinstance(version, (int, float)):
            # A client-side mistake, not a server error: comparing a
            # string/None with `< 1` would raise TypeError.
            return {"status": 400}
        if version < 1:
            return {"status": 505}

        url = data.get("url")
        if isinstance(url, str) and validators.url(url):
            return await self.analyze(data)

        return {"status": 401}

    @staticmethod
    def _is_private_host(host) -> bool:
        """
        Tell whether a hostname is "localhost" or a private IP literal
        :param host: hostname as returned by `urlparse(...).hostname`
        :return: True if requests to the host must be refused
        """
        if not host:
            return False
        if host == "localhost":
            host = "127.0.0.1"
        try:
            return ipaddress.ip_address(host).is_private
        except ValueError:
            # Not an IP literal (a regular domain name).
            # NOTE(review): names that *resolve* to private addresses are
            # not detected here.
            return False

    async def analyze(self, data: dict) -> dict:
        """
        Do analysis from URL sent by message with databases
        :param data: dict from message decoded
        :return: dict to response; status 200 with "url" and "trust_score",
                 403 when the site is unreachable or the host is private,
                 404 when the site does not answer HTTP 200,
                 405 when the content is not HTML
        """
        forbidden = {
            "status": 403,
            "reason": "forbidden"
        }

        url = url_normalize(data.get("url"))
        url_hash = sha256(url.encode("utf-8")).hexdigest()

        # Refuse private targets *before* fetching, so the service cannot
        # be used to probe the internal network.
        if self._is_private_host(urlparse(url).hostname):
            return forbidden

        try:
            response = requests.get(url, timeout=self.request_timeout)
        except (
                requests.exceptions.ConnectionError,
                requests.exceptions.Timeout
        ) as e:
            return {
                "status": 403,
                "reason": str(e)
            }

        if response.status_code != 200:
            return {
                "status": 404,
                "http_code": response.status_code
            }

        # The header may be absent; treat that like a non-HTML document.
        if "text/html" not in response.headers.get("content-type", ""):
            return {
                "status": 405
            }

        # From here on work with the final URL (after redirects).
        url = response.url

        host = urlparse(url).hostname
        if self._is_private_host(host):
            return forbidden

        cache = self.data_control.find_result_cache_by_url_hash(url_hash)

        if cache is not None:
            score = cache

        elif self.data_control.check_trustlist(url):
            score = 1

        elif self.data_control.check_trust_domain(host):
            score = 1

        elif self.data_control.check_blacklist(url):
            score = 0

        elif self.data_control.check_warnlist(url):
            score = 0.5

        elif self.safe_browsing.lookup([url]):
            score = 0
            self.data_control.mark_as_blacklist(url)

        else:
            score = await self._deep_analyze(data, url)

        if cache is None:
            self.data_control.upload_result_cache(url_hash, score)

        return {
            "status": 200,
            "url": url,
            "trust_score": score
        }

    async def _deep_analyze(self, data: dict, url: str):
        """
        Analyze URL with PageView
        :param data: dict from message decoded
        :param url: URL that latest get via `requests`
        :return: the-trust-score: 0.5 if any origin URL was reported
                 (the URL is then marked in the warnlist), otherwise 1
        """
        target_type = data.get("type", 0)

        origin_urls = [
            origin_url
            async for origin_url in self.view_survey.analyze(target_type, url)
            if origin_url
        ]

        if origin_urls:
            origin_urls_json = json.dumps(origin_urls)
            self.data_control.mark_as_warnlist(url, origin_urls_json)
            return 0.5
        return 1

    async def gen_sample(self):
        """
        Generate PageView samples with trustlist
        :return:
        """
        await self.view_survey.generate()

    def update_blacklist_from_phishtank(self):
        """
        Update database for blacklist from PhishTank
        :return:
        """
        try:
            blacklist = self.phishtank.get_database()
        except OSError:
            print(Tools.get_time(), "[Notice] PhishTank forbidden temporary.")
            return

        lock = Lock()

        def _upload(data):
            """
            Child function, to upload data to database
            :param data: iterable of dict, each one read via its "url" key
            :return:
            """
            for target in data:
                target_url = target.get("url")
                # `with` releases the lock even if a database call raises,
                # so one failing worker cannot deadlock the others.
                with lock:
                    if not self.data_control.check_blacklist(target_url):
                        self.data_control.mark_as_blacklist(target_url)

        workers = []
        for part in Tools.lists_separate(blacklist, 100):
            worker = Thread(
                target=_upload,
                args=(
                    part,
                )
            )
            worker.start()
            workers.append(worker)

        # Wait for every worker, not only the last one started.
        for worker in workers:
            worker.join()
251