1 | #!/usr/bin/python |
||
2 | # coding=utf-8 |
||
3 | |||
4 | import requests |
||
5 | import httplib |
||
6 | import time |
||
7 | import sys |
||
8 | |||
9 | from system.logging import Logger |
||
10 | from utils import Utils |
||
11 | # |
||
12 | # |
||
13 | # Incident Statuses |
||
14 | # |
||
15 | # 0 = Scheduled |
||
16 | # 1 = Investigating |
||
17 | # 2 = Identified |
||
18 | # 3 = Watching |
||
19 | # 4 = Fixed |
||
20 | # |
||
21 | # Component Statuses |
||
22 | # |
||
23 | # 1 = Operational |
||
24 | # 2 = Performance Issues |
||
25 | # 3 = Partial Outage |
||
26 | # 4 = Major Outage |
||
27 | |||
28 | |||
29 | |||
class Cachet(object):
    """Probe every configured URL and mirror the outcome to a Cachet status page.

    Instantiating the class performs one complete monitoring run: the
    configuration is read through ``Utils.readConfig()``, the Cachet base URL
    is pinged, and, if it answers with HTTP 200, every enabled entry under the
    ``monitoring`` configuration key is checked.  The process exits with
    status 1 if the ping does not return 200 or if the run raises.
    """

    # Descriptions for non-standard / vendor specific status codes.  This table
    # is consulted before httplib.responses when a failure message is built.
    httpErrors = {
        # Cloudflare Errors
        520: "Web server is returning an unknown error (Cloudflare)",
        521: "Web server is down (Cloudflare)",
        522: "Connection timed out (Cloudflare)",
        523: "Origin is unreachable (Cloudflare)",
        524: "A timeout occurred (Cloudflare)",
        525: "SSL handshake failed (Cloudflare)",
        526: "Invalid SSL certificate (Cloudflare)",
        # Nginx Errors
        444: "No Response (Nginx)",
        494: "Request Header Too Large (Nginx)",
        495: "Cert Error (Nginx)",
        496: "No Cert (Nginx)",
        497: "HTTP to HTTPS (Nginx)",
        499: "Client Closed Request (Nginx)",
        # Other
        # 1xx
        102: "Server has received and is processing the request",
        103: "resume aborted PUT or POST requests",
        122: "URI is longer than a maximum of 2083 characters",
        # 2xx
        207: "XML return with possible multiple separate responses.",
        208: "The results are previously returned.",
        226: "The request has been fulfilled and response is in instance manipulations.",
        # 3xx
        308: "Please connect again to the different URL using the same method.",
        # 4xx
        418: "I'm a teapot.",
        # 420 is unofficial and is used with both meanings.
        420: "Method Failure (Spring Framework) / Enhance Your Calm (Twitter)",
        421: "Misdirected Request",
        422: "Unprocessable Entity",
        423: "Locked",
        424: "Failed Dependency",
        426: "Upgrade Required",
        428: "Precondition Required",
        429: "Too Many Requests",
        431: "Request Header Fields Too Large",
        440: "Login Timeout (Microsoft)",
        449: "Retry With (Microsoft)",
        450: "Blocked by Windows Parental Controls",
        451: "Unavailable For Legal Reasons",
        # 5xx
        506: "Variant Also Negotiates (RFC 2295)",
        507: "Insufficient Storage (WebDAV; RFC 4918)",
        508: "Loop Detected (WebDAV; RFC 5842)",
        509: "Bandwidth Limit Exceeded (Apache bw/limited extension)",
        510: "Not Extended (RFC 2774)",
        511: "Network Authentication Required",
    }

    # Cachet component statuses used by this class.
    _OPERATIONAL = 1
    _PERFORMANCE_ISSUES = 2
    _MAJOR_OUTAGE = 4
    # Cachet incident status meaning "Fixed".
    _INCIDENT_FIXED = 4

    def __init__(self):
        """Load the configuration and immediately run one monitoring pass.

        Raises:
            SystemExit: with code 1 when the initial ping does not return
                HTTP 200, or when the ping or the monitoring pass raises.
        """
        self.logs = Logger()
        self.utils = Utils()
        self.config = self.utils.readConfig()
        self.base_url = self.config['api_url']
        self.api_url = '%s/api/v1' % self.config['api_url']
        self.api_token = self.config['api_token']
        self.maxRetries = self.config['retries']

        try:
            ping_status = self.checkInitialPing()
            if ping_status != 200:
                # Say why we stop instead of exiting silently.
                self.logs.error("Initial ping of %s returned HTTP %s, aborting"
                                % (self.base_url, ping_status))
                sys.exit(1)
            self.checkSites()
        except Exception as e:
            # SystemExit is not an Exception subclass, so the exit above
            # passes straight through this handler.
            self.logs.error(e)
            sys.exit(1)

    def checkSites(self):
        """Check every enabled monitor and log a separator line afterwards.

        Disabled monitors are skipped before any Cachet API call is made, so
        they can neither fail the run nor inherit another site's response.
        """
        for monitor in self.config['monitoring']:
            if not monitor['enabled']:
                continue
            self._check_site(monitor)
        self.logs.info("############################")

    def _check_site(self, monitor):
        """Probe one monitor entry and report the outcome to Cachet."""
        status_codes = monitor['expected_status_code']
        if isinstance(status_codes, int):
            # Accept a single expected code as well as a list of codes.
            status_codes = [status_codes]
        c_id = monitor['component_id']

        # Everything the reporting helpers need to know about this check.
        site = {
            'url': monitor['url'],
            'method': monitor['method'],
            'component_id': c_id,
            'localtime': time.asctime(),
            'current_status': self.utils.getComponentsByID(c_id).json()['data']['status'],
            'incident_id': self.checkForIncident(c_id),
        }

        # Only the probe itself is guarded: a failure while talking to the
        # Cachet API must not be reported as an outage of the monitored site.
        try:
            r = self._request(site['method'], site['url'], monitor['timeout'])
        except httplib.BadStatusLine as e:
            self.logs.error("%s \nCould not fetch %s" % (e, site['url']))
            return
        except Exception as e:
            label, c_status, log = self._classify_exception(e)
            self._report_failure(site, label, e, c_status, log)
            return

        if r.status_code in status_codes:
            self._report_success(site)
        else:
            self._report_failure(site, "HTTP Status Error",
                                 self._describe_status(r.status_code),
                                 self._MAJOR_OUTAGE, self.logs.warn)

    def _request(self, method, url, timeout):
        """Send the probe using the configured HTTP verb and return the response."""
        return requests.request(method.upper(), url, verify=True, timeout=timeout)

    def _classify_exception(self, error):
        """Map a probe exception to (label, component status, log function).

        The order mirrors the original ``except`` chain.  In particular
        ConnectionError is tested before Timeout, so a connect timeout (which
        inherits from both) is reported as a connection error.
        """
        known = (
            (requests.exceptions.HTTPError, "HTTP Error", self._MAJOR_OUTAGE),
            (requests.exceptions.SSLError, "SSL Error", self._MAJOR_OUTAGE),
            (requests.exceptions.ConnectionError, "Connection Error", self._MAJOR_OUTAGE),
            (requests.exceptions.Timeout, "Request Timeout", self._PERFORMANCE_ISSUES),
            (requests.exceptions.TooManyRedirects, "Too Many Redirects", self._MAJOR_OUTAGE),
            (requests.exceptions.RetryError, "Retry Error", self._MAJOR_OUTAGE),
        )
        for exc_type, label, c_status in known:
            if isinstance(error, exc_type):
                return label, c_status, self.logs.warn
        return "Unexpected Error", self._MAJOR_OUTAGE, self.logs.error

    def _describe_status(self, status_code):
        """Return a description for a status code without raising on unknown codes."""
        if status_code in self.httpErrors:
            return self.httpErrors[status_code]
        return httplib.responses.get(status_code, "Unknown Status Code")

    def _report_failure(self, site, label, detail, c_status, log):
        """Open an incident if none is open, update the component, and log."""
        url = site['url']
        error_code = '%s check **failed** - %s \n\n`%s %s %s: %s`' % (
            url, site['localtime'], site['method'], url, label, detail)
        if not site['incident_id']:
            # The two positional 1s are passed exactly as before; presumably
            # incident status 1 (Investigating) and visible=1 -- confirm
            # against Utils.postIncidents.
            self.utils.postIncidents('%s: %s' % (url, label), error_code, 1, 1,
                                     component_id=site['component_id'],
                                     component_status=c_status)
        if site['current_status'] != self._MAJOR_OUTAGE:
            # Never downgrade a component that is already in major outage.
            self.utils.putComponentsByID(site['component_id'], status=c_status)
        log(error_code.replace('\n', '').replace('`', ''))

    def _report_success(self, site):
        """Restore the component, and close its open incident, after a good check."""
        url = site['url']
        incident_id = site['incident_id']
        if site['current_status'] == self._OPERATIONAL:
            self.logs.info("%s no issues found" % url)
        elif incident_id:
            incident_description = "Resolved at %s\n\n***\n\n%s" % (
                site['localtime'], self.getIncidentInfo(incident_id))
            self.utils.putIncidentsByID(incident_id, message=incident_description,
                                        status=self._INCIDENT_FIXED,
                                        component_id=site['component_id'],
                                        component_status=self._OPERATIONAL)
            self.logs.info("Issue with %s has been resolved" % url)
        else:
            self.utils.putComponentsByID(site['component_id'], status=self._OPERATIONAL)
            self.logs.info("Issue with %s has been resolved" % url)

    def checkForIncident(self, component_id):
        """Return the id of the first unresolved incident for ``component_id``.

        Incidents are fetched fresh on every call.  Returns None when every
        incident of the component has status 4 (Fixed) or when it has none.
        """
        for incident in self.utils.getIncidents().json()['data']:
            if (incident['component_id'] == component_id
                    and incident['status'] != self._INCIDENT_FIXED):
                return incident['id']
        return None

    def getIncidentInfo(self, i_id):
        """Return the ``message`` field of the incident with id ``i_id``."""
        incident = self.utils.getIncidentsByID(i_id).json()
        return incident['data']['message']

    def checkInitialPing(self):
        """GET the Cachet base URL (10 s timeout) and return the HTTP status code."""
        rPing = requests.get(self.base_url, verify=True, timeout=10)
        return rPing.status_code
||
276 |