1 | 'use strict'; |
||
2 | |||
3 | const Wappalyzer = require('./wappalyzer'); |
||
4 | const request = require('request'); |
||
// NOTE(review): a static-analysis pass flagged the 'request' import above as unused code.
|||
5 | const url = require('url'); |
||
6 | const fs = require('fs'); |
||
7 | const Browser = require('zombie'); |
||
8 | |||
9 | const json = JSON.parse(fs.readFileSync(__dirname + '/apps.json')); |
||
0 ignored issues
–
show
|
|||
10 | |||
// Matches URL paths that are worth crawling: either a path without any dot
// (no file extension at all) or a path ending in one of the listed page
// extensions. Only the end of the extension alternative is anchored so that
// real paths such as "/index.php" match, not just the bare string ".php".
const extensions = /(^[^.]+|\.(asp|aspx|cgi|htm|html|jsp|php))$/;
||
12 | |||
/**
 * Crawls a website with a headless Zombie browser and hands what it finds on
 * each page (response headers, HTML, script URLs, JavaScript globals and
 * cookies) to a Wappalyzer instance, collecting the applications it reports.
 */
class Driver {
  /**
   * @param {string} pageUrl - URL of the first page to analyse.
   * @param {Object} [options] - Overrides for the defaults listed below.
   */
  constructor(pageUrl, options) {
    this.options = Object.assign({}, {
      password: '',
      proxy: null,
      username: '',
      chunkSize: 5,
      debug: false,
      delay: 500,
      htmlMaxCols: 2000,
      htmlMaxRows: 3000,
      maxDepth: 3,
      maxUrls: 10,
      maxWait: 5000,
      recursive: false,
      userAgent: 'Mozilla/5.0 (compatible; Wappalyzer)',
    }, options || {});

    // Normalise option types. Flags go through a numeric conversion first, so
    // true, 1 and '1' enable them while the string 'true' does not.
    // NOTE(review): presumably options can arrive as strings from a CLI - confirm against the caller.
    this.options.debug = Boolean(+this.options.debug);
    this.options.recursive = Boolean(+this.options.recursive);
    // The delay only applies when crawling recursively.
    this.options.delay = this.options.recursive ? parseInt(this.options.delay, 10) : 0;
    this.options.maxDepth = parseInt(this.options.maxDepth, 10);
    this.options.maxUrls = parseInt(this.options.maxUrls, 10);
    this.options.maxWait = parseInt(this.options.maxWait, 10);
    this.options.htmlMaxCols = parseInt(this.options.htmlMaxCols, 10);
    this.options.htmlMaxRows = parseInt(this.options.htmlMaxRows, 10);

    this.origPageUrl = url.parse(pageUrl);
    this.analyzedPageUrls = [];
    this.apps = [];
    this.meta = {};

    this.wappalyzer = new Wappalyzer();

    this.wappalyzer.apps = json.apps;
    this.wappalyzer.categories = json.categories;

    this.wappalyzer.parseJsPatterns();

    // Route Wappalyzer's driver callbacks to this instance.
    this.wappalyzer.driver.log = (message, source, type) => this.log(message, source, type);
    this.wappalyzer.driver.displayApps = (detected, meta, context) => this.displayApps(detected, meta, context);

    // Registers a process-wide listener for every Driver instance created.
    process.on('uncaughtException', e => this.wappalyzer.log('Uncaught exception: ' + e.message, 'driver', 'error'));
  }

  /**
   * Starts the timers and crawls from the original page URL.
   *
   * @returns {Promise<{urls: string[], applications: Object[], meta: Object}>}
   */
  analyze() {
    this.time = {
      start: new Date().getTime(),
      last: new Date().getTime(),
    };

    return this.crawl(this.origPageUrl);
  }

  /**
   * Writes a message to the console when the debug option is enabled.
   *
   * @param {string} message
   * @param {string} source - Component the message comes from.
   * @param {string} type - Label printed in the prefix.
   */
  log(message, source, type) {
    if ( this.options.debug ) {
      console.log('[wappalyzer ' + type + ']', '[' + source + ']', message);
    }
  }

  /**
   * Logs a caught error through Wappalyzer's logger.
   *
   * @param {*} error - Caught value; its message is used when available.
   * @param {string} [source] - Component the error is attributed to.
   */
  logError(error, source = 'driver') {
    const message = error && error.message ? error.message : String(error);

    this.wappalyzer.log(message, source, 'error');
  }

  /**
   * Callback wired to `wappalyzer.driver.displayApps` in the constructor.
   * Stores the latest meta data and records every detected application that
   * has not been recorded under the same name before.
   *
   * @param {Object} detected - Detected applications keyed by name.
   * @param {Object} meta - Stored as-is on `this.meta`.
   */
  displayApps(detected, meta) {
    this.meta = meta;

    Object.keys(detected).forEach(appName => {
      const app = detected[appName];

      // One single-key object per category: { <id>: <category name> }.
      const categories = app.props.cats.map(id => ({ [id]: json.categories[id].name }));

      if ( !this.apps.some(detectedApp => detectedApp.name === app.name) ) {
        this.apps.push({
          name: app.name,
          confidence: app.confidenceTotal.toString(),
          version: app.version,
          icon: app.props.icon || 'default.svg',
          website: app.props.website,
          categories
        });
      }
    });
  }

  /**
   * Visits a page unless it was already analysed or the URL limit is reached.
   *
   * @param {Object} pageUrl - Parsed URL; `href` identifies the page.
   * @param {number} index - Position within the current chunk; multiplies the delay.
   * @param {number} depth - Crawl depth, used for logging only.
   * @returns {Promise<Object[]|undefined>} Links found on the page, or
   *   undefined when the page was skipped or could not be analysed.
   */
  fetch(pageUrl, index, depth) {
    // Return when the URL is a duplicate or maxUrls has been reached
    if ( this.analyzedPageUrls.includes(pageUrl.href) || this.analyzedPageUrls.length >= this.options.maxUrls ) {
      return Promise.resolve();
    }

    this.analyzedPageUrls.push(pageUrl.href);

    const timerScope = {
      last: new Date().getTime()
    };

    this.timer('fetch; url: ' + pageUrl.href + '; depth: ' + depth + '; delay: ' + ( this.options.delay * index ) + 'ms', timerScope);

    return new Promise(resolve => this.sleep(this.options.delay * index).then(() => this.visit(pageUrl, timerScope, resolve)));
  }

  /**
   * Loads a page in a new browser, runs the analysis and reports the links
   * found. `resolve` is always called exactly once: any failure is logged and
   * reported as "no links" so that the crawl can never stall on a page.
   *
   * @param {Object} pageUrl - Parsed URL of the page to load.
   * @param {Object} timerScope - Timer state created by `fetch`.
   * @param {Function} resolve - Receives the array of links, or nothing when
   *   the page could not be analysed.
   */
  visit(pageUrl, timerScope, resolve) {
    const fail = error => {
      this.logError(error);

      resolve();
    };

    try {
      const browser = new Browser({
        proxy: this.options.proxy,
        silent: true,
        strictSSL: false,
        userAgent: this.options.userAgent,
        waitDuration: this.options.maxWait,
      });

      browser.on('authenticate', auth => {
        auth.username = this.options.username;
        auth.password = this.options.password;
      });

      this.timer('browser.visit start; url: ' + pageUrl.href, timerScope);

      browser.visit(pageUrl.href, () => {
        try {
          this.timer('browser.visit end; url: ' + pageUrl.href, timerScope);

          if ( !this.responseOk(browser, pageUrl) ) {
            return resolve();
          }

          const headers = this.getHeaders(browser);
          const html = this.getHtml(browser);
          const scripts = this.getScripts(browser);
          const js = this.getJs(browser);
          const cookies = this.getCookies(browser);

          Promise.resolve(this.wappalyzer.analyze(pageUrl, {
            headers,
            html,
            scripts,
            js,
            cookies,
          }))
            .then(() => resolve(this.getLinks(browser)))
            .catch(fail);
        } catch ( error ) {
          fail(error);
        }
      });
    } catch ( error ) {
      fail(error);
    }
  }

  /**
   * Collects the anchors that point to the original host over http(s) and
   * whose path passes the `extensions` filter. The fragment is cleared on
   * each accepted anchor before its URL is parsed.
   *
   * @param {Object} browser - Browser holding the loaded document.
   * @returns {Object[]} Parsed URLs of the accepted links.
   */
  getLinks(browser) {
    return Array.prototype.reduce.call(
      browser.document.getElementsByTagName('a'), (results, link) => {
        if ( link.protocol.match(/https?:/) && link.hostname === this.origPageUrl.hostname && extensions.test(link.pathname) ) {
          link.hash = '';

          results.push(url.parse(link.href));
        }

        return results;
      }, []
    );
  }

  /**
   * Checks that the page produced a 200 response with an HTML content type
   * and a usable document. Pages skipped because of their content type are
   * removed from the analysed URLs so they do not count towards maxUrls.
   *
   * @param {Object} browser
   * @param {Object} pageUrl
   * @returns {boolean} True when the page can be analysed.
   */
  responseOk(browser, pageUrl) {
    // Validate response
    const resource = browser.resources.length ? browser.resources.filter(resource => resource.response).shift() : null;

    if ( !resource ) {
      this.wappalyzer.log('No response from server; url: ' + pageUrl.href, 'driver', 'error');

      return false;
    }

    if ( resource.response.status !== 200 ) {
      this.wappalyzer.log('Response was not OK; status: ' + resource.response.status + ' ' + resource.response.statusText + '; url: ' + pageUrl.href, 'driver', 'error');

      return false;
    }

    const headers = this.getHeaders(browser);

    // Validate content type
    const contentType = Object.prototype.hasOwnProperty.call(headers, 'content-type') ? headers['content-type'].shift() : null;

    if ( !contentType || !/\btext\/html\b/.test(contentType) ) {
      this.wappalyzer.log('Skipping; url: ' + pageUrl.href + '; content type: ' + contentType, 'driver');

      // Guard against -1: splice(-1, 1) would drop an unrelated URL.
      const position = this.analyzedPageUrls.indexOf(pageUrl.href);

      if ( position !== -1 ) {
        this.analyzedPageUrls.splice(position, 1);
      }

      return false;
    }

    // Validate document
    if ( !browser.document || !browser.document.documentElement ) {
      this.wappalyzer.log('No HTML document; url: ' + pageUrl.href, 'driver', 'error');

      return false;
    }

    return true;
  }

  /**
   * Groups the headers of the first resource that has a response.
   *
   * @param {Object} browser
   * @returns {Object<string, string[]>} Header values keyed by header name;
   *   empty when no resource has a response.
   */
  getHeaders(browser) {
    const headers = {};

    const resource = browser.resources.length ? browser.resources.filter(resource => resource.response).shift() : null;

    if ( resource ) {
      resource.response.headers._headers.forEach(header => {
        if ( !headers[header[0]] ) {
          headers[header[0]] = [];
        }

        headers[header[0]].push(header[1]);
      });
    }

    return headers;
  }

  /**
   * Returns the page's HTML, limited in size: when the document has more
   * than htmlMaxRows rows only the first and the last htmlMaxRows / 2 rows
   * are kept, and every row is cut off at htmlMaxCols characters.
   *
   * @param {Object} browser
   * @returns {string} The limited HTML, or '' when it could not be read.
   */
  getHtml(browser) {
    let html = '';

    try {
      const maxRows = this.options.htmlMaxRows;
      const half = Math.floor(maxRows / 2);

      let rows = browser.html().split('\n');

      if ( rows.length > maxRows ) {
        rows = rows.slice(0, half).concat(rows.slice(rows.length - half));
      }

      html = rows
        .map(line => line.substring(0, this.options.htmlMaxCols))
        .join('\n');
    } catch ( error ) {
      this.wappalyzer.log(error.message, 'browser', 'error');
    }

    return html;
  }

  /**
   * Lists the `src` of every script element that has one.
   *
   * @param {Object} browser
   * @returns {string[]}
   */
  getScripts(browser) {
    if ( !browser.document || !browser.document.scripts ) {
      return [];
    }

    return Array.from(browser.document.scripts)
      .filter(script => script.src)
      .map(script => script.src);
  }

  /**
   * Looks up every property chain from Wappalyzer's JavaScript patterns on
   * the page's window object. Strings and numbers are reported as they are,
   * any other value as a boolean; falsy results are left out.
   *
   * @param {Object} browser
   * @returns {Object} `js[appName][chain][patternIndex] = value`
   */
  getJs(browser) {
    const patterns = this.wappalyzer.jsPatterns;
    const js = {};

    Object.keys(patterns).forEach(appName => {
      js[appName] = {};

      Object.keys(patterns[appName]).forEach(chain => {
        js[appName][chain] = {};

        // The value depends on the chain only, so resolve it once instead of
        // once per pattern. hasOwnProperty is borrowed from Object.prototype
        // because objects found on the page may not inherit it.
        let value = chain.split('.').reduce((parent, property) => {
          return parent && Object.prototype.hasOwnProperty.call(parent, property) ? parent[property] : null;
        }, browser.window);

        value = typeof value === 'string' || typeof value === 'number' ? value : !!value;

        if ( value ) {
          patterns[appName][chain].forEach((pattern, index) => {
            js[appName][chain][index] = value;
          });
        }
      });
    });

    return js;
  }

  /**
   * Copies name, value, domain and path of each browser cookie.
   *
   * @param {Object} browser
   * @returns {Object[]}
   */
  getCookies(browser) {
    const cookies = [];

    if ( browser.cookies ) {
      browser.cookies.forEach(cookie => cookies.push({
        name: cookie.key,
        value: cookie.value,
        domain: cookie.domain,
        path: cookie.path,
      }));
    }

    return cookies;
  }

  /**
   * Analyses a page and, in recursive mode and while below maxDepth, the
   * pages it links to. Failures are logged and never prevent the collected
   * results from being reported.
   *
   * @param {Object} pageUrl - Parsed URL; gets a `canonical` property assigned.
   * @param {number} [index=1] - Position within the current chunk.
   * @param {number} [depth=1] - Current crawl depth.
   * @returns {Promise<{urls: string[], applications: Object[], meta: Object}>}
   */
  crawl(pageUrl, index = 1, depth = 1) {
    pageUrl.canonical = pageUrl.protocol + '//' + pageUrl.host + pageUrl.pathname;

    return new Promise(resolve => {
      this.fetch(pageUrl, index, depth)
        .catch(error => this.logError(error))
        .then(links => {
          if ( links && this.options.recursive && depth < this.options.maxDepth ) {
            return this.chunk(links.slice(0, this.options.maxUrls), depth + 1);
          }

          return undefined;
        })
        // A failing sub-crawl must not keep this promise pending forever.
        .catch(error => this.logError(error))
        .then(() => {
          resolve({
            urls: this.analyzedPageUrls,
            applications: this.apps,
            meta: this.meta
          });
        });
    });
  }

  /**
   * Crawls links in consecutive groups of chunkSize; the links within a
   * group are crawled concurrently. The links array is consumed.
   *
   * @param {Object[]} links - Parsed URLs still to be crawled.
   * @param {number} depth - Depth passed on to `crawl`.
   * @param {number} [chunk=0] - Number of the current group.
   * @returns {Promise<undefined>}
   */
  chunk(links, depth, chunk = 0) {
    if ( links.length === 0 ) {
      return Promise.resolve();
    }

    const chunked = links.splice(0, this.options.chunkSize);

    return new Promise((resolve, reject) => {
      Promise.all(chunked.map((link, index) => this.crawl(link, index, depth)))
        .then(() => this.chunk(links, depth, chunk + 1))
        .then(() => resolve(), reject);
    });
  }

  /**
   * @param {number} ms - Time to wait; a falsy value means no waiting.
   * @returns {Promise<undefined>}
   */
  sleep(ms) {
    return ms ? new Promise(resolve => setTimeout(resolve, ms)) : Promise.resolve();
  }

  /**
   * Logs a message with the time elapsed since the previous call for the
   * given scope and since the start of the analysis.
   *
   * @param {string} message
   * @param {{last: number}} scope - Updated with the current time.
   */
  timer(message, scope) {
    const time = new Date().getTime();

    // analyze() normally sets this; start the clock here when it was skipped.
    if ( !this.time ) {
      this.time = {
        start: time,
        last: time,
      };
    }

    const sinceStart = ( Math.round(( time - this.time.start ) / 10) / 100) + 's';
    const sinceLast = ( Math.round(( time - scope.last ) / 10) / 100) + 's';

    this.wappalyzer.log('[timer] ' + message + '; lapsed: ' + sinceLast + ' / ' + sinceStart, 'driver');

    scope.last = time;
  }
}
||
352 | |||
353 | module.exports = Driver; |
||
354 |