Completed
Push — master ( 0083cf...5b9113 )
by Elbert
41s
created

src/drivers/npm/driver.js (2 issues)

1
'use strict';
2
3
const Wappalyzer = require('./wappalyzer');
4
const request = require('request');
0 ignored issues
show
The constant request seems to be never used. Consider removing it.
Loading history...
5
const url = require('url');
6
const fs = require('fs');
7
const Browser = require('zombie');
8
9
// Parsed contents of apps.json, read synchronously from this module's directory
// at load time. The class below reads its `apps` and `categories` properties.
const json = JSON.parse(fs.readFileSync(__dirname + '/apps.json'));
0 ignored issues
show
Consider using the path module for constructing paths since they are otherwise not cross-OS compatible.
Loading history...
10
11
// Pathnames worth crawling: either no dot anywhere (no file extension) or a
// known page extension at the very end. The previous pattern anchored the
// extension alternative to the start of the string as well, so a pathname
// such as "/index.php" could never match.
const extensions = /(^[^.]+|\.(asp|aspx|cgi|htm|html|jsp|php))$/;
12
13
/**
 * Visits a page (and optionally the same-host pages it links to) with a
 * Zombie browser and hands each page's headers, HTML, scripts, JS globals and
 * cookies to Wappalyzer, collecting the detected applications.
 */
class Driver {
  /**
   * @param {string} pageUrl - URL to start from; parsed with url.parse().
   * @param {Object} [options] - Overrides for the defaults declared below.
   */
  constructor(pageUrl, options) {
    this.options = Object.assign({}, {
      password: '',
      proxy: null,
      username: '',
      chunkSize: 5,
      debug: false,
      delay: 500,
      htmlMaxCols: 2000,
      htmlMaxRows: 3000,
      maxDepth: 3,
      maxUrls: 10,
      maxWait: 5000,
      recursive: false,
      userAgent: 'Mozilla/5.0 (compatible; Wappalyzer)',
    }, options || {});

    // Normalise types. Flags go through unary plus first, so "1"/1/true are
    // true while any non-numeric string (including "true") becomes false.
    this.options.debug = Boolean(+this.options.debug);
    this.options.recursive = Boolean(+this.options.recursive);

    // The delay only applies when crawling recursively
    this.options.delay = this.options.recursive ? parseInt(this.options.delay, 10) : 0;
    this.options.maxDepth = parseInt(this.options.maxDepth, 10);
    this.options.maxUrls = parseInt(this.options.maxUrls, 10);
    this.options.maxWait = parseInt(this.options.maxWait, 10);
    this.options.htmlMaxCols = parseInt(this.options.htmlMaxCols, 10);
    this.options.htmlMaxRows = parseInt(this.options.htmlMaxRows, 10);

    this.origPageUrl = url.parse(pageUrl);
    this.analyzedPageUrls = [];
    this.apps = [];
    this.meta = {};

    this.wappalyzer = new Wappalyzer();

    this.wappalyzer.apps = json.apps;
    this.wappalyzer.categories = json.categories;

    this.wappalyzer.parseJsPatterns();

    // Route Wappalyzer's driver callbacks to this instance
    this.wappalyzer.driver.log = (message, source, type) => this.log(message, source, type);
    this.wappalyzer.driver.displayApps = (detected, meta, context) => this.displayApps(detected, meta, context);

    // NOTE(review): this adds one process-wide listener per Driver instance and
    // keeps the process alive after uncaught exceptions. Left unchanged because
    // callers may rely on it; consider registering it once at module level.
    process.on('uncaughtException', e => this.wappalyzer.log('Uncaught exception: ' + e.message, 'driver', 'error'));
  }

  /**
   * Starts the timers and crawls from the original page URL.
   *
   * @returns {Promise<{urls: string[], applications: Object[], meta: Object}>}
   */
  analyze() {
    const now = Date.now();

    this.time = {
      start: now,
      last: now,
    };

    return this.crawl(this.origPageUrl);
  }

  /**
   * Writes a message to the console when the debug option is enabled.
   *
   * @param {string} message
   * @param {string} source
   * @param {string} type
   */
  log(message, source, type) {
    if ( this.options.debug ) {
      console.log(`[wappalyzer ${type}]`, `[${source}]`, message);
    }
  }

  /**
   * Stores the latest meta data and appends every detected application that
   * has not been recorded yet (matched by name) to this.apps.
   *
   * @param {Object} detected - Detected applications keyed by name.
   * @param {Object} meta
   */
  displayApps(detected, meta) {
    this.meta = meta;

    Object.keys(detected).forEach(appName => {
      const app = detected[appName];

      if ( this.apps.some(detectedApp => detectedApp.name === app.name) ) {
        return;
      }

      // Each category is a single-key object: { [id]: name }
      const categories = app.props.cats.map(id => ({ [id]: json.categories[id].name }));

      this.apps.push({
        name: app.name,
        confidence: app.confidenceTotal.toString(),
        version: app.version,
        icon: app.props.icon || 'default.svg',
        website: app.props.website,
        categories,
      });
    });
  }

  /**
   * Visits a URL after an index-based delay unless it was already analysed or
   * the maxUrls limit has been reached.
   *
   * @param {Object} pageUrl - Parsed URL; its href is used as the identity.
   * @param {number} index - Multiplier for the configured delay.
   * @param {number} depth - Current crawl depth (only used for logging here).
   * @returns {Promise<Object[]|undefined>} Links found on the page, if any.
   */
  fetch(pageUrl, index, depth) {
    // Return when the URL is a duplicate or maxUrls has been reached
    if ( this.analyzedPageUrls.indexOf(pageUrl.href) !== -1 || this.analyzedPageUrls.length >= this.options.maxUrls ) {
      return Promise.resolve();
    }

    this.analyzedPageUrls.push(pageUrl.href);

    const timerScope = {
      last: Date.now(),
    };

    const delay = this.options.delay * index;

    this.timer(`fetch; url: ${pageUrl.href}; depth: ${depth}; delay: ${delay}ms`, timerScope);

    // Calling visit() from inside the executor turns a synchronous throw into
    // a rejection instead of leaving the returned promise pending forever.
    return this.sleep(delay)
      .then(() => new Promise(resolve => this.visit(pageUrl, timerScope, resolve)));
  }

  /**
   * Loads the page in a new browser, analyses it and calls resolve() with the
   * crawlable links, or with nothing when the page is skipped or fails.
   *
   * @param {Object} pageUrl - Parsed URL of the page to visit.
   * @param {{last: number}} timerScope - Timer state for this URL.
   * @param {Function} resolve - Called exactly when the visit has finished.
   */
  visit(pageUrl, timerScope, resolve) {
    const browser = new Browser({
      proxy: this.options.proxy,
      silent: true,
      strictSSL: false,
      userAgent: this.options.userAgent,
      waitDuration: this.options.maxWait,
    });

    browser.on('authenticate', auth => {
      auth.username = this.options.username;
      auth.password = this.options.password;
    });

    // Any failure while analysing must still settle the pending fetch
    const fail = error => {
      this.wappalyzer.log(error && error.message ? error.message : String(error), 'driver', 'error');

      resolve();
    };

    this.timer('browser.visit start; url: ' + pageUrl.href, timerScope);

    browser.visit(pageUrl.href, () => {
      this.timer('browser.visit end; url: ' + pageUrl.href, timerScope);

      try {
        if ( !this.responseOk(browser, pageUrl) ) {
          return resolve();
        }

        const headers = this.getHeaders(browser);
        const html = this.getHtml(browser);
        const scripts = this.getScripts(browser);
        const js = this.getJs(browser);
        const cookies = this.getCookies(browser);

        this.wappalyzer.analyze(pageUrl, {
          headers,
          html,
          scripts,
          js,
          cookies,
        })
          .then(() => resolve(this.getLinks(browser)))
          .catch(fail);
      } catch ( error ) {
        fail(error);
      }
    });
  }

  /**
   * Collects the page's anchors that use http(s), point at the original host
   * and have a crawlable pathname. The hash is cleared on each matching anchor
   * before its href is parsed.
   *
   * @param {Object} browser
   * @returns {Object[]} Parsed URLs.
   */
  getLinks(browser) {
    return Array.prototype.reduce.call(
      browser.document.getElementsByTagName('a'), (results, link) => {
        if ( link.protocol.match(/https?:/) && link.hostname === this.origPageUrl.hostname && extensions.test(link.pathname) ) {
          link.hash = '';

          results.push(url.parse(link.href));
        }

        return results;
      }, []
    );
  }

  /**
   * Returns the first browser resource that carries a response, or null.
   *
   * @param {Object} browser
   * @returns {Object|null}
   */
  getFirstResponseResource(browser) {
    if ( !browser.resources.length ) {
      return null;
    }

    return browser.resources.filter(resource => resource.response).shift() || null;
  }

  /**
   * Checks that the page returned status 200, a text/html content type and a
   * document. Pages skipped for their content type are removed from
   * analyzedPageUrls again.
   *
   * @param {Object} browser
   * @param {Object} pageUrl
   * @returns {boolean}
   */
  responseOk(browser, pageUrl) {
    // Validate response
    const resource = this.getFirstResponseResource(browser);

    if ( !resource ) {
      this.wappalyzer.log('No response from server; url: ' + pageUrl.href, 'driver', 'error');

      return false;
    }

    if ( resource.response.status !== 200 ) {
      this.wappalyzer.log(`Response was not OK; status: ${resource.response.status} ${resource.response.statusText}; url: ${pageUrl.href}`, 'driver', 'error');

      return false;
    }

    const headers = this.getHeaders(browser);

    // Validate content type
    const contentType = Object.prototype.hasOwnProperty.call(headers, 'content-type') ? headers['content-type'].shift() : null;

    if ( !contentType || !/\btext\/html\b/.test(contentType) ) {
      this.wappalyzer.log('Skipping; url: ' + pageUrl.href + '; content type: ' + contentType, 'driver');

      // Guard against -1: splice(-1, 1) would drop an unrelated URL
      const position = this.analyzedPageUrls.indexOf(pageUrl.href);

      if ( position !== -1 ) {
        this.analyzedPageUrls.splice(position, 1);
      }

      return false;
    }

    // Validate document
    if ( !browser.document || !browser.document.documentElement ) {
      this.wappalyzer.log('No HTML document; url: ' + pageUrl.href, 'driver', 'error');

      return false;
    }

    return true;
  }

  /**
   * Groups the headers of the first response by header name.
   *
   * @param {Object} browser
   * @returns {Object<string, string[]>} Empty when there is no response.
   */
  getHeaders(browser) {
    const headers = {};

    const resource = this.getFirstResponseResource(browser);

    if ( resource ) {
      // assumes _headers is a list of [name, value] pairs — TODO confirm
      resource.response.headers._headers.forEach(header => {
        const name = header[0];

        if ( !headers[name] ) {
          headers[name] = [];
        }

        headers[name].push(header[1]);
      });
    }

    return headers;
  }

  /**
   * Returns the page HTML limited to htmlMaxRows lines (first and last halves
   * of longer documents) with each line cut to htmlMaxCols characters.
   *
   * @param {Object} browser
   * @returns {string} Empty string when the HTML cannot be read.
   */
  getHtml(browser) {
    let html = '';

    try {
      const maxRows = this.options.htmlMaxRows;
      const maxCols = this.options.htmlMaxCols;

      let lines = browser.html().split('\n');

      if ( lines.length > maxRows ) {
        const headCount = Math.ceil(maxRows / 2);
        const tailCount = Math.floor(maxRows / 2);

        // slice(-0) would return everything, hence the explicit check
        lines = lines.slice(0, headCount).concat(tailCount > 0 ? lines.slice(-tailCount) : []);
      }

      html = lines
        .map(line => line.substring(0, maxCols))
        .join('\n');
    } catch ( error ) {
      this.wappalyzer.log(error.message, 'browser', 'error');
    }

    return html;
  }

  /**
   * Lists the src of every script element that has one.
   *
   * @param {Object} browser
   * @returns {string[]}
   */
  getScripts(browser) {
    if ( !browser.document || !browser.document.scripts ) {
      return [];
    }

    return Array.prototype.slice
      .call(browser.document.scripts)
      .map(script => script.src)
      .filter(src => src);
  }

  /**
   * Resolves every property chain in wappalyzer.jsPatterns against the page's
   * window. Strings and numbers are kept as they are, anything else becomes a
   * boolean; truthy results are stored once per pattern index.
   *
   * @param {Object} browser
   * @returns {Object} js[appName][chain][index] = value
   */
  getJs(browser) {
    const patterns = this.wappalyzer.jsPatterns;
    const js = {};

    Object.keys(patterns).forEach(appName => {
      js[appName] = {};

      Object.keys(patterns[appName]).forEach(chain => {
        js[appName][chain] = {};

        // The value only depends on the chain, so resolve it once
        let value = chain.split('.').reduce((parent, property) => {
          return parent && Object.prototype.hasOwnProperty.call(parent, property) ? parent[property] : null;
        }, browser.window);

        value = typeof value === 'string' || typeof value === 'number' ? value : !!value;

        if ( !value ) {
          return;
        }

        patterns[appName][chain].forEach((pattern, index) => {
          js[appName][chain][index] = value;
        });
      });
    });

    return js;
  }

  /**
   * Copies name, value, domain and path of each browser cookie.
   *
   * @param {Object} browser
   * @returns {Object[]}
   */
  getCookies(browser) {
    const cookies = [];

    if ( browser.cookies ) {
      browser.cookies.forEach(cookie => {
        cookies.push({
          name: cookie.key,
          value: cookie.value,
          domain: cookie.domain,
          path: cookie.path,
        });
      });
    }

    return cookies;
  }

  /**
   * Fetches a page and, when recursion is enabled and maxDepth is not reached,
   * the links found on it. Failures are logged, so the returned promise
   * resolves with whatever has been collected so far.
   *
   * @param {Object} pageUrl - Parsed URL; a canonical property is added to it.
   * @param {number} [index=1]
   * @param {number} [depth=1]
   * @returns {Promise<{urls: string[], applications: Object[], meta: Object}>}
   */
  crawl(pageUrl, index = 1, depth = 1) {
    pageUrl.canonical = pageUrl.protocol + '//' + pageUrl.host + pageUrl.pathname;

    return this.fetch(pageUrl, index, depth)
      .then(links => {
        if ( links && this.options.recursive && depth < this.options.maxDepth ) {
          return this.chunk(links.slice(0, this.options.maxUrls), depth + 1);
        }

        return undefined;
      })
      .catch(error => {
        this.wappalyzer.log(error && error.message ? error.message : String(error), 'driver', 'error');
      })
      .then(() => ({
        urls: this.analyzedPageUrls,
        applications: this.apps,
        meta: this.meta,
      }));
  }

  /**
   * Crawls the links in batches of chunkSize, one batch after the other.
   * The links array is consumed (emptied) in the process.
   *
   * @param {Object[]} links
   * @param {number} depth
   * @param {number} [chunk=0] - Batch counter passed along the recursion.
   * @returns {Promise<undefined>}
   */
  chunk(links, depth, chunk = 0) {
    if ( links.length === 0 ) {
      return Promise.resolve();
    }

    const chunked = links.splice(0, this.options.chunkSize);

    return Promise.all(chunked.map((link, index) => this.crawl(link, index, depth)))
      .then(() => this.chunk(links, depth, chunk + 1));
  }

  /**
   * @param {number} ms
   * @returns {Promise<undefined>} Resolves after ms milliseconds; a falsy ms
   *   gives an already resolved promise without setting a timeout.
   */
  sleep(ms) {
    if ( !ms ) {
      return Promise.resolve();
    }

    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Logs the time passed since the previous timer call in this scope and
   * since analyze() started, then updates scope.last.
   *
   * @param {string} message
   * @param {{last: number}} scope
   */
  timer(message, scope) {
    const time = Date.now();

    // this.time only exists once analyze() has run
    const start = this.time ? this.time.start : time;

    const sinceStart = ( Math.round(( time - start ) / 10) / 100) + 's';
    const sinceLast = ( Math.round(( time - scope.last ) / 10) / 100) + 's';

    this.wappalyzer.log('[timer] ' + message + '; lapsed: ' + sinceLast + ' / ' + sinceStart, 'driver');

    scope.last = time;
  }
}
352
353
module.exports = Driver;
354