Completed
Push — master ( 0083cf...5b9113 )
by Elbert
41s
created

src/wappalyzer.js (4 issues)

/**
 * Wappalyzer v5
 *
 * Created by Elbert Alias <[email protected]>
 *
 * License: GPLv3 http://www.gnu.org/licenses/gpl-3.0.txt
 */
8
9
'use strict';
10
11
/**
 * Regular expressions used to decide whether a hostname may be recorded
 * in the hostname cache (see `trackDetectedApps`).
 */
const validation = {
  // Accepts hostnames that end in "<something>.<tld>", optionally with a
  // two/three letter second-level label (e.g. "co.uk"). The final label is
  // limited to 2-6 lowercase letters.
  // NOTE(review): the dot in "(www.)" is unescaped; this does not affect
  // `.test()` results because the group is optional and unanchored.
  hostname: /(www.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/,

  // Rejects hostnames containing a development/staging style label followed
  // by a dot, the path fragment "/admin", or the suffix ".local".
  // NOTE(review): the alternatives are not anchored to a label boundary, so
  // e.g. "latest." also matches "test." - confirm whether that is intended.
  hostnameBlacklist: /((local|dev(elopment)?|stag(e|ing)?|test(ing)?|demo(shop)?|admin|google|cache)\.|\/admin|\.local)/,
};
15
16
/**
 * Technology detection engine.
 *
 * Holds the application definitions (`apps`), matches them against data
 * collected for a page (URL, HTML, meta tags, script URLs, cookies, response
 * headers and JavaScript values) and reports the outcome through `driver`.
 *
 * `driver` starts out as an empty object; the methods this class calls on it
 * are `log`, `displayApps`, `getRobotsTxt`, `document.createElement` and,
 * optionally, `ping`.
 */
class Wappalyzer {
  constructor() {
    this.apps = {};
    this.categories = {};
    this.driver = {};
    this.jsPatterns = {};
    this.detected = {};
    this.hostnameCache = {};
    this.adCache = [];

    this.config = {
      websiteURL: 'https://www.wappalyzer.com/',
      twitterURL: 'https://twitter.com/Wappalyzer',
      githubURL: 'https://github.com/AliasIO/Wappalyzer',
    };
  }

  /**
   * Log a message through the driver.
   *
   * @param {string} message
   * @param {string} [source] - defaults to an empty string
   * @param {string} [type] - defaults to 'debug'
   */
  log(message, source, type) {
    this.driver.log(message, source || '', type || 'debug');
  }

  /**
   * Run `iterator` for every item, each on its own 1 ms timer, so long
   * pattern lists do not run in one synchronous stretch.
   *
   * @param {Array} iterable - a falsy value is treated as an empty list
   * @param {Function} iterator - called with each item
   * @returns {Promise<Array>} resolves with the iterator results
   */
  asyncForEach(iterable, iterator) {
    return Promise.all(( iterable || [] ).map(item => new Promise(resolve => setTimeout(() => resolve(iterator(item)), 1))));
  }

  /**
   * Analyze the data collected for a URL and hand the detected applications
   * to `driver.displayApps`.
   *
   * @param {Object} url - reads `canonical`, `hostname` and `protocol`
   * @param {Object} data - optional keys: `html`, `scripts`, `cookies`,
   *   `headers`, `js`. A non-string `html` value is replaced by ''.
   * @param {*} context - passed through to `driver.displayApps`
   * @returns {Promise<undefined>} settles once every analyzer has finished;
   *   rejects if an analyzer fails
   */
  analyze(url, data, context) {
    const startTime = new Date();
    const promises = [];
    const apps = {};

    if ( this.detected[url.canonical] === undefined ) {
      this.detected[url.canonical] = {};
    }

    const cached = this.detected[url.canonical];

    // Additional information
    let language = null;

    if ( data.html ) {
      if ( typeof data.html !== 'string' ) {
        data.html = '';
      }

      const matches = data.html.match(/<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"/i);

      language = matches && matches.length ? matches[1] : null;
    }

    Object.keys(this.apps).forEach(appName => {
      // Reuse the instance cached for this URL so earlier results accumulate
      const app = cached[appName] ? cached[appName] : new Application(appName, this.apps[appName]);

      apps[appName] = app;

      // URL matching is asynchronous as well; it must be awaited with the
      // other analyzers or its results arrive after the apps are filtered.
      promises.push(this.analyzeUrl(app, url));

      if ( data.html ) {
        promises.push(this.analyzeHtml(app, data.html));
        promises.push(this.analyzeMeta(app, data.html));
      }

      if ( data.scripts ) {
        promises.push(this.analyzeScripts(app, data.scripts));
      }

      if ( data.cookies ) {
        promises.push(this.analyzeCookies(app, data.cookies));
      }

      if ( data.headers ) {
        promises.push(this.analyzeHeaders(app, data.headers));
      }
    });

    if ( data.js ) {
      Object.keys(data.js).forEach(appName => {
        // Ignore results for applications that are not (or no longer) known
        if ( typeof data.js[appName] !== 'function' && apps[appName] ) {
          promises.push(this.analyzeJs(apps[appName], data.js[appName]));
        }
      });
    }

    return Promise.all(promises).then(() => {
      // Keep only applications that were detected with non-zero confidence
      Object.keys(apps).forEach(appName => {
        const app = apps[appName];

        if ( !app.detected || !app.getConfidence() ) {
          delete apps[appName];
        }
      });

      this.resolveExcludes(apps);
      this.resolveImplies(apps, url.canonical);

      this.cacheDetectedApps(apps, url.canonical);
      this.trackDetectedApps(apps, url, language);

      this.log('Processing ' + Object.keys(data).join(', ') + ' took ' + (( new Date() - startTime ) / 1000).toFixed(2) + 's (' + url.hostname + ')', 'core');

      if ( Object.keys(apps).length ) {
        this.log('Identified ' + Object.keys(apps).join(', ') + ' (' + url.hostname + ')', 'core');
      }

      this.driver.displayApps(this.detected[url.canonical], { language }, context);
    });
  }

  /**
   * Cache detected ads
   *
   * @param {*} ad - appended to `adCache`, flushed by `ping`
   */
  cacheDetectedAds(ad) {
    this.adCache.push(ad);
  }

  /**
   * Check a URL against the disallowed paths returned by
   * `driver.getRobotsTxt`.
   *
   * @param {string} url
   * @returns {Promise<undefined>} resolves when the URL may be analyzed (also
   *   when the robots.txt lookup itself fails); rejects without a reason for
   *   non-HTTP(S) URLs and for paths that start with a disallowed path
   */
  robotsTxtAllows(url) {
    return new Promise((resolve, reject) => {
      const parsed = this.parseUrl(url);

      if ( parsed.protocol !== 'http:' && parsed.protocol !== 'https:' ) {
        return reject();
      }

      return this.driver.getRobotsTxt(parsed.host, parsed.protocol === 'https:')
        .then(robotsTxt => {
          if ( robotsTxt.some(disallowedPath => parsed.pathname.indexOf(disallowedPath) === 0) ) {
            return reject();
          }

          return resolve();
        }, () => resolve());
    });
  }

  /**
   * Parse a URL
   *
   * @param {string} url
   * @returns {Object} an anchor element created through `driver.document`,
   *   with an extra `canonical` property (protocol + host + pathname)
   */
  parseUrl(url) {
    const a = this.driver.document.createElement('a');

    a.href = url;

    a.canonical = a.protocol + '//' + a.host + a.pathname;

    return a;
  }

  /**
   * Extract the `Disallow` paths that apply to all agents ("*") or to
   * "wappalyzer" from the text of a robots.txt file.
   *
   * @param {string} robotsTxt
   * @returns {string[]} disallowed paths, in file order
   */
  parseRobotsTxt(robotsTxt) {
    const disallow = [];

    let userAgent;

    // Accept LF and CRLF line endings and ignore surrounding whitespace;
    // "." never matches "\r", so untrimmed CRLF lines would never match.
    robotsTxt.split(/\r?\n/).forEach(rawLine => {
      const line = rawLine.trim();
      const agentMatch = /^User-agent:\s*(.+)$/i.exec(line);

      if ( agentMatch ) {
        userAgent = agentMatch[1].toLowerCase();

        return;
      }

      if ( userAgent === '*' || userAgent === 'wappalyzer' ) {
        const disallowMatch = /^Disallow:\s*(.+)$/i.exec(line);

        if ( disallowMatch ) {
          disallow.push(disallowMatch[1]);
        }
      }
    });

    return disallow;
  }

  /**
   * Flush the hostname cache (more than 100 hostnames) and the ad cache
   * (more than 50 ads) through `driver.ping`.
   */
  ping() {
    if ( Object.keys(this.hostnameCache).length > 100 ) {
      this.driver.ping(this.hostnameCache);

      this.hostnameCache = {};
    }

    if ( this.adCache.length > 50 ) {
      this.driver.ping({}, this.adCache);

      this.adCache = [];
    }
  }

  /**
   * Enclose string in array
   *
   * @param {*} value
   * @returns {Array} `value` itself when it already is an array
   */
  asArray(value) {
    return Array.isArray(value) ? value : [ value ];
  }

  /**
   * Parse apps.json patterns
   *
   * A pattern string is a regular expression optionally followed by
   * `\;key:value` attributes (for example `\;version:\1`). Each pattern is
   * turned into an object with `string`, `regex` (case-insensitive) and one
   * property per attribute; attribute values stay strings.
   *
   * @param {string|string[]|Object} patterns
   * @returns {Array|Object} an array of parsed patterns for string/array
   *   input (and `[]` for falsy input), otherwise an object that maps each
   *   key to an array of parsed patterns
   */
  parsePatterns(patterns) {
    if ( !patterns ) {
      return [];
    }

    // Normalise string/array input to an object holding a single "main" list
    const source = typeof patterns === 'string' || Array.isArray(patterns) ? { main: this.asArray(patterns) } : patterns;

    const parsed = {};

    Object.keys(source).forEach(key => {
      parsed[key] = this.asArray(source[key]).map(pattern => {
        const attrs = {};

        pattern.split('\\;').forEach((attr, i) => {
          if ( i ) {
            // Key value pairs
            const pair = attr.split(':');

            if ( pair.length > 1 ) {
              attrs[pair.shift()] = pair.join(':');
            }

            return;
          }

          attrs.string = attr;

          try {
            // Slashes need no escaping when passed to the RegExp constructor
            attrs.regex = new RegExp(attr, 'i');
          } catch (e) {
            // Fall back to an empty expression so later `.test()` calls work
            attrs.regex = new RegExp();

            this.log(e + ': ' + attr, 'core', 'error');
          }
        });

        return attrs;
      });
    });

    // Convert back to array if the original pattern list was an array (or string)
    return 'main' in parsed ? parsed.main : parsed;
  }

  /**
   * Parse JavaScript patterns
   *
   * Fills `jsPatterns` for every application that defines a `js` property.
   */
  parseJsPatterns() {
    Object.keys(this.apps).forEach(appName => {
      if ( this.apps[appName].js ) {
        this.jsPatterns[appName] = this.parsePatterns(this.apps[appName].js);
      }
    });
  }

  /**
   * Remove applications that are excluded by another detected application.
   *
   * @param {Object} apps - detected applications by name; modified in place
   */
  resolveExcludes(apps) {
    const excludes = new Set();

    // Exclude app in detected apps only
    Object.keys(apps).forEach(appName => {
      const app = apps[appName];

      if ( app.props.excludes ) {
        this.asArray(app.props.excludes).forEach(excluded => excludes.add(excluded));
      }
    });

    // Remove excluded applications
    Object.keys(apps).forEach(appName => {
      if ( excludes.has(appName) ) {
        delete apps[appName];
      }
    });
  }

  /**
   * Add applications implied by the detected ones and pass on confidence.
   *
   * @param {Object} apps - detected applications by name; modified in place
   * @param {string} url - key into `detected` used to reuse cached instances
   */
  resolveImplies(apps, url) {
    let checkImplies = true;

    // Implied applications
    // Run several passes as implied apps may imply other apps
    while ( checkImplies ) {
      checkImplies = false;

      Object.keys(apps).forEach(appName => {
        const app = apps[appName];

        if ( !app || !app.props.implies ) {
          return;
        }

        this.asArray(app.props.implies).forEach(impliedPattern => {
          const implied = this.parsePatterns(impliedPattern)[0];

          // An empty entry yields no pattern at all
          if ( !implied ) {
            return;
          }

          if ( !this.apps[implied.string] ) {
            this.log('Implied application ' + implied.string + ' does not exist', 'core', 'warn');

            return;
          }

          if ( !( implied.string in apps ) ) {
            apps[implied.string] = this.detected[url] && this.detected[url][implied.string] ? this.detected[url][implied.string] : new Application(implied.string, this.apps[implied.string], true);

            checkImplies = true;
          }

          // Apply app confidence to implied app
          Object.keys(app.confidence).forEach(id => {
            apps[implied.string].confidence[id + ' implied by ' + appName] = app.confidence[id] * ( implied.confidence === undefined ? 1 : implied.confidence / 100 );
          });
        });
      });
    }
  }

  /**
   * Cache detected applications
   *
   * @param {Object} apps - detected applications by name
   * @param {string} url - canonical URL used as the cache key
   */
  cacheDetectedApps(apps, url) {
    if ( this.detected[url] === undefined ) {
      this.detected[url] = {};
    }

    // Per URL
    Object.keys(apps).forEach(appName => {
      this.detected[url][appName] = apps[appName];
    });

    if ( this.driver.ping instanceof Function ) {
      this.ping();
    }
  }

  /**
   * Track detected applications
   *
   * Counts applications detected with full confidence per hostname in
   * `hostnameCache`; does nothing when the driver has no `ping` function.
   *
   * @param {Object} apps - detected applications by name
   * @param {Object} url - reads `protocol`, `hostname` and `canonical`
   * @param {?string} language - stored in the hostname's `meta`
   */
  trackDetectedApps(apps, url, language) {
    if ( !( this.driver.ping instanceof Function ) ) {
      return;
    }

    const hostname = url.protocol + '//' + url.hostname;

    Object.keys(apps).forEach(appName => {
      const app = apps[appName];

      if ( this.detected[url.canonical][appName].getConfidence() < 100 ) {
        return;
      }

      if ( !validation.hostname.test(url.hostname) || validation.hostnameBlacklist.test(url.hostname) ) {
        return;
      }

      if ( !( hostname in this.hostnameCache ) ) {
        this.hostnameCache[hostname] = {
          applications: {},
          meta: {}
        };
      }

      const applications = this.hostnameCache[hostname].applications;

      if ( !( appName in applications ) ) {
        applications[appName] = {
          hits: 0
        };
      }

      applications[appName].hits ++;

      if ( app.version ) {
        applications[appName].version = app.version;
      }
    });

    if ( hostname in this.hostnameCache ) {
      this.hostnameCache[hostname].meta.language = language;
    }

    this.ping();
  }

  /**
   * Analyze URL
   *
   * @param {Object} app - application with `props.url` patterns
   * @param {Object} url - `canonical` is matched against the patterns
   * @returns {Promise}
   */
  analyzeUrl(app, url) {
    const patterns = this.parsePatterns(app.props.url);

    if ( !patterns.length ) {
      return Promise.resolve();
    }

    return this.asyncForEach(patterns, pattern => {
      if ( pattern.regex.test(url.canonical) ) {
        this.addDetected(app, pattern, 'url', url.canonical);
      }
    });
  }

  /**
   * Analyze HTML
   *
   * @param {Object} app - application with `props.html` patterns
   * @param {string} html
   * @returns {Promise}
   */
  analyzeHtml(app, html) {
    const patterns = this.parsePatterns(app.props.html);

    if ( !patterns.length ) {
      return Promise.resolve();
    }

    return this.asyncForEach(patterns, pattern => {
      if ( pattern.regex.test(html) ) {
        this.addDetected(app, pattern, 'html', html);
      }
    });
  }

  /**
   * Analyze script tag
   *
   * @param {Object} app - application with `props.script` patterns
   * @param {string[]} scripts - script URIs
   * @returns {Promise}
   */
  analyzeScripts(app, scripts) {
    const patterns = this.parsePatterns(app.props.script);

    if ( !patterns.length ) {
      return Promise.resolve();
    }

    return this.asyncForEach(patterns, pattern => {
      scripts.forEach(uri => {
        if ( pattern.regex.test(uri) ) {
          this.addDetected(app, pattern, 'script', uri);
        }
      });
    });
  }

  /**
   * Analyze meta tag
   *
   * Scans `html` for `<meta>` tags whose `name` or `property` attribute
   * matches a key of `props.meta` and tests the tag's `content` value
   * against that key's patterns.
   *
   * @param {Object} app - application with `props.meta` patterns
   * @param {string} html
   * @returns {Promise}
   */
  analyzeMeta(app, html) {
    const tagRegex = /<meta[^>]+>/ig;
    const patterns = this.parsePatterns(app.props.meta);
    const metaNames = Object.keys(patterns);
    const promises = [];

    if ( !metaNames.length ) {
      return Promise.all(promises);
    }

    let tagMatch;

    while ( ( tagMatch = tagRegex.exec(html) ) !== null ) {
      const tag = tagMatch[0];

      // `meta` is scoped per iteration so the asynchronous callback below
      // reports the name that actually matched.
      metaNames.forEach(meta => {
        const nameRegex = new RegExp('(?:name|property)=["\']' + meta + '["\']', 'i');

        if ( !nameRegex.test(tag) ) {
          return;
        }

        const content = tag.match(/content=("|')([^"']+)("|')/i);

        if ( !content ) {
          return;
        }

        promises.push(this.asyncForEach(patterns[meta], pattern => {
          if ( pattern.regex.test(content[2]) ) {
            this.addDetected(app, pattern, 'meta', content[2], meta);
          }
        }));
      });
    }

    return Promise.all(promises);
  }

  /**
   * Analyze response headers
   *
   * @param {Object} app - application with `props.headers` patterns
   * @param {Object} headers - maps lowercase header names to arrays of values
   * @returns {Promise}
   */
  analyzeHeaders(app, headers) {
    const patterns = this.parsePatterns(app.props.headers);
    const promises = [];

    Object.keys(patterns).forEach(headerName => {
      if ( typeof patterns[headerName] === 'function' ) {
        return;
      }

      const headerNameLower = headerName.toLowerCase();

      promises.push(this.asyncForEach(patterns[headerName], pattern => {
        if ( headerNameLower in headers ) {
          headers[headerNameLower].forEach(headerValue => {
            if ( pattern.regex.test(headerValue) ) {
              this.addDetected(app, pattern, 'headers', headerValue, headerNameLower);
            }
          });
        }
      }));
    });

    return Promise.all(promises);
  }

  /**
   * Analyze cookies
   *
   * Cookie names are compared case-insensitively.
   *
   * @param {Object} app - application with `props.cookies` patterns
   * @param {Array<{name: string, value: string}>} cookies
   * @returns {Promise}
   */
  analyzeCookies(app, cookies) {
    const patterns = this.parsePatterns(app.props.cookies);
    const promises = [];

    Object.keys(patterns).forEach(cookieName => {
      if ( typeof patterns[cookieName] === 'function' ) {
        return;
      }

      // Index `patterns` with the original key; only the comparison with
      // the actual cookie names is lowercased.
      const cookieNameLower = cookieName.toLowerCase();

      promises.push(this.asyncForEach(patterns[cookieName], pattern => {
        const cookie = cookies.find(candidate => candidate.name.toLowerCase() === cookieNameLower);

        if ( cookie && pattern.regex.test(cookie.value) ) {
          this.addDetected(app, pattern, 'cookies', cookie.value, cookieNameLower);
        }
      }));
    });

    return Promise.all(promises);
  }

  /**
   * Analyze JavaScript variables
   *
   * @param {Object} app - application whose patterns are in `jsPatterns`
   * @param {Object} results - maps a pattern key to an object that maps
   *   pattern indexes to the values found
   * @returns {Promise}
   */
  analyzeJs(app, results) {
    const promises = [];

    Object.keys(results).forEach(string => {
      if ( typeof results[string] === 'function' ) {
        return;
      }

      promises.push(this.asyncForEach(Object.keys(results[string]), index => {
        // Tolerate results for which no pattern has been parsed
        const appPatterns = this.jsPatterns[app.name] || {};
        const pattern = ( appPatterns[string] || [] )[index];
        const value = results[string][index];

        if ( pattern && pattern.regex.test(value) ) {
          this.addDetected(app, pattern, 'js', value);
        }
      }));
    });

    return Promise.all(promises);
  }

  /**
   * Mark application as detected, set confidence and version
   *
   * @param {Object} app - application to update
   * @param {Object} pattern - parsed pattern (`regex`, optional `confidence`
   *   and `version`)
   * @param {string} type - kind of data that matched (e.g. 'html')
   * @param {string} value - the value the pattern matched
   * @param {string} [key] - header/cookie/meta name, part of the confidence id
   */
  addDetected(app, pattern, type, value, key) {
    app.detected = true;

    // Set confidence level
    app.confidence[type + ' ' + ( key ? key + ' ' : '' ) + pattern.regex] = pattern.confidence === undefined ? 100 : parseInt(pattern.confidence, 10);

    // Detect version number
    if ( !pattern.version ) {
      return;
    }

    const matches = pattern.regex.exec(value);

    if ( !matches ) {
      return;
    }

    let version = pattern.version;

    matches.forEach((match, i) => {
      // Parse ternary operator: "\N?a:b" becomes "a" when group N matched,
      // "b" otherwise
      const ternary = new RegExp('\\\\' + i + '\\?([^:]+):(.*)$').exec(version);

      if ( ternary && ternary.length === 3 ) {
        version = version.replace(ternary[0], match ? ternary[1] : ternary[2]);
      }

      // Replace back references
      version = version.trim().replace(new RegExp('\\\\' + i, 'g'), match || '');
    });

    if ( version ) {
      app.version = version;
    }
  }
}
609
610
/**
611
 * Application class
612
 */
613
/**
 * Application class
 *
 * Detection state of a single application for one analyzed URL.
 */
class Application {
  /**
   * @param {string} name - application name
   * @param {Object} props - the application's definition
   * @param {*} [detected] - coerced to a boolean
   */
  constructor(name, props, detected) {
    this.confidence      = {};
    this.confidenceTotal = 0;
    this.detected        = Boolean(detected);
    this.excludes        = [];
    this.name            = name;
    this.props           = props;
    this.version         = '';
  }

  /**
   * Calculate confidence total
   *
   * Sums the own entries of `confidence`, caps the sum at 100 and stores it
   * in `confidenceTotal`.
   *
   * @returns {number} the capped total
   */
  getConfidence() {
    // Object.keys() skips inherited properties, unlike a bare for...in loop
    const total = Object.keys(this.confidence).reduce((sum, id) => sum + this.confidence[id], 0);

    this.confidenceTotal = Math.min(total, 100);

    return this.confidenceTotal;
  }
}
637
638
// Export the class when a CommonJS-style `module` object is available.
// `typeof null` is also 'object', hence the explicit null check.
if ( typeof module === 'object' && module !== null ) {
  module.exports = Wappalyzer;
}
641