Issues (14)

src/wappalyzer.js (1 issue)

Severity
1
/**
2
 * Wappalyzer v5
3
 *
4
 * Created by Elbert Alias <[email protected]>
5
 *
6
 * License: GPLv3 http://www.gnu.org/licenses/gpl-3.0.txt
7
 */
8
9
'use strict';
10
11
/**
 * Hostname checks applied before a detection is added to the hostname cache.
 *
 * - `hostname`: the value must end in `<name>.<tld>`, where the TLD is 2-6
 *   lowercase letters, optionally preceded by a 2-3 letter second-level label
 *   (e.g. `co.uk`). An optional literal `www.` prefix is captured separately.
 * - `hostnameBlacklist`: matches values containing a development/staging style
 *   label followed by a dot (`local.`, `dev.`, `staging.`, `test.`, `demo.`,
 *   `admin.`, `google.`, `cache.`, ...), the path fragment `/admin`, or `.local`.
 */
const validation = {
  // The dot after "www" is escaped so that only a literal "www." counts as the prefix
  hostname: /(www\.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/,
  hostnameBlacklist: /((local|dev(elopment)?|stag(e|ing)?|test(ing)?|demo(shop)?|admin|google|cache)\.|\/admin|\.local)/
};
15
16
/**
 * Core detection engine.
 *
 * Matches application patterns (`this.apps`) against the URL, HTML, meta tags,
 * script sources, cookies, response headers and JavaScript values of a page and
 * reports the detected applications through `this.driver`.
 *
 * The driver is supplied by the host environment. This class calls
 * `driver.log`, `driver.displayApps`, `driver.getRobotsTxt`,
 * `driver.document.createElement` and, when it is a function, `driver.ping`.
 */
class Wappalyzer {
  /**
   * Initialise empty application definitions, caches and static configuration.
   */
  constructor() {
    this.apps = {};
    this.categories = {};
    this.driver = {};
    this.jsPatterns = {};
    this.detected = {};
    this.hostnameCache = {};
    this.adCache = [];

    this.config = {
      websiteURL: 'https://www.wappalyzer.com/',
      twitterURL: 'https://twitter.com/Wappalyzer',
      githubURL: 'https://github.com/AliasIO/Wappalyzer',
    };
  }

  /**
   * Log a message through the driver.
   *
   * @param {string} message
   * @param {string} [source] - Defaults to an empty string
   * @param {string} [type] - Defaults to 'debug'
   */
  log(message, source, type) {
    this.driver.log(message, source || '', type || 'debug');
  }

  /**
   * Run `iterator` for every item, each in its own timer callback.
   *
   * @param {Array} [iterable] - Items to process; a falsy value is treated as an empty list
   * @param {Function} iterator - Called with each item
   * @returns {Promise<Array>} Resolves with the iterator results; rejects if an iterator throws
   */
  asyncForEach(iterable, iterator) {
    return Promise.all(( iterable || [] ).map(item => new Promise((resolve, reject) => {
      setTimeout(() => {
        // An exception thrown inside a timer callback would otherwise escape the promise entirely
        try {
          resolve(iterator(item));
        } catch ( error ) {
          reject(error);
        }
      }, 1);
    })));
  }

  /**
   * Analyse a page and hand the detected applications to `driver.displayApps`.
   *
   * @param {Object} url - Parsed URL; `canonical`, `hostname` and `protocol` are read
   * @param {Object} data - Any of `html`, `scripts`, `cookies`, `headers`, `js`
   * @param {*} context - Passed through to `driver.displayApps`
   * @returns {Promise<undefined>} Settles once all pattern checks and post-processing are done
   */
  analyze(url, data, context) {
    const startTime = new Date();
    const promises = [];
    const apps = {};

    if ( this.detected[url.canonical] === undefined ) {
      this.detected[url.canonical] = {};
    }

    const cached = this.detected[url.canonical];

    // Additional information
    let language = null;

    if ( data.html ) {
      if ( typeof data.html !== 'string' ) {
        data.html = '';
      }

      const matches = data.html.match(/<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"/i);

      language = matches ? matches[1] : null;
    }

    Object.keys(this.apps).forEach(appName => {
      // Reuse the instance from a previous analysis of the same URL so confidence accumulates
      const app = cached[appName] || new Application(appName, this.apps[appName]);

      apps[appName] = app;

      // The URL check is asynchronous too; it has to be awaited like the other checks
      promises.push(this.analyzeUrl(app, url));

      if ( data.html ) {
        promises.push(this.analyzeHtml(app, data.html));
        promises.push(this.analyzeMeta(app, data.html));
      }

      if ( data.scripts ) {
        promises.push(this.analyzeScripts(app, data.scripts));
      }

      if ( data.cookies ) {
        promises.push(this.analyzeCookies(app, data.cookies));
      }

      if ( data.headers ) {
        promises.push(this.analyzeHeaders(app, data.headers));
      }
    });

    if ( data.js ) {
      Object.keys(data.js).forEach(appName => {
        // Skip function-valued entries and results for applications that are not defined
        if ( typeof data.js[appName] !== 'function' && apps[appName] ) {
          promises.push(this.analyzeJs(apps[appName], data.js[appName]));
        }
      });
    }

    return Promise.all(promises).then(() => {
      Object.keys(apps).forEach(appName => {
        const app = apps[appName];

        if ( !app.detected || !app.getConfidence() ) {
          delete apps[appName];
        }
      });

      this.resolveExcludes(apps);
      this.resolveImplies(apps, url.canonical);

      this.cacheDetectedApps(apps, url.canonical);
      this.trackDetectedApps(apps, url, language);

      this.log('Processing ' + Object.keys(data).join(', ') + ' took ' + (( new Date() - startTime ) / 1000).toFixed(2) + 's (' + url.hostname + ')', 'core');

      if ( Object.keys(apps).length ) {
        this.log('Identified ' + Object.keys(apps).join(', ') + ' (' + url.hostname + ')', 'core');
      }

      this.driver.displayApps(this.detected[url.canonical], { language }, context);
    });
  }

  /**
   * Cache detected ads
   *
   * @param {*} ad - Appended to `adCache` until the next `ping()` flush
   */
  cacheDetectedAds(ad) {
    this.adCache.push(ad);
  }

  /**
   * Check whether robots.txt permits visiting a URL.
   *
   * @param {string} url
   * @returns {Promise<undefined>} Resolves when the URL may be visited (also when
   *   robots.txt cannot be retrieved); rejects without a reason when the protocol
   *   is not http(s) or the path starts with a disallowed path.
   */
  robotsTxtAllows(url) {
    return new Promise((resolve, reject) => {
      const parsed = this.parseUrl(url);

      if ( parsed.protocol !== 'http:' && parsed.protocol !== 'https:' ) {
        reject();

        return;
      }

      this.driver.getRobotsTxt(parsed.host, parsed.protocol === 'https:')
        .then(robotsTxt => {
          const disallowed = robotsTxt.some(disallowedPath => parsed.pathname.indexOf(disallowedPath) === 0);

          if ( disallowed ) {
            reject();
          } else {
            resolve();
          }
        }, () => {
          // A failed robots.txt lookup is treated as permission
          resolve();
        });
    });
  }

  /**
   * Parse a URL
   *
   * @param {string} url
   * @returns {Object} Anchor element created through the driver's document, with an
   *   added `canonical` property (protocol, host and path)
   */
  parseUrl(url) {
    const anchor = this.driver.document.createElement('a');

    anchor.href = url;

    anchor.canonical = anchor.protocol + '//' + anchor.host + anchor.pathname;

    return anchor;
  }

  /**
   * Extract the disallowed paths that apply to this crawler from a robots.txt body.
   *
   * Only `Disallow` rules in groups for the user agents `*` and `wappalyzer` are
   * collected. Lines are trimmed first, so CRLF line endings and surrounding
   * whitespace do not prevent a match.
   *
   * @param {string} robotsTxt
   * @returns {string[]} Disallowed path prefixes
   */
  parseRobotsTxt(robotsTxt) {
    const disallow = [];

    let userAgent;

    robotsTxt.split(/\r?\n/).forEach(rawLine => {
      const line = rawLine.trim();
      const agent = /^User-agent:\s*(.+)$/i.exec(line);

      if ( agent ) {
        userAgent = agent[1].toLowerCase();

        return;
      }

      if ( userAgent === '*' || userAgent === 'wappalyzer' ) {
        const rule = /^Disallow:\s*(.+)$/i.exec(line);

        if ( rule ) {
          disallow.push(rule[1]);
        }
      }
    });

    return disallow;
  }

  /**
   * Flush the hostname cache (more than 100 hostnames) and the ad cache
   * (more than 50 ads) to `driver.ping`.
   */
  ping() {
    if ( Object.keys(this.hostnameCache).length > 100 ) {
      this.driver.ping(this.hostnameCache);

      this.hostnameCache = {};
    }

    if ( this.adCache.length > 50 ) {
      this.driver.ping({}, this.adCache);

      this.adCache = [];
    }
  }

  /**
   * Enclose string in array
   *
   * @param {*} value
   * @returns {Array} `value` itself when it already is an array
   */
  asArray(value) {
    return Array.isArray(value) ? value : [ value ];
  }

  /**
   * Parse apps.json patterns
   *
   * A pattern string has the form `regex\;key:value\;key:value`. The first segment
   * becomes `string` and `regex` (case-insensitive); the remaining segments become
   * additional properties such as `version` or `confidence`.
   *
   * @param {string|string[]|Object} patterns
   * @returns {Array|Object} An array of parsed patterns for string/array input (or
   *   an empty array for falsy input), otherwise an object mapping each key to an
   *   array of parsed patterns
   */
  parsePatterns(patterns) {
    if ( !patterns ) {
      return [];
    }

    // Convert string to object containing array containing string
    const groups = typeof patterns === 'string' || Array.isArray(patterns) ? { main: this.asArray(patterns) } : patterns;

    let parsed = {};

    Object.keys(groups).forEach(key => {
      parsed[key] = this.asArray(groups[key]).map(pattern => {
        const attrs = {};

        pattern.split('\\;').forEach((attr, i) => {
          if ( i ) {
            // Key value pairs
            const parts = attr.split(':');

            if ( parts.length > 1 ) {
              attrs[parts.shift()] = parts.join(':');
            }

            return;
          }

          attrs.string = attr;

          try {
            // The RegExp constructor takes the raw source; slashes need no escaping
            attrs.regex = new RegExp(attr, 'i');
          } catch ( error ) {
            // Invalid pattern: fall back to an empty expression and report it
            attrs.regex = new RegExp();

            this.log(error + ': ' + attr, 'core', 'error');
          }
        });

        return attrs;
      });
    });

    // Convert back to array if the original pattern list was an array (or string)
    if ( 'main' in parsed ) {
      parsed = parsed.main;
    }

    return parsed;
  }

  /**
   * Parse JavaScript patterns
   *
   * Fills `jsPatterns` with the parsed `js` patterns of every application that has them.
   */
  parseJsPatterns() {
    Object.keys(this.apps).forEach(appName => {
      if ( this.apps[appName].js ) {
        this.jsPatterns[appName] = this.parsePatterns(this.apps[appName].js);
      }
    });
  }

  /**
   * Remove applications that are excluded by another detected application.
   *
   * @param {Object} apps - Detected applications by name; modified in place
   */
  resolveExcludes(apps) {
    const excludes = new Set();

    // Exclude app in detected apps only
    Object.keys(apps).forEach(appName => {
      const app = apps[appName];

      if ( app.props.excludes ) {
        this.asArray(app.props.excludes).forEach(excluded => excludes.add(excluded));
      }
    });

    // Remove excluded applications
    Object.keys(apps).forEach(appName => {
      if ( excludes.has(appName) ) {
        delete apps[appName];
      }
    });
  }

  /**
   * Add applications implied by the detected ones.
   *
   * @param {Object} apps - Detected applications by name; modified in place
   * @param {string} url - Key into `this.detected` used to reuse cached instances
   */
  resolveImplies(apps, url) {
    let checkImplies = true;

    // Implied applications
    // Run several passes as implied apps may imply other apps
    while ( checkImplies ) {
      checkImplies = false;

      Object.keys(apps).forEach(appName => {
        const app = apps[appName];

        if ( !app || !app.props.implies ) {
          return;
        }

        this.asArray(app.props.implies).forEach(impliedPattern => {
          const implied = this.parsePatterns(impliedPattern)[0];

          if ( !this.apps[implied.string] ) {
            this.log('Implied application ' + implied.string + ' does not exist', 'core', 'warn');

            return;
          }

          if ( !( implied.string in apps ) ) {
            apps[implied.string] = this.detected[url] && this.detected[url][implied.string] ? this.detected[url][implied.string] : new Application(implied.string, this.apps[implied.string], true);

            checkImplies = true;
          }

          // Apply app confidence to implied app
          const factor = implied.confidence === undefined ? 1 : implied.confidence / 100;

          Object.keys(app.confidence).forEach(id => {
            apps[implied.string].confidence[id + ' implied by ' + appName] = app.confidence[id] * factor;
          });
        });
      });
    }
  }

  /**
   * Cache detected applications
   *
   * @param {Object} apps - Detected applications by name
   * @param {string} url - Canonical URL used as cache key
   */
  cacheDetectedApps(apps, url) {
    Object.keys(apps).forEach(appName => {
      if ( this.detected[url] === undefined ) {
        this.detected[url] = {};
      }

      // Per URL
      this.detected[url][appName] = apps[appName];
    });

    if ( this.driver.ping instanceof Function ) {
      this.ping();
    }
  }

  /**
   * Track detected applications
   *
   * Does nothing unless the driver provides a `ping` function. Applications with a
   * confidence of at least 100 are counted per hostname, provided the hostname
   * passes the `validation` checks.
   *
   * @param {Object} apps - Detected applications by name
   * @param {Object} url - Parsed URL; `protocol`, `hostname` and `canonical` are read
   * @param {?string} language - Stored in the hostname's `meta.language`
   */
  trackDetectedApps(apps, url, language) {
    if ( !( this.driver.ping instanceof Function ) ) {
      return;
    }

    const hostname = url.protocol + '//' + url.hostname;
    const trackable = validation.hostname.test(url.hostname) && !validation.hostnameBlacklist.test(url.hostname);

    Object.keys(apps).forEach(appName => {
      const app = apps[appName];

      if ( this.detected[url.canonical][appName].getConfidence() >= 100 && trackable ) {
        if ( !( hostname in this.hostnameCache ) ) {
          this.hostnameCache[hostname] = {
            applications: {},
            meta: {}
          };
        }

        const applications = this.hostnameCache[hostname].applications;

        if ( !( appName in applications ) ) {
          applications[appName] = {
            hits: 0
          };
        }

        applications[appName].hits ++;

        if ( app.version ) {
          applications[appName].version = app.version;
        }
      }
    });

    if ( hostname in this.hostnameCache ) {
      this.hostnameCache[hostname].meta['language'] = language;
    }

    this.ping();
  }

  /**
   * Analyze URL
   *
   * @param {Object} app - Application whose `props.url` patterns are tested
   * @param {Object} url - Parsed URL; `canonical` is tested
   * @returns {Promise}
   */
  analyzeUrl(app, url) {
    const patterns = this.parsePatterns(app.props.url);

    if ( !patterns.length ) {
      return Promise.resolve();
    }

    return this.asyncForEach(patterns, pattern => {
      if ( pattern.regex.test(url.canonical) ) {
        this.addDetected(app, pattern, 'url', url.canonical);
      }
    });
  }

  /**
   * Analyze HTML
   *
   * @param {Object} app - Application whose `props.html` patterns are tested
   * @param {string} html
   * @returns {Promise}
   */
  analyzeHtml(app, html) {
    const patterns = this.parsePatterns(app.props.html);

    if ( !patterns.length ) {
      return Promise.resolve();
    }

    return this.asyncForEach(patterns, pattern => {
      if ( pattern.regex.test(html) ) {
        this.addDetected(app, pattern, 'html', html);
      }
    });
  }

  /**
   * Analyze script tag
   *
   * @param {Object} app - Application whose `props.script` patterns are tested
   * @param {string[]} scripts - Script URIs
   * @returns {Promise}
   */
  analyzeScripts(app, scripts) {
    const patterns = this.parsePatterns(app.props.script);

    if ( !patterns.length ) {
      return Promise.resolve();
    }

    return this.asyncForEach(patterns, pattern => {
      scripts.forEach(uri => {
        if ( pattern.regex.test(uri) ) {
          this.addDetected(app, pattern, 'script', uri);
        }
      });
    });
  }

  /**
   * Analyze meta tag
   *
   * @param {Object} app - Application whose `props.meta` patterns (by meta name) are tested
   * @param {string} html
   * @returns {Promise}
   */
  analyzeMeta(app, html) {
    const patterns = this.parsePatterns(app.props.meta);
    const metaNames = Object.keys(patterns);
    const promises = [];

    // Most applications have no meta patterns; avoid scanning the document for them
    if ( !metaNames.length ) {
      return Promise.all(promises);
    }

    const tagRegex = /<meta[^>]+>/ig;

    let tag;

    while ( ( tag = tagRegex.exec(html) ) ) {
      const markup = tag[0];
      const content = markup.match(/content=("|')([^"']+)("|')/i);

      metaNames.forEach(meta => {
        const attribute = new RegExp('(?:name|property)=["\']' + meta + '["\']', 'i');

        if ( !attribute.test(markup) ) {
          return;
        }

        promises.push(this.asyncForEach(patterns[meta], pattern => {
          if ( content && pattern.regex.test(content[2]) ) {
            this.addDetected(app, pattern, 'meta', content[2], meta);
          }
        }));
      });
    }

    return Promise.all(promises);
  }

  /**
   * Analyze response headers
   *
   * @param {Object} app - Application whose `props.headers` patterns (by header name) are tested
   * @param {Object} headers - Maps lower-case header names to arrays of values
   * @returns {Promise}
   */
  analyzeHeaders(app, headers) {
    const patterns = this.parsePatterns(app.props.headers);
    const promises = [];

    Object.keys(patterns).forEach(headerName => {
      if ( typeof patterns[headerName] === 'function' ) {
        return;
      }

      const name = headerName.toLowerCase();

      promises.push(this.asyncForEach(patterns[headerName], pattern => {
        if ( name in headers ) {
          headers[name].forEach(headerValue => {
            if ( pattern.regex.test(headerValue) ) {
              this.addDetected(app, pattern, 'headers', headerValue, name);
            }
          });
        }
      }));
    });

    return Promise.all(promises);
  }

  /**
   * Analyze cookies
   *
   * @param {Object} app - Application whose `props.cookies` patterns (by cookie name) are tested
   * @param {Object[]} cookies - Objects with `name` and `value`
   * @returns {Promise}
   */
  analyzeCookies(app, cookies) {
    const patterns = this.parsePatterns(app.props.cookies);
    const promises = [];

    Object.keys(patterns).forEach(cookieName => {
      if ( typeof patterns[cookieName] === 'function' ) {
        return;
      }

      // Look the patterns up by the original key; only the comparison is case-insensitive
      const name = cookieName.toLowerCase();

      promises.push(this.asyncForEach(patterns[cookieName], pattern => {
        const cookie = cookies.find(candidate => candidate.name.toLowerCase() === name);

        if ( cookie && pattern.regex.test(cookie.value) ) {
          this.addDetected(app, pattern, 'cookies', cookie.value, name);
        }
      }));
    });

    return Promise.all(promises);
  }

  /**
   * Analyze JavaScript variables
   *
   * @param {Object} app - Application the results belong to
   * @param {Object} results - Maps a pattern key to an object of values indexed like
   *   the parsed patterns in `jsPatterns[app.name][key]`
   * @returns {Promise}
   */
  analyzeJs(app, results) {
    const appPatterns = this.jsPatterns[app.name] || {};
    const promises = [];

    Object.keys(results).forEach(string => {
      if ( typeof results[string] === 'function' ) {
        return;
      }

      const patterns = appPatterns[string] || [];

      promises.push(this.asyncForEach(Object.keys(results[string]), index => {
        const pattern = patterns[index];
        const value = results[string][index];

        if ( pattern && pattern.regex.test(value) ) {
          this.addDetected(app, pattern, 'js', value);
        }
      }));
    });

    return Promise.all(promises);
  }

  /**
   * Mark application as detected, set confidence and version
   *
   * @param {Object} app - Application to update (`detected`, `confidence`, `version`)
   * @param {Object} pattern - Parsed pattern (`regex`, optional `confidence` and `version`)
   * @param {string} type - Kind of match, e.g. 'html' or 'headers'
   * @param {string} value - The value that matched
   * @param {string} [key] - Header, cookie or meta name, when applicable
   */
  addDetected(app, pattern, type, value, key) {
    app.detected = true;

    // Set confidence level
    app.confidence[type + ' ' + ( key ? key + ' ' : '' ) + pattern.regex] = pattern.confidence === undefined ? 100 : parseInt(pattern.confidence, 10);

    // Detect version number
    if ( !pattern.version ) {
      return;
    }

    const matches = pattern.regex.exec(value);

    if ( !matches ) {
      return;
    }

    let version = pattern.version;

    matches.forEach((match, i) => {
      // Parse ternary operator
      const ternary = new RegExp('\\\\' + i + '\\?([^:]+):(.*)$').exec(version);

      // Function replacers insert the text literally, even if it contains "$" sequences
      if ( ternary && ternary.length === 3 ) {
        version = version.replace(ternary[0], () => ( match ? ternary[1] : ternary[2] ));
      }

      // Replace back references
      version = version.trim().replace(new RegExp('\\\\' + i, 'g'), () => match || '');
    });

    // Use the longest detected version number; on equal length the newest one wins
    if ( version && !( app.version && app.version.length > version.length ) ) {
      app.version = version;
    }
  }
}
607
608
/**
 * Application class
 *
 * Holds the detection state of a single application: the confidence
 * contributed by each matched pattern, the detected flag and the version.
 */
class Application {
  /**
   * @param {string} name - Application name
   * @param {Object} props - Application definition
   * @param {*} [detected] - Coerced to a boolean
   */
  constructor(name, props, detected) {
    Object.assign(this, {
      confidence: {},
      confidenceTotal: 0,
      detected: Boolean(detected),
      excludes: [],
      name,
      props,
      version: '',
    });
  }

  /**
   * Calculate confidence total
   *
   * Adds up all confidence entries, caps the sum at 100 and stores it in
   * `confidenceTotal`.
   *
   * @returns {number} The capped total
   */
  getConfidence() {
    const sum = Object.keys(this.confidence).reduce((running, id) => running + this.confidence[id], 0);

    this.confidenceTotal = Math.min(sum, 100);

    return this.confidenceTotal;
  }
}
635
636
// Export the class when a CommonJS `module` object is present
if ( typeof module === 'object' ) {
  module.exports = Wappalyzer;
}
639