Completed
Push — master ( b0e6d7...0b0339 )
by Elbert
39s
created

w.checkAdCache   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
c 1
b 0
f 0
nc 2
nop 0
dl 0
loc 5
rs 9.4285
1
/**
2
 * Wappalyzer v4
3
 *
4
 * Created by Elbert Alias <[email protected]>
5
 *
6
 * License: GPLv3 http://www.gnu.org/licenses/gpl-3.0.txt
7
 */
8
9
var wappalyzer = (function() {
10
	//'use strict';
11
12
	/**
13
	 * Application class
14
	 */
15
	var Application = function(app, detected) {
		// Application name; the analyze* functions look it up as w.apps[app.app]
		this.app             = app;
		// Confidence per matched pattern, keyed by "<type> [<key> ]<regex>"
		this.confidence      = { };
		// Last value computed by getConfidence()
		this.confidenceTotal = 0;
		this.detected        = Boolean(detected);
		this.excludes        = [ ];
		// Last value resolved by getVersion()
		this.version         = '';
		// Every distinct version string produced by setDetected()
		this.versions        = [ ];
	};

	Application.prototype = {
		/**
		 * Calculate confidence total
		 *
		 * Adds up all recorded confidence values numerically, caps the sum at 100,
		 * stores it in this.confidenceTotal and returns it.
		 */
		getConfidence: function() {
			var
				id,
				total = 0;

			for ( id in this.confidence ) {
				if ( Object.prototype.hasOwnProperty.call(this.confidence, id) ) {
					// Coerce explicitly: a string value would turn `+=` into concatenation
					total += Number(this.confidence[id]);
				}
			}

			return this.confidenceTotal = Math.min(total, 100);
		},

		/**
		 * Resolve version number (find the longest version number that contains all shorter detected version numbers)
		 *
		 * Sorts this.versions in place by length, stores the result in this.version
		 * and returns it. Returns undefined when no version has been recorded.
		 */
		getVersion: function() {
			var i, resolved;

			if ( !this.versions.length ) {
				return;
			}

			this.versions.sort(function(a, b) {
				return a.length - b.length;
			});

			resolved = this.versions[0];

			for ( i = 1; i < this.versions.length; i++ ) {
				// Stop at the first longer candidate that does not contain the current one
				if ( this.versions[i].indexOf(resolved) === -1 ) {
					break;
				}

				resolved = this.versions[i];
			}

			return this.version = resolved;
		},

		/**
		 * Mark the application as detected by a pattern
		 *
		 * pattern: parsed pattern ( regex, optional confidence and version template )
		 * type:    kind of data that matched, used in the confidence key
		 * value:   the string the pattern matched, used to extract the version
		 * key:     optional sub key ( e.g. a meta or header name ) for the confidence key
		 */
		setDetected: function(pattern, type, value, key) {
			var confidence, version, matches;

			this.detected = true;

			// Set confidence level
			// Store a number: pattern attributes arrive as strings; fall back to 100 when absent or not numeric
			confidence = parseFloat(pattern.confidence);

			this.confidence[type + ' ' + ( key ? key + ' ' : '' ) + pattern.regex] = isNaN(confidence) ? 100 : confidence;

			// Detect version number
			if ( !pattern.version ) {
				return;
			}

			version = pattern.version;
			matches = pattern.regex.exec(value);

			if ( !matches ) {
				return;
			}

			matches.forEach(function(match, i) {
				// Parse ternary operator
				var ternary = new RegExp('\\\\' + i + '\\?([^:]+):(.*)$').exec(version);

				// Function replacers are used so that `$&`, `$1`, ... inside the inserted text stay literal
				if ( ternary && ternary.length === 3 ) {
					version = version.replace(ternary[0], function() {
						return match ? ternary[1] : ternary[2];
					});
				}

				// Replace back references
				version = version.replace(new RegExp('\\\\' + i, 'g'), function() {
					return match || '';
				});
			});

			if ( version && this.versions.indexOf(version) < 0 ) {
				this.versions.push(version);
			}

			this.getVersion();
		}
	};
102
103
	/**
	 * Wrap a single string in an array; any other value is returned unchanged
	 */
	var asArray = function(value) {
		if ( typeof value === 'string' ) {
			return [ value ];
		}

		return value;
	};
106
107
	/**
108
	 * Call driver functions
109
	 */
110
	var driver = function(func, args) {
		// w.driver may not be attached at all; treat that like a missing function instead of throwing
		if ( !w.driver || typeof w.driver[func] !== 'function' ) {
			// w.log() is itself routed through driver('log'), so warning about a
			// missing 'log' function would recurse without end
			if ( func !== 'log' ) {
				w.log('not implemented: w.driver.' + func, 'core', 'warn');
			}

			return;
		}

		// Returns whatever the driver function returns
		return w.driver[func](args);
	};
119
120
	/**
121
	 * Parse apps.json patterns
122
	 */
123
	var parsePatterns = function(patterns) {
		var
			key,
			parsed = {},
			// Remember the input shape, so an object that merely has a `main` key is not mistaken for a list
			isList = typeof patterns === 'string' || Array.isArray(patterns);

		// Parse one "regex\;key:value\;key:value" string into { string, regex, key: value, ... }
		var parsePattern = function(pattern) {
			var attrs = {};

			pattern.split('\\;').forEach(function(attr, i) {
				if ( i ) {
					// Key value pairs
					attr = attr.split(':');

					if ( attr.length > 1 ) {
						// Only the first colon separates key and value
						attrs[attr.shift()] = attr.join(':');
					}
				} else {
					attrs.string = attr;

					try {
						// The RegExp constructor takes the source as is; slashes need no escaping
						attrs.regex = new RegExp(attr, 'i');
					} catch (e) {
						// NOTE(review): the empty fallback regex matches every input — confirm this is intended
						attrs.regex = new RegExp();

						w.log(e + ': ' + attr, 'core', 'error');
					}
				}
			});

			return attrs;
		};

		// Convert string to object containing array containing string
		if ( isList ) {
			patterns = {
				main: asArray(patterns)
			};
		}

		for ( key in patterns ) {
			if ( Object.prototype.hasOwnProperty.call(patterns, key) ) {
				parsed[key] = asArray(patterns[key]).map(function(pattern) {
					return parsePattern(pattern);
				});
			}
		}

		// Convert back to array if the original pattern list was an array (or string)
		return isList ? parsed.main : parsed;
	};
173
174
	/**
175
	 * Main script
176
	 */
177
	var w = {
178
		apps: {},
179
		cats: null,
180
		ping: {
181
			hostnames: { }
182
		},
183
		adCache: [],
184
		detected: {},
185
186
		config: {
187
			websiteURL: 'https://wappalyzer.com/',
188
			twitterURL: 'https://twitter.com/Wappalyzer',
189
			githubURL: 'https://github.com/AliasIO/Wappalyzer',
190
		},
191
192
		validation: {
193
			hostname: /(www.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/,
194
			hostnameBlacklist: /((local|dev(elopment)?|stag(e|ing)?|test(ing)?|demo(shop)?|admin|google|cache)\.|\/admin|\.local)/
195
		},
196
197
		/**
198
		 * Log messages to console
199
		 */
200
		log: function(message, source, type) {
201
			driver('log', {
202
				source: source || '',
203
				message: JSON.stringify(message),
204
				type: type || 'debug'
205
			});
206
		},
207
208
		/**
209
		 * Initialize
210
		 */
211
		init: function() {
212
			w.log('Function call: w.init()', 'core');
213
214
			// Initialize driver
215
			if ( w.driver !== undefined ) {
216
				driver('init');
217
			} else {
218
				w.log('No driver, exiting', 'core');
219
			}
220
		},
221
222
		/**
223
		 * Analyze the request
224
		 */
225
		analyze: function(hostname, url, data) {
226
			var
227
				app,
228
				apps = {};
229
230
			w.log('Function call: w.analyze()', 'core');
231
232
			if ( w.apps === undefined || w.categories === undefined ) {
233
				w.log('apps.json not loaded, check for syntax errors', 'core');
234
235
				return;
236
			}
237
238
			// Remove hash from URL
239
			data.url = url = url.split('#')[0];
240
241
			if ( typeof data.html !== 'string' ) {
242
				data.html = '';
243
			}
244
245
			if ( w.detected[url] === undefined ) {
246
				w.detected[url] = {};
247
			}
248
249
			for ( app in w.apps ) {
250
				apps[app] = w.detected[url] && w.detected[url][app] ? w.detected[url][app] : new Application(app);
251
252
				if ( url ) {
253
					w.analyzeUrl(apps[app], url);
254
				}
255
256
				if ( data.html ) {
257
					w.analyzeHtml(apps[app], data.html);
258
					w.analyzeScript(apps[app], data.html);
259
					w.analyzeMeta(apps[app], data.html);
260
				}
261
262
				if ( data.headers ) {
263
					w.analyzeHeaders(apps[app], data.headers);
264
				}
265
266
				if ( data.env ) {
267
					w.analyzeEnv(apps[app], data.env);
268
				}
269
			}
270
271
			for ( app in apps ) {
272
				if ( !apps[app].detected ) {
273
					delete apps[app];
274
				}
275
			}
276
277
			w.resolveExcludes(apps);
278
			w.resolveImplies(apps, url);
279
280
			w.cacheDetectedApps(apps, url);
281
			w.trackDetectedApps(apps, url, hostname, data.html);
282
283
			if ( Object.keys(apps).length ) {
284
				w.log(Object.keys(apps).length + ' apps detected: ' + Object.keys(apps).join(', ') + ' on ' + url, 'core');
285
			}
286
287
			driver('displayApps');
288
		},
289
290
		resolveExcludes: function(apps) {
291
			var
292
				app,
293
				excludes = [];
294
295
			// Exclude app in detected apps only
296
			for ( app in apps ) {
297
				if ( w.apps[app].excludes ) {
298
					asArray(w.apps[app].excludes).forEach(function(excluded) {
299
						excludes.push(excluded);
300
					});
301
				}
302
			}
303
304
			// Remove excluded applications
305
			for ( app in apps ) {
306
				if ( excludes.indexOf(app) !== -1 ) {
307
					delete apps[app];
308
				}
309
			}
310
		},
311
312
		resolveImplies: function(apps, url) {
313
			var
314
				confidence,
315
				id,
316
				checkImplies = true;
317
318
			// Implied applications
319
			// Run several passes as implied apps may imply other apps
320
			while ( checkImplies ) {
321
				checkImplies = false;
322
323
				for ( app in apps ) {
324
					confidence = apps[app].confidence;
325
326
					if ( w.apps[app] && w.apps[app].implies ) {
327
						asArray(w.apps[app].implies).forEach(function(implied) {
328
							implied = parsePatterns(implied)[0];
329
330
							if ( !w.apps[implied.string] ) {
331
								w.log('Implied application ' + implied.string + ' does not exist', 'core', 'warn');
332
333
								return;
334
							}
335
336
							if ( !apps.hasOwnProperty(implied.string) ) {
337
								apps[implied.string] = w.detected[url] && w.detected[url][implied.string] ? w.detected[url][implied.string] : new Application(implied.string, true);
338
339
								checkImplies = true;
340
							}
341
342
							// Apply app confidence to implied app
343
							for ( id in confidence ) {
344
								apps[implied.string].confidence[id + ' implied by ' + app] = confidence[id] * ( implied.confidence ? implied.confidence / 100 : 1 );
345
							}
346
						});
347
					}
348
				}
349
			}
350
		},
351
352
		/**
353
		 * Cache detected applications
354
		 */
355
		cacheDetectedApps: function(apps, url) {
356
			var app, id, confidence;
357
358
			for ( app in apps ) {
359
				confidence = apps[app].confidence;
360
361
				// Per URL
362
				w.detected[url][app] = apps[app];
363
364
				for ( id in confidence ) {
365
					w.detected[url][app].confidence[id] = confidence[id];
366
				}
367
			}
368
		},
369
370
		checkAdCache: function() {
371
			if ( Object.keys(w.ping.hostnames).length >= 50 || w.adCache.length >= 50 ) {
372
				driver('ping');
373
			}
374
		},
375
376
		/**
377
		 * Track detected applications
378
		 */
379
		trackDetectedApps: function(apps, url, hostname, html) {
380
			var app, match;
381
382
			for ( app in apps ) {
383
				if ( w.detected[url][app].getConfidence() >= 100 && w.validation.hostname.test(hostname) && !w.validation.hostnameBlacklist.test(url) ) {
384
					if ( !w.ping.hostnames.hasOwnProperty(hostname) ) {
385
						w.ping.hostnames[hostname] = {
386
							applications: {},
387
							meta: {}
388
						};
389
					}
390
391
					if ( !w.ping.hostnames[hostname].applications.hasOwnProperty(app) ) {
392
						w.ping.hostnames[hostname].applications[app] = {
393
							hits: 0
394
						};
395
					}
396
397
					w.ping.hostnames[hostname].applications[app].hits ++;
398
399
					if ( apps[app].version ) {
400
						w.ping.hostnames[hostname].applications[app].version = apps[app].version;
401
					}
402
				}
403
			}
404
405
			// Additional information
406
			if ( w.ping.hostnames.hasOwnProperty(hostname) ) {
407
				match = html.match(/<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"/i);
408
409
				if ( match && match.length ) {
410
					w.ping.hostnames[hostname].meta['language'] = match[1];
411
				}
412
			}
413
			w.checkAdCache();
414
		},
415
416
		/**
417
		 * Analyze URL
418
		 */
419
		analyzeUrl: function(app, url) {
420
			var patterns = parsePatterns(w.apps[app.app].url);
421
422
			if ( patterns.length ) {
423
				patterns.forEach(function(pattern) {
424
					if ( pattern.regex.test(url) ) {
425
						app.setDetected(pattern, 'url', url);
426
					}
427
				});
428
			}
429
		},
430
431
		/**
432
		 * Analyze HTML
433
		 */
434
		analyzeHtml: function(app, html) {
435
			var patterns = parsePatterns(w.apps[app.app].html);
436
437
			if ( patterns.length ) {
438
				patterns.forEach(function(pattern) {
439
					if ( pattern.regex.test(html) ) {
440
						app.setDetected(pattern, 'html', html);
441
					}
442
				});
443
			}
444
		},
445
446
		/**
447
		 * Analyze script tag
448
		 */
449
		analyzeScript: function(app, html) {
450
			var
451
				regex = new RegExp('<script[^>]+src=("|\')([^"\']+)', 'ig'),
452
				patterns = parsePatterns(w.apps[app.app].script);
453
454
			if ( patterns.length ) {
455
				patterns.forEach(function(pattern) {
456
					var match;
457
458
					while ( (match = regex.exec(html)) ) {
459
						if ( pattern.regex.test(match[2]) ) {
460
							app.setDetected(pattern, 'script', match[2]);
461
						}
462
					}
463
				});
464
			}
465
		},
466
467
		/**
468
		 * Analyze meta tag
469
		 */
470
		analyzeMeta: function(app, html) {
471
			var
472
				content, match, meta,
473
				regex = /<meta[^>]+>/ig,
474
				patterns = parsePatterns(w.apps[app.app].meta);
475
476
			if ( patterns ) {
477
				while ( (match = regex.exec(html)) ) {
478
					for ( meta in patterns ) {
479
						if ( new RegExp('(name|property)=["\']' + meta + '["\']', 'i').test(match) ) {
480
							content = match.toString().match(/content=("|')([^"']+)("|')/i);
481
482
							patterns[meta].forEach(function(pattern) {
483
								if ( content && content.length === 4 && pattern.regex.test(content[2]) ) {
484
									app.setDetected(pattern, 'meta', content[2], meta);
485
								}
486
							});
487
						}
488
					}
489
				}
490
			}
491
		},
492
493
		/**
494
		 * Analyze response headers
495
		 */
496
		analyzeHeaders: function(app, headers) {
497
			var
498
				header,
499
				patterns = parsePatterns(w.apps[app.app].headers);
500
501
			if ( headers ) {
502
				for ( header in patterns ) {
503
					patterns[header].forEach(function(pattern) {
504
						header = header.toLowerCase();
505
506
						if ( headers.hasOwnProperty(header) && pattern.regex.test(headers[header]) ) {
507
							app.setDetected(pattern, 'headers', headers[header], header);
508
						}
509
					});
510
				}
511
			}
512
		},
513
514
		/**
515
		 * Analyze environment variables
516
		 */
517
		analyzeEnv: function(app, envs) {
518
			var patterns = parsePatterns(w.apps[app.app].env);
519
520
			if ( patterns.length ) {
521
				patterns.forEach(function(pattern) {
522
					var env;
523
524
					for ( env in envs ) {
525
						if ( pattern.regex.test(envs[env]) ) {
526
							app.setDetected(pattern, 'env', envs[env]);
527
						}
528
					}
529
				});
530
			}
531
		}
532
	};
533
534
	return w;
535
})();
536
537
// CommonJS package
538
// See http://wiki.commonjs.org/wiki/CommonJS
539
// typeof null is also 'object', so null is ruled out before assigning
if ( typeof exports === 'object' && exports !== null ) {
	exports.wappalyzer = wappalyzer;
}
542