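/*
 * Scraped report header (page residue kept for reference; not program code).
 *
 * Completed
 * Push — develop ( f2361e...8024f0 )
 * by Dylan
 * 03:04
 * created
 *
 * default_tests.int_link_info   D
 *
 * Complexity
 *   Conditions: 9
 *   Paths:      48
 *
 * Size
 *   Total Lines: 36
 *
 * Duplication
 *   Lines: 0
 *   Ratio: 0 %
 *
 * Importance
 *   Changes: 0
 *
 *   Metric | Value
 *   -------+------
 *   cc     | 9
 *   c      | 0
 *   b      | 0
 *   f      | 0
 *   nc     | 48
 *   nop    | 5
 *   dl     | 0
 *   loc    | 36
 *   rs     | 4.909
 *
 * 1 Function
 *
 *   Rating | Name                  | Duplication | Size | Complexity
 *   -------+-----------------------+-------------+------+-----------
 *   A      | (name not captured)   | 0           | 4    | 4
 */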
/**
 * The default set of page tests.
 *
 * Every test method reads `this.name` to decide which result table its row
 * goes to, so it must be invoked with a `this` that exposes `name`. Helpers
 * on this object are reached through `default_tests.<helper>` for that reason.
 */
const default_tests = {

    // Each entry is [name, title, headers, type]. `type` may be omitted.
    tests: [
        ['error_pages', 'ERROR PAGES', ['URL'], 'success'],
        ['h1_info', 'H1 INFO', ['URL', 'Count', 'Text', 'Status']],
        ['h2_info', 'H2 INFO', ['URL', 'Count', 'Text', 'Status']],
        ['word_count', 'WORD COUNT', ['URL', 'Word Count', 'Article Word Count']],
        ['int_link_info', 'INTERNAL LINKS',
            ['URL', 'Article Links', 'Article Link Count', 'Article Density', 'Total Link Count', 'Total Density', 'Status'],
            'info'],
        ['ext_link_info', 'EXTERNAL LINKS', ['URL', 'External Link Count', 'External Links'], 'success'],
        ['img_info', 'IMAGES', ['URL', 'Count', 'Missing Alt Tag', 'Missing Title Tag', 'Fields Missing Images', 'Status'], 'success'],
        ['title_info', 'META TITLE', ['URL', 'Meta Title', 'Length', 'Status']],
        ['description_info', 'META DESCRIPTION', ['URL', 'Meta Description', 'Length', 'Status']],
        ['canonical_info', 'CANONICAL', ['URL', 'Status'], 'success'],
        ['noindex_pages', 'NO-INDEX PAGES', ['URL'], 'success'],
        ['urls_test', 'URL STRUCTURE', ['URL', 'Status'], 'success'],
        ['duplicate_meta_tags', 'DUPLICATE META TAGS', ['URL', 'Status'], 'success'],
        ['href_langs', 'LANG TAGS', ['URL', 'Tags'], 'info'],
        ['orphan_pages', 'ORPHAN PAGES', ['URL'], 'info']
    ],

    /**
     * Test the h1s on the page provided.
     * Exactly one h1 is OK; zero or more than one is an error.
     *
     * @param {string} url
     * @param {jQuery} html
     * @returns {*} whatever crawler_painter.add_row returns
     */
    h1_info: function(url, html){
        const h1     = html.find( 'h1' );
        const link   = crawler_painter.create_link(url, url);
        const joined = [];

        h1.each(function(){ joined.push(this.innerHTML); });

        let status;
        if( h1.length !== 1 ){
            status = crawler_painter.create_status('error', (h1.length < 1) ? 'Missing H1' : 'Multiple H1 tags');
        }else{
            status = crawler_painter.create_status('success', 'OK!');
        }

        return crawler_painter.add_row(this.name, [link, h1.length, joined.join(', '), status]);
    },

    /**
     * Test the h2s on the page provided.
     * A page without any h2 gets a warning.
     *
     * @param {string} url
     * @param {jQuery} html
     * @returns {*} whatever crawler_painter.add_row returns
     */
    h2_info: function(url, html){
        const h2     = html.find( 'h2' );
        const link   = crawler_painter.create_link(url, url);
        const joined = [];

        h2.each(function(){ joined.push(this.innerHTML); });

        let status;
        if( h2.length < 1 ){
            status = crawler_painter.create_status('warning', 'Missing H2');
        }else{
            status = crawler_painter.create_status('success', 'OK!');
        }

        return crawler_painter.add_row(this.name, [link, h2.length, joined.join(', '), status]);
    },

    /**
     * Check the word count for the passed page
     *
     * @param {string} url
     * @param {jQuery} html
     * @param {string} headers
     * @param {Array} field_data index 3 is passed to crawler.get_word_count as the article content
     * @param {Array} phrases passed to crawler.get_word_count as the whole-page content
     * @returns {*} whatever crawler_painter.add_row returns
     */
    word_count: function(url, html, headers, field_data, phrases){
        const link       = crawler_painter.create_link(url, url);
        const word_count = crawler.get_word_count(phrases);
        const art_count  = crawler.get_word_count(field_data[3]);

        return crawler_painter.add_row(this.name, [link, word_count, art_count]);
    },

    /**
     * Test the internal links found on the page provided.
     *
     * A row is only added when the page has at least one internal link.
     * The status is a warning when the article has fewer than 100 words
     * per internal article link.
     *
     * @param {string} url
     * @param {jQuery} html
     * @param {string} headers
     * @param {Array} field_data index 2 holds the fields searched for article links,
     *                           index 3 is the article content used for the word count
     * @param {Array} phrases
     * @returns {undefined}
     */
    int_link_info: function(url, html, headers, field_data, phrases){
        const link = crawler_painter.create_link(url, url);

        // Article links
        let art_links = [];
        for( const field in field_data[2] ){
            art_links = art_links.concat(
                default_tests.get_internal_links($(field_data[2][field]).find('a'), url)
            );
        }

        // Full page links
        const links = default_tests.get_internal_links(html.find('a'), url);

        // Pages without internal links are not reported
        if( links.length < 1 ) return undefined;

        const art_word_count = crawler.get_word_count(field_data[3]);
        const art_density    = default_tests.get_link_density(art_word_count, art_links.length);
        const word_count     = crawler.get_word_count(phrases);
        const density        = default_tests.get_link_density(word_count, links.length);

        const status = ( art_density !== false && art_density < 100 )
            ? crawler_painter.create_status('warning', 'This page might be considered spammy')
            : crawler_painter.create_status('success', 'OK!');

        crawler_painter.add_row( this.name, [
            link,
            art_links.join('<br />'),
            art_links.length,
            default_tests.get_density_text(art_density),
            links.length,
            default_tests.get_density_text(density),
            status
        ]);

        return undefined;
    },

    /**
     * Collects the hrefs of the anchors passed that are neither external
     * nor anchors within the page itself
     *
     * @param {jQuery} anchors
     * @param {string} url
     * @returns {Array}
     */
    get_internal_links: function(anchors, url){
        const hrefs = [];

        $.each(anchors, function(){
            const href = $(this).attr('href');
            if( href && !crawler.is_external(href) && !crawler.is_anchor(href, url) ) hrefs.push(href);
        });

        return hrefs;
    },

    /**
     * Words per link, or false when there are no links to divide by
     *
     * @param {number} word_count
     * @param {number} link_count
     * @returns {number|boolean}
     */
    get_link_density: function(word_count, link_count){
        return (link_count > 0) ? word_count / link_count : false;
    },

    /**
     * Text shown for a density returned by get_link_density.
     * Strict comparison on purpose: a density of 0 is a real value and
     * must not be reported as 'No internal links'.
     *
     * @param {number|boolean} density
     * @returns {string}
     */
    get_density_text: function(density){
        return (density !== false) ? density.toFixed(2) +' words/link' : 'No internal links';
    },

    /**
     * Test the external links on the page provided.
     *
     * Only links inside the fields of field_data[2] are checked. Links whose
     * rel does not contain 'nofollow' are shown as warnings, the rest as info.
     * A row is only added when at least one external link is found.
     *
     * @param {string} url
     * @param {jQuery} html
     * @param {string} headers
     * @param {Array} field_data
     * @returns {undefined}
     */
    ext_link_info: function(url, html, headers, field_data){
        const link  = crawler_painter.create_link(url, url);
        const links = [];

        for( const field in field_data[2] ){
            $.each($(field_data[2][field]).find('a'), function(){
                const $this = $(this);
                const href  = $this.attr('href');
                if( !href || !crawler.is_external(href) ) return;

                const rel  = $this.attr('rel');
                const type = ( rel && rel.toLowerCase().indexOf('nofollow') >= 0 ) ? 'info' : 'warning';

                links.push(
                    $('<div class="clearfix"></div>').append([
                        crawler_painter.create_status(type, href),
                        '<p>&nbsp;</p>'
                    ])
                );
            });
        }

        if( links.length > 0 ){
            crawler_painter.add_row(this.name, [link, links.length, links]);
        }

        return undefined;
    },

    /**
     * Test the images on the page provided.
     *
     * Status precedence: missing alt (error), fields without images (warning),
     * missing title (info), otherwise success.
     *
     * @param {string} url
     * @param {jQuery} html
     * @param {string} headers
     * @param {Array} field_data index 2 holds the fields searched for images; index 1 is
     *                           read with the same keys for the text shown when a field has none
     * @returns {*} whatever crawler_painter.add_row returns
     */
    img_info: function(url, html, headers, field_data){
        const link   = crawler_painter.create_link(url, url);
        const imgs   = html.find('img');
        const fields = [];
        let alt      = 0;
        let title    = 0;

        // Check alt and title tags (an empty attribute counts as missing)
        $.each(imgs, function(){
            const $this = $(this);
            if( !$this.attr('alt') ) alt += 1;
            if( !$this.attr('title') ) title += 1;
        });

        // Check the fields
        for( const f in field_data[2] ){
            if( $(field_data[2][f]).find('img').length < 1 ) fields.push(field_data[1][f]);
        }

        // Construct Result
        let status;
        if( alt > 0 ){
            status = crawler_painter.create_status('error',
                (alt > 1) ? alt + ' images missing alt tag' : '1 image missing alt tag');
        }else if( fields.length > 0 ){
            status = crawler_painter.create_status('warning',
                (fields.length > 1) ? fields.join(' and ') + ' are missing images' : fields[0] + ' is missing images');
        }else if( title > 0 ){
            status = crawler_painter.create_status('info',
                (title > 1) ? title + ' images missing title tag' : '1 image is missing title tag');
        }else{
            status = crawler_painter.create_status('success', 'OK!');
        }

        return crawler_painter.add_row(this.name, [link, imgs.length, alt, title, fields.join(', '), status]);
    },

    /**
     * Test the meta title of the page provided.
     * When exactly one title exists it is also stored under 'meta_titles'.
     *
     * @param {string} url
     * @param {jQuery} html
     * @returns {*} whatever crawler_painter.add_row returns
     */
    title_info: function(url, html){
        const title  = html.filter( 'title' );
        const link   = crawler_painter.create_link(url, url);
        const text   = (title.length === 1) ? title.html() : '';
        const status = default_tests.get_meta_tags_status(title, 'meta title', text, 40, 56);

        if( title.length === 1 ){
            crawler.set_property('meta_titles', text, url);
        }

        return crawler_painter.add_row(this.name, [link, text, text.length, status]);
    },

    /**
     * Test the meta description for the page provided.
     * When exactly one description exists it is also stored under 'descriptions'.
     *
     * @param {string} url
     * @param {jQuery} html
     * @returns {*} whatever crawler_painter.add_row returns
     */
    description_info: function(url, html){
        const desc   = html.filter( 'meta[name=description]' );
        const link   = crawler_painter.create_link(url, url);
        // .attr() yields undefined when the tag has no content attribute;
        // fall back to '' so the length checks below cannot throw
        const text   = (desc.length === 1) ? (desc.attr('content') || '') : '';
        const status = default_tests.get_meta_tags_status(desc, 'meta description', text, 70, 156);

        if( desc.length === 1 ){
            crawler.set_property('descriptions', text, url);
        }

        return crawler_painter.add_row(this.name, [link, text, text.length, status]);
    },

    /**
     * Test the canonical rules for the page provided.
     * A row is only added when the page does not have exactly one canonical;
     * otherwise the canonical href is stored under 'canonicals'.
     *
     * @param {string} url
     * @param {jQuery} html
     * @returns {undefined}
     */
    canonical_info: function(url, html){
        const tags = default_tests.get_tags(html, 'link', 'rel', 'canonical');

        if( tags.length !== 1 ){
            const status = crawler_painter.create_status('error', 'Missing / Multiple canonicals found');
            crawler_painter.add_row(this.name, [crawler_painter.create_link(url, url), status]);
        }else{
            crawler.set_property('canonicals', tags[0].attr('href'), url);
        }

        return undefined;
    },

    /**
     * Check if the page provided has a meta tag whose content is exactly 'noindex'
     *
     * @param {string} url
     * @param {jQuery} html
     * @returns {undefined}
     */
    noindex_pages: function(url, html){
        if( default_tests.get_tags(html, 'meta', 'content', 'noindex').length > 0 ){
            crawler_painter.add_row(this.name, [crawler_painter.create_link(url, url)]);
            crawler_painter.set_type(this.name, 'error');
        }

        return undefined;
    },

    /**
     * Test the url passed for its structure.
     * Only the first failing rule is reported; a clean url adds no row.
     *
     * @param {string} url
     * @returns {*} undefined for a clean url, otherwise whatever crawler_painter.add_row returns
     */
    urls_test: function(url){
        let msg;

        if( url.length > 115 )                  msg = 'URL is too long';
        else if( url.toLowerCase() !== url )    msg = 'URL is not in lower case';
        else if( url.indexOf('_') >= 0 )        msg = 'URL contains under scores';
        else return undefined;

        const link = crawler_painter.create_link(url, url);
        return crawler_painter.add_row(this.name, [link, crawler_painter.create_status('warning', msg)]);
    },

    /**
     * Check for href lang tags in the page provided.
     * Each matching link tag is shown as escaped markup.
     *
     * @param {string} url
     * @param {jQuery} html
     * @returns {undefined}
     */
    href_langs: function(url, html){
        const link = crawler_painter.create_link(url, url);
        const tags = [];

        $.each( html.filter( 'link' ), function(){
            const $tag = $(this);
            if( !$tag.attr( 'hreflang' ) ) return;

            // Outer html of the tag, escaped by passing it through .text()
            const markup = $tag.clone().wrap('<p>').parent().html();
            tags.push( $('<p>').text( markup ).html() );
        });

        if( tags.length > 0 ) crawler_painter.add_row(this.name, [link, tags.join('<br />')] );

        return undefined;
    },

    /**
     * Returns a list of jQuery Objects that are of type {tag},
     * have an attribute {key} and its value is {value}
     *
     * @param {jQuery} html
     * @param {string} tag
     * @param {string} key
     * @param {string} value
     * @returns {Array}
     */
    get_tags: function(html, tag, key, value){
        const returns = [];

        $.each(html.filter(tag), function(){
            const $this = $(this);
            const attr  = $this.attr(key);
            // Loose comparison kept so callers passing non-string values still match
            if( attr && attr == value ){
                returns.push($this);
            }
        });

        return returns;
    },

    /**
     * Goes through an object and returns the first key whose value
     * contains (indexOf) the search passed
     *
     * @param {*} object
     * @param {*} search
     * @returns {*} the matching key, or undefined when nothing matches
     */
    get_key_from_object: function(object, search){
        for( const key in object ) if( object[key].indexOf(search) >= 0 ) return key;
        return undefined;
    },

    /**
     * Gets the status box for the meta tag being tested
     *
     * @param {Array} tags
     * @param {string} tag_name
     * @param {string} text
     * @param {int} min_char
     * @param {int} max_char
     * @returns {jQuery}
     */
    get_meta_tags_status: function(tags, tag_name, text, min_char, max_char){
        if( tags.length > 1 ) return crawler_painter.create_status('error', 'Multiple '+tag_name+' tags');
        if( tags.length < 1 ) return crawler_painter.create_status('error', 'Missing '+tag_name+' tag');

        const len = text.length;
        if( len < min_char ) return crawler_painter.create_status('warning', tag_name+' is too short');
        if( len > max_char ) return crawler_painter.create_status('warning', tag_name+' is too long');

        return crawler_painter.create_status('success', 'OK!');
    }
};
// Register the tests: every entry of default_tests.tests is registered with
// the crawler and its result table is given its configured type.
crawler.on('BEFORE_INIT', function(){
    // forEach rather than for...in, so only the array's elements are visited
    default_tests.tests.forEach(function(test){
        var name = test[0],
            // Entries without a matching method on default_tests are registered with `false`
            func = Object.prototype.hasOwnProperty.call(default_tests, name) ? default_tests[name] : false;

        // `regiser_test` spelling kept as-is: it is the method name the original code calls
        crawler.regiser_test(name, test[1], test[2], func);
        crawler_painter.set_type(name, test[3] || 'default');
    });
});
// A page that failed to load gets a row in the error pages table,
// and that table is flagged as an error.
crawler.on('CRAWL_LOAD_FAILED', function on_crawl_load_failed(url){
    var table = 'error_pages';

    crawler_painter.add_row(table, [url]);
    crawler_painter.set_type(table, 'error');
});
// When crawler is done check for orphan pages.
// A tested page that did not fail is an orphan when no page other than
// itself is listed in crawler.linked_from for it.
crawler.on('ALL_CRAWLS_FINISHED', function(){
    Object.keys(crawler.tested).forEach(function(i){
        var url = crawler.tested[i];

        // Failed pages are reported elsewhere, skip them here
        if( crawler.failed.indexOf(url) >= 0 ) return;

        var referrers = ( crawler.linked_from.hasOwnProperty(url) && crawler.linked_from[url] ) || {},
            is_linked = Object.keys(referrers).some(function(x){
                // A page linking to itself does not count
                return referrers[x] != url;
            });

        if( is_linked ) return;

        // Rows are added through crawler_painter, like every other result in this file
        crawler_painter.add_row('orphan_pages', [crawler_painter.create_link(url, url)]);
        crawler_painter.set_type('orphan_pages', 'error');
    });

    return true;
});
// When crawler is done check for duplicate meta tags: urls that share the
// same meta title / description but resolve to different canonicals.
crawler.on('ALL_CRAWLS_FINISHED', function(){
    var canonicals = crawler.canonicals,
        tests      = {
            'meta_titles'   : 'Urls have same meta title but different canonicals',
            'descriptions'  : 'Urls have same meta description but different canonicals'
        };

    for( var test in tests ){
        for( var x in crawler[test] ){
            var urls = crawler[test][x];

            // A value used by a single url cannot be a duplicate
            if( urls.length < 2 ) continue;

            var canonical = default_tests.get_key_from_object(canonicals, urls[0]);

            // One row per duplicated value: stop at the first url with a different canonical
            for( var i = 1; i < urls.length; i++ ){
                if( canonical !== default_tests.get_key_from_object(canonicals, urls[i]) ){
                    var status = crawler_painter.create_status('error', tests[test]);
                    crawler_painter.add_row('duplicate_meta_tags', [urls.join(', '), status]);
                    break;
                }
            }
        }
    }

    return undefined;
});