Passed
Push — develop ( 2fcfd9...1545da )
by Dylan
02:41
created

crawler.CRAWL_FINISHED   C

Complexity

Conditions 7
Paths 6

Size

Total Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 1 Features 0
Metric Value
dl 0
loc 23
rs 6.7272
c 1
b 1
f 0
cc 7
nc 6
nop 0
1
/**
 * Default tests shipped with the crawler. Each entry describes one result table:
 *
 *   name     - table identifier; callbacks hand it to crawler_painter via `this.name`
 *   title    - table heading text
 *   headers  - column heading texts
 *   type     - optional status type (presumably the table's initial state - confirm
 *              against the code that registers these tests)
 *   callback - optional function. It reads `this.name`, so it must be invoked with
 *              the test entry itself as `this`. Every callback returns true.
 *
 * Callback parameters, as far as their use in this file shows:
 *   cont       - never read by any callback here
 *   url        - URL string of the page being tested
 *   html       - jQuery-like collection queried with .find() / .filter()
 *   headers    - never read by any callback here
 *   field_data - indexed collection: [1] holds field labels, [2] holds field markup
 *                (wrapped with $() and searched), [3] is passed to
 *                crawler.get_word_count (assumed to be the article text - confirm)
 *   phrases    - passed to crawler.get_word_count for the whole-page word count
 */
const default_tests = [
    // Pages that failed to load; rows are added by the CRAWL_LOAD_FAILED handler.
    {
        name: 'error_pages',
        title: 'ERROR PAGES',
        headers: ['URL'],
        type: 'success'
    },

    // Exactly one <h1> is expected per page.
    {
        name : 'h1_info',
        title: 'H1 INFO',
        headers: ['URL', 'Count', 'Text', 'Status'],
        callback: function(cont, url, html){
            const h1     = html.find( 'h1' ),
                  link   = crawler_painter.create_link(url, url),
                  joined = [];
            let status;

            h1.each(function(){ joined.push(this.innerHTML); });

            if(h1.length !== 1)
                status = crawler_painter.create_status('error', (h1.length < 1) ? 'Missing H1' : 'Multiple H1 tags');
            else status = crawler_painter.create_status('success', 'OK!');

            crawler_painter.add_row(this.name, [link, h1.length, joined.join(', '), status]);

            return true;
        }
    },

    // At least one <h2> is expected; a missing one is only a warning.
    {
        name : 'h2_info',
        title: 'H2 INFO',
        headers: ['URL', 'Count', 'Text', 'Status'],
        callback: function(cont, url, html){
            const h2     = html.find( 'h2' ),
                  link   = crawler_painter.create_link(url, url),
                  joined = [];
            let status;

            h2.each(function(){ joined.push(this.innerHTML); });

            if(h2.length < 1) status = crawler_painter.create_status('warning', 'Missing H2');
            else status = crawler_painter.create_status('success', 'OK!');

            crawler_painter.add_row(this.name, [link, h2.length, joined.join(', '), status]);

            return true;
        }
    },

    // Word count of the whole page and of field_data[3].
    {
        name : 'word_count',
        title: 'WORD COUNT',
        headers: ['URL', 'Word Count', 'Article Word Count'],
        callback: function(cont, url, html, headers, field_data, phrases){
            const link       = crawler_painter.create_link(url, url),
                  word_count = crawler.get_word_count(phrases),
                  art_count  = crawler.get_word_count(field_data[3]);

            crawler_painter.add_row(this.name, [link, word_count, art_count]);

            return true;
        }
    },

    // Internal link density (words per internal link) for the fields and for the full page.
    {
        name : 'int_link_info',
        title: 'INTERNAL LINK INFO',
        headers: ['URL', 'Article Links', 'Article Link Count', 'Article Density',
                    'Total Link Count', 'Total Density', 'Status'],
        type: 'info',
        callback: function(cont, url, html, headers, field_data, phrases){
            const link      = crawler_painter.create_link(url, url),
                  art_links = [],
                  links     = [];

            // Article links
            for( const field in field_data[2] ) {
                $.each($(field_data[2][field]).find('a'), function () {
                    const href = $(this).attr('href');
                    if(href && !crawler.is_external(href) && !crawler.is_anchor(href, url)) art_links.push(href);
                });
            }

            // Full page links
            $.each(html.find('a'), function () {
                const href = $(this).attr('href');
                if(href && !crawler.is_external(href) && !crawler.is_anchor(href, url)) links.push(href);
            });

            // `false` marks "no links". Compare with !== so that a genuine density of 0
            // (links present, zero words) is not mistaken for "no links".
            const art_word_count = crawler.get_word_count(field_data[3]),
                  art_density    = (art_links.length > 0) ? art_word_count / art_links.length : false,
                  art_dens_text  = (art_density !== false) ? art_density.toFixed(2) +' words/link' : 'No internal links',
                  word_count     = crawler.get_word_count(phrases),
                  density        = (links.length > 0) ? word_count / links.length : false,
                  dens_text      = (density !== false) ? density.toFixed(2) +' words/link' : 'No internal links';
            let status = crawler_painter.create_status('success', 'OK!');

            // Fewer than 100 words per article link is flagged.
            if( art_density !== false && art_density < 100 )
                status = crawler_painter.create_status('warning', 'This page might be considered spammy');

            // Pages without any internal link get no row at all.
            if(links.length > 0)
                crawler_painter.add_row( this.name, [
                    link, art_links.join('<br />'), art_links.length, art_dens_text, links.length, dens_text, status
                ]);

            return true;
        }
    },

    // External links found inside the fields; links without rel="nofollow" are warnings.
    {
        name : 'ext_link_info',
        title: 'EXTERNAL LINK INFO',
        headers: ['URL', 'External Link Count', 'External Links'],
        type: 'success',
        callback: function(cont, url, html, headers, field_data){
            const link  = crawler_painter.create_link(url, url),
                  links = [];

            for( const field in field_data[2] ) {
                $.each($(field_data[2][field]).find('a'), function () {
                    const $this = $(this),
                          href  = $this.attr('href');
                    if(href && crawler.is_external(href)){
                        const rel  = $this.attr('rel'),
                              type = ( !rel || rel.toLowerCase().indexOf('nofollow') < 0 ) ? 'warning' : 'info';
                        links.push(
                            $('<div class="clearfix"></div>').append([
                                crawler_painter.create_status(type, href),
                                '<p>&nbsp;</p>'
                            ])
                        );
                    }
                });
            }

            if(links.length > 0) crawler_painter.add_row(this.name, [link, links.length, links]);

            return true;
        }
    },

    // Image checks: missing alt / title attributes and fields without any image.
    {
        name : 'img_info',
        title: 'IMAGE INFO',
        headers: ['URL', 'Count', 'Missing Alt Tag', 'Missing Title Tag', 'Fields Missing Images', 'Status'],
        type: 'success',
        callback: function(cont, url, html, headers, field_data) {
            const link   = crawler_painter.create_link(url, url),
                  imgs   = html.find('img'),
                  fields = [];
            let alt = 0, title = 0, status;

            // Check alt and title tags (a missing and an empty attribute count the same)
            $.each(imgs, function () {
                const $this = $(this);
                if (!$this.attr('alt')) alt += 1;
                if (!$this.attr('title')) title += 1;
            });

            // Check the fields
            for (const f in field_data[2]) {
                if ($(field_data[2][f]).find('img').length < 1) fields.push(field_data[1][f]);
            }

            // Construct Result - the most severe finding wins
            if (alt > 0)
                status = crawler_painter.create_status('error',
                    (alt > 1) ? alt + ' images missing alt tag' : '1 image missing alt tag');
            else if(fields.length > 0)
                status = crawler_painter.create_status('warning',
                    (fields.length > 1) ? fields.join(' and ') + ' are missing images' : fields[0] + ' is missing images');
            else if(title > 0)
                status = crawler_painter.create_status('info',
                    (title > 1) ? title + ' images missing title tag' : '1 image is missing title tag');
            else
                status = crawler_painter.create_status('success', 'OK!');

            crawler_painter.add_row(this.name, [link, imgs.length, alt, title, fields.join(', '), status]);

            return true;
        }
    },

    // <title> checks; also records title -> urls in crawler.meta_titles for the duplicate test.
    {
        name: 'title_info',
        title: 'META TITLE',
        headers: ['URL', 'Meta Title', 'Length', 'Status'],
        callback: function(cont, url, html){
            const title = html.filter( 'title' ),
                  link  = crawler_painter.create_link(url, url);
            let text   = '', len = 0,
                status = crawler_painter.create_status('success', 'OK!');

            if( title.length > 1 ){
                text = 'Multiple Titles';
                len  = 'N/A';
                status = crawler_painter.create_status('error', 'Multiple title tags');
            }else if( title.length < 1 ){
                status = crawler_painter.create_status('error', 'Missing title tag');
            }else{
                text = title.html();
                len = text.length;
                if(len < 40) status = crawler_painter.create_status('warning', 'Meta title is too short');
                else if(len > 56) status = crawler_painter.create_status('warning', 'Meta title is too long');
            }

            crawler_painter.add_row(this.name, [link, text, len, status]);
            if(!crawler.hasOwnProperty('meta_titles')) crawler.meta_titles = {};
            if(!crawler.meta_titles.hasOwnProperty(text)) crawler.meta_titles[text] = [url];
            else crawler.meta_titles[text].push(url);

            return true;
        }
    },

    // Meta description checks; records description -> urls in crawler.descriptions.
    {
        name: 'description_info',
        title: 'META DESCRIPTION',
        headers: ['URL', 'Meta Description', 'Length', 'Status'],
        callback: function(cont, url, html){
            const desc = html.filter( 'meta[name=description]' ),
                  link = crawler_painter.create_link(url, url);
            let text = '', len = 0, status;

            if( desc.length > 1 ){
                text = 'Multiple Meta Descriptions';
                len  = 'N/A';
                status = crawler_painter.create_status('error', 'Multiple meta description tags');
            }else if( desc.length < 1 ){
                status = crawler_painter.create_status('error', 'Missing meta description tag');
            }else{
                // A description tag without a content attribute counts as an empty description.
                text = desc.attr('content') || '';
                len = text.length;
                if(len < 70) status = crawler_painter.create_status('warning', 'Meta description is too short');
                else if(len > 156) status = crawler_painter.create_status('warning', 'Meta description is too long');
                else status = crawler_painter.create_status('success', 'OK!');

                if(!crawler.hasOwnProperty('descriptions')) crawler.descriptions = {};
                if(!crawler.descriptions.hasOwnProperty(text)) crawler.descriptions[text] = [url];
                else crawler.descriptions[text].push(url);
            }

            crawler_painter.add_row(this.name, [link, text, len, status]);

            return true;
        }
    },

    // Lists pages without a canonical link; records canonical -> urls in crawler.canonicals.
    {
        name: 'canonical_info',
        title: 'PAGES MISSING CANONICAL',
        headers: ['URL'],
        type: 'success',
        callback: function(cont, url, html){
            const tags = html.filter( 'link' );
            let canonical;

            for( let i = 0; i < tags.length; i++ ) {
                const tag = $(tags[i]),
                      rel = tag.attr('rel');
                if( rel && rel.toLowerCase() === 'canonical' ) {
                    // The canonical URL lives in href, not in rel.
                    canonical = tag.attr('href');
                    break;
                }
            }

            if(canonical === undefined || canonical.length < 1) {
                crawler_painter.add_row(this.name, [crawler_painter.create_link(url, url)]);
                crawler_painter.set_type(this.name, 'error');
                // Only fall back to the page's own URL when no canonical was declared.
                canonical = url; // What Google will do
            }

            if(!crawler.hasOwnProperty('canonicals')) crawler.canonicals = {};
            if(!crawler.canonicals.hasOwnProperty(canonical)) crawler.canonicals[canonical] = [url];
            else crawler.canonicals[canonical].push(url);

            return true;
        }
    },

    // Lists pages whose robots meta tag contains "noindex".
    {
        name: 'no-index_pages',
        title: 'NO-INDEX PAGES',
        headers: ['URL'],
        type: 'success',
        callback: function(cont, url, html){
            const tags = html.filter( 'meta' );

            for( let i = 0; i < tags.length; i++ ) {
                const tag     = $(tags[i]),
                      name    = tag.attr( 'name' ),
                      content = tag.attr( 'content' );

                // A robots tag without a content attribute cannot declare noindex; skip it.
                if( name && name.toLowerCase() === 'robots' &&
                        content && content.toLowerCase().indexOf( 'noindex' ) > -1 ) {
                    crawler_painter.add_row(this.name, [crawler_painter.create_link(url, url)]);
                    crawler_painter.set_type(this.name, 'warning');
                    return true;
                }
            }

            return true;
        }
    },

    // URL hygiene: length, case and underscores. Only the first problem found is reported.
    {
        name: 'urls_test',
        title: 'URL STRUCTURE',
        headers: ['URL', 'Status'],
        type: 'success',
        callback: function(cont, url){
            const link = crawler_painter.create_link(url, url);
            let msg;

            if( url.length > 115 )                  msg = 'URL is too long';
            else if( url.toLowerCase() !== url )    msg = 'URL is not in lower case';
            else if( url.indexOf('_') >= 0 )        msg = 'URL contains under scores';
            else return true;

            crawler_painter.add_row(this.name, [link, crawler_painter.create_status('warning', msg)]);

            return true;
        }
    },

    // Cross-page check: URLs sharing a meta title / description but not a canonical.
    // Reads crawler.meta_titles, crawler.descriptions and crawler.canonicals, which the
    // title_info, description_info and canonical_info callbacks fill in.
    {
        name: 'duplicate_meta_tags',
        title: 'DUPLICATE META TAGS',
        headers: ['URL', 'Status'],
        type: 'success',
        callback: function(){
            const canonicals = crawler.canonicals,
                  tests      = {
                      'meta_titles'   : 'Urls have same meta title but different canonicals',
                      'descriptions'  : 'Urls have same meta description but different canonicals'
                  };

            // Returns the first key of `object` whose list contains `search`, else undefined.
            function getKeyFromObject(object, search){
                for( const key in object ) if( object[key].indexOf(search) >= 0 ) return key;
                return undefined;
            }

            // Reset table - the table is rebuilt from scratch on every call
            crawler_painter.reset_table(this.name, 'success');

            for(const test in tests){
                for(const x in crawler[test]){
                    const urls = crawler[test][x];
                    // A value used by a single URL cannot be a duplicate.
                    if( urls.length < 2 ) continue;

                    const canonical = getKeyFromObject(canonicals, urls[0]);
                    for( let i = 1; i < urls.length; i++ )
                        if( canonical !== getKeyFromObject(canonicals, urls[i]) ) {
                            crawler_painter.add_row(
                                this.name,
                                [urls.join(', '), crawler_painter.create_status('error', tests[test])]
                            );
                            break; // one row per duplicated value is enough
                        }
                }
            }

            return true;
        }
    },

    // Lists the (HTML-escaped) markup of every <link> tag carrying an hreflang attribute.
    {
        name: 'href_langs',
        title: 'LANG TAGS',
        headers: ['URL', 'Tags'],
        type: 'info',
        callback: function(cont, url, html){
            const link = crawler_painter.create_link(url, url),
                  tags = [];

            $.each( html.filter( 'link' ), function(){
                const $tag = $(this);
                if( $tag.attr( 'hreflang' ) )
                    // Take the tag's outer HTML, then escape it by round-tripping through .text().
                    tags.push( $('<p>').text( $tag.clone().wrap('<p>').parent().html() ).html() );
            });

            if( tags.length > 0 ) crawler_painter.add_row(this.name, [link, tags.join('<br />')] );

            return true;
        }
    },

    // Pages nobody links to; rows are added by the CRAWL_FINISHED handler.
    {
        name: 'orphan_pages',
        title: 'ORPHAN PAGES',
        headers: ['URL']
    }
];
381
382
// Whenever a page fails to load, list its URL in the 'error_pages' table and
// switch that table to the 'error' type.
crawler.on('CRAWL_LOAD_FAILED', (failed_url) => {
    const table = 'error_pages';

    crawler_painter.add_row(table, [failed_url]);
    crawler_painter.set_type(table, 'error');
});
386
387
/**
 * When a crawl finishes, rebuild the 'orphan_pages' table.
 *
 * A tested page is listed as an orphan when it did not fail to load and no page
 * other than itself appears in crawler.linked_from[url]. Nothing is done while
 * crawler.que still holds entries. The handler always returns true.
 */
crawler.on('CRAWL_FINISHED', function(){
    if(crawler.que.length > 0) return true;
    crawler_painter.reset_table('orphan_pages', 'success');

    // for...in is kept on purpose: the shapes of crawler.tested and of the
    // linked_from lists are not visible from this file.
    for( const i in crawler.tested ){
        const url = crawler.tested[i];

        // Pages that failed to load are already listed in the error table.
        if( crawler.failed.indexOf(url) >= 0 ) continue;

        // A link from the page to itself does not count as an inbound link.
        let has_inbound_link = false;
        if( crawler.linked_from.hasOwnProperty(url) ) {
            for( const x in crawler.linked_from[url] ) {
                if( crawler.linked_from[url][x] !== url ) {
                    has_inbound_link = true;
                    break;
                }
            }
        }
        if( has_inbound_link ) continue;

        // Rows are added through crawler_painter, like everywhere else in this file.
        crawler_painter.add_row('orphan_pages', [crawler_painter.create_link(url, url)]);
        crawler_painter.set_type('orphan_pages', 'error');
    }

    return true;
});
410