Completed
Push — develop ( 7852a2...d7f552 )
by Dylan
03:02
created

crawler.ALL_CRAWLS_FINISHED   B

Complexity

Conditions 6
Paths 5

Size

Total Lines 22

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
dl 0
loc 22
rs 8.6737
c 1
b 0
f 0
cc 6
nc 5
nop 0
1
const default_tests = [
2
    {
3
        name: 'error_pages',
4
        title: 'ERROR PAGES',
5
        headers: ['URL'],
6
        type: 'success'
7
    },
8
9
    {
10
        name : 'h1_info',
11
        title: 'H1 INFO',
12
        headers: ['URL', 'Count', 'Text', 'Status'],
13
        callback: function(cont, url, html){
14
            var h1      = html.find( 'h1' ),
15
                link    = crawler_painter.create_link(url, url),
16
                joined  = [],
17
                status;
18
19
            h1.each(function(){ joined.push(this.innerHTML); });
20
21
            if(h1.length != 1)
22
                status = crawler_painter.create_status('error', (h1.length < 1) ? 'Missing H1' : 'Multiple H1 tags');
23
            else status = crawler_painter.create_status('success', 'OK!');
24
25
            crawler_painter.add_row(this.name, [link, h1.length, joined.join(', '), status]);
26
27
            return true;
28
        }
29
    },
30
31
    {
32
        name : 'h2_info',
33
        title: 'H2 INFO',
34
        headers: ['URL', 'Count', 'Text', 'Status'],
35
        callback: function(cont, url, html){
36
            var h2      = html.find( 'h2' ),
37
                link    = crawler_painter.create_link(url, url),
38
                joined  = [], status;
39
40
            h2.each(function(){ joined.push(this.innerHTML); });
41
42
            if(h2.length < 1) status = crawler_painter.create_status('warning', 'Missing H2');
43
            else status = crawler_painter.create_status('success', 'OK!');
44
45
            crawler_painter.add_row(this.name, [link, h2.length, joined.join(', '), status]);
46
47
            return true;
48
        }
49
    },
50
51
    {
52
        name : 'word_count',
53
        title: 'WORD COUNT',
54
        headers: ['URL', 'Word Count', 'Article Word Count'],
55
        callback: function(cont, url, html, headers, field_data, phrases){
56
            var link        = crawler_painter.create_link(url, url),
57
                word_count  = crawler.get_word_count(phrases),
58
                art_count   = crawler.get_word_count(field_data[3]);
59
60
            crawler_painter.add_row(this.name, [link, word_count, art_count]);
61
62
            return true;
63
        }
64
    },
65
66
    {
67
        name : 'int_link_info',
68
        title: 'INTERNAL LINK INFO',
69
        headers: ['URL', 'Article Links', 'Article Link Count', 'Article Density',
70
                    'Total Link Count', 'Total Density', 'Status'],
71
        type: 'info',
72
        callback: function(cont, url, html, headers, field_data, phrases){
73
            var link = crawler_painter.create_link(url, url),
74
                art_links = [], links = [];
75
76
            // Article links
77
            for( var field in field_data[2] ) {
78
                $.each($(field_data[2][field]).find('a'), function () {
79
                    var href = $(this).attr('href');
80
                    if(href && !crawler.is_external(href) && !crawler.is_anchor(href, url)) art_links.push(href);
81
                });
82
            }
83
84
            // Full page links
85
            $.each(html.find('a'), function () {
86
                var href = $(this).attr('href');
87
                if(href && !crawler.is_external(href) && !crawler.is_anchor(href, url)) links.push(href);
88
            });
89
90
            var art_word_count  = crawler.get_word_count(field_data[3]),
91
                art_density     = (art_links.length > 0) ? art_word_count / art_links.length : false,
92
                art_dens_text   = (art_density != false) ? art_density.toFixed(2) +' words/link' : 'No internal links',
93
                word_count      = crawler.get_word_count(phrases),
94
                density         = (links.length > 0) ? word_count / links.length : false,
95
                dens_text       = (density != false) ? density.toFixed(2) +' words/link' : 'No internal links',
96
                status          = crawler_painter.create_status('success', 'OK!');
97
98
            if( ( art_density !== false && art_density < 100 ) )
99
                status = crawler_painter.create_status('warning', 'This page might be considered spammy');
100
101
            if(links.length > 0)
102
                crawler_painter.add_row( this.name, [
103
                    link, art_links.join('<br />'), art_links.length, art_dens_text, links.length, dens_text, status
104
                ]);
105
106
            return true;
107
        }
108
    },
109
110
    {
111
        name : 'ext_link_info',
112
        title: 'EXTERNAL LINK INFO',
113
        headers: ['URL', 'External Link Count', 'External Links'],
114
        type: 'success',
115
        callback: function(cont, url, html, headers, field_data){
116
            var link = crawler_painter.create_link(url, url),
117
                links = [];
118
119
            for( var field in field_data[2] ) {
120
                $.each($(field_data[2][field]).find('a'), function () {
121
                    var $this = $(this),
122
                        href = $this.attr('href');
123
                    if(href && crawler.is_external(href)){
124
                        var type = ( !$this.attr('rel') || $this.attr('rel').toLowerCase().indexOf('nofollow') < 0 )
125
                            ? 'warning' : 'info';
126
                        links.push(
127
                            $('<div class="clearfix"></div>').append([
128
                                crawler_painter.create_status(type, href),
129
                                '<p>&nbsp;</p>'
130
                            ])
131
                        );
132
                    }
133
                });
134
            }
135
136
            if(links.length > 0) crawler_painter.add_row(this.name, [link, links.length, links]);
137
138
            return true;
139
        }
140
    },
141
142
    {
143
        name : 'img_info',
144
        title: 'IMAGE INFO',
145
        headers: ['URL', 'Count', 'Missing Alt Tag', 'Missing Title Tag', 'Fields Missing Images', 'Status'],
146
        type: 'success',
147
        callback: function(cont, url, html, headers, field_data) {
148
            var link = crawler_painter.create_link(url, url),
149
                imgs = html.find('img'),
150
                alt = 0, title = 0, fields = [], status = '';
151
152
            // Check alt and title tags
153
            $.each(imgs, function () {
154
                var $this = $(this);
155
                if (!$this.attr('alt') || $this.attr('alt').length < 1) alt += 1;
156
                if (!$this.attr('title') || $this.attr('title').length < 1) title += 1;
157
            });
158
159
            // Check the fields
160
            for (var f in field_data[2]) if ($(field_data[2][f]).find('img').length < 1) fields.push(field_data[1][f]);
161
162
            // Construct Result
163
            if (alt > 0)
164
                status = crawler_painter.create_status('error',
165
                    (alt > 1) ? alt + ' images missing alt tag' : '1 image missing alt tag');
166
            else if(fields.length > 0)
167
                status = crawler_painter.create_status('warning',
168
                    (fields.length > 1) ? fields.join(' and ') + ' are missing images' : fields[0] + ' is missing images');
169
            else if(title > 0)
170
                status = crawler_painter.create_status('info',
171
                    (title > 1) ? title + ' images missing title tag' : '1 image is missing title tag');
172
            else
173
                status = crawler_painter.create_status('success', 'OK!');
174
175
            crawler_painter.add_row(this.name, [link, imgs.length, alt, title, fields.join(', '), status]);
176
177
            return true;
178
        }
179
    },
180
181
    {
182
        name: 'title_info',
183
        title: 'META TITLE',
184
        headers: ['URL', 'Meta Title', 'Length', 'Status'],
185
        callback: function(cont, url, html){
186
            var title   = html.filter( 'title' ),
187
                link    = crawler_painter.create_link(url, url),
188
                text    = (title.length == 1) ? title.html() : '',
189
                status  = crawler_painter.get_meta_tags_status(title, 'meta title', text, 40, 56);
190
191
            if(title.length == 1){
192
                crawler.set_property('meta_titles', text, url);
193
            }
194
195
            crawler_painter.add_row(this.name, [link, text, text.length, status]);
196
197
            return true;
198
        }
199
    },
200
201
    {
202
        name: 'description_info',
203
        title: 'META DESCRIPTION',
204
        headers: ['URL', 'Meta Description', 'Length', 'Status'],
205
        callback: function(cont, url, html){
206
            var desc    = html.filter( 'meta[name=description]' ),
207
                link    = crawler_painter.create_link(url, url),
208
                text    = (desc.length == 1) ? desc.attr('content') : '',
209
                status  = crawler_painter.get_meta_tags_status(desc, 'meta description', text, 70, 156);
210
211
            if( desc.length == 1 ){
212
                crawler.set_property('descriptions', text, url);
213
            }
214
215
            crawler_painter.add_row(this.name, [link, text, text.length, status]);
216
217
            return true;
218
        }
219
    },
220
221
    {
222
        name: 'canonical_info',
223
        title: 'CANONICAL INFO',
224
        headers: ['URL', 'Status'],
225
        type: 'success',
226
        callback: function(cont, url, html){
227
            var tags = html.filter( 'link' ), canonical;
228
229
            for( var i = 0; i < tags.length; i++ ) {
230
                var rel = $(tags[i]).attr('rel');
231
                if( rel && rel.toLowerCase() === 'canonical' ) {
232
                    canonical = $(tags[i]).attr('rel');
233
                    break;
234
                }
235
            }
236
237
            if(canonical === undefined || canonical.length != 1) {
238
                var status = crawler_painter.create_status('error', 'Missing / Multiple canonicals found');
239
                crawler_painter.add_row(this.name, [crawler_painter.create_link(url, url), status]);
240
            }else{
241
                crawler.set_property('canonicals', canonical, url);
242
            }
243
244
            return true;
245
        }
246
    },
247
248
    {
249
        name: 'no-index_pages',
250
        title: 'NO-INDEX PAGES',
251
        headers: ['URL'],
252
        type: 'success',
253
        callback: function(cont, url, html){
254
            var tags = html.filter( 'meta' );
255
            for( var i = 0; i < tags.length; i++ )
256
                if( $(tags[i]).attr( 'name' ) && $(tags[i]).attr( 'name' ).toLowerCase() === 'robots' &&
257
                        $(tags[i]).attr('content').toLowerCase().indexOf( 'noindex' ) > -1 ) {
258
                    crawler_painter.add_row(this.name, [crawler_painter.create_link(url, url)]);
259
                    crawler_painter.set_type(this.name, 'warning');
260
                    return;
261
                }
262
263
            return true;
264
        }
265
    },
266
267
    {
268
        name: 'urls_test',
269
        title: 'URL STRUCTURE',
270
        headers: ['URL', 'Status'],
271
        type: 'success',
272
        callback: function(cont, url){
273
            var link = crawler_painter.create_link(url, url),
274
                msg;
275
276
            if( url.length > 115 )                  msg = 'URL is too long';
277
            else if( url.toLowerCase() != url )     msg = 'URL is not in lower case';
278
            else if( url.replace('_','') !== url )  msg = 'URL contains under scores';
279
            else return true;
280
281
            crawler_painter.add_row(this.name, [link, crawler_painter.create_status('warning', msg)]);
282
283
            return true;
284
        }
285
    },
286
287
    {
288
        name: 'duplicate_meta_tags',
289
        title: 'DUPLICATE META TAGS',
290
        headers: ['URL', 'Status'],
291
        type: 'success',
292
        callback: function(){
293
            var canonicals = crawler.canonicals,
294
                tests      = {
295
                    'meta_titles'   : 'Urls have same meta title but different canonicals',
296
                    'descriptions'  : 'Urls have same meta description but different canonicals'
297
                };
298
299
            // Reset table
300
            crawler_painter.reset_table(this.name, 'success');
301
302
            for(var test in tests){
303
                for(var x in crawler[test]){
304
                    var urls = crawler[test][x];
305
                    if( urls < 2 ) continue;
306
                    var canonical = getKeyFromObject(canonicals, urls[0]);
307
                    for( var i in urls )
308
                        if( canonical != getKeyFromObject(canonicals, urls[i]) ) {
309
                            crawler_painter.add_row(
310
                                this.name,
311
                                [urls.join(', '), crawler_painter.create_status('error', tests[test])]
312
                            );
313
                            break;
314
                        }
315
                }
316
            }
317
318
            function getKeyFromObject(object, search){
319
                for( var key in object ) if( object[key].indexOf(search) >= 0 ) return key;
320
                return undefined;
321
            }
322
323
            return true;
324
        }
325
    },
326
327
    {
328
        name: 'href_langs',
329
        title: 'LANG TAGS',
330
        headers: ['URL', 'Tags'],
331
        type: 'info',
332
        callback: function(cont, url, html){
333
            var link    = crawler_painter.create_link(url, url),
334
                tags    = [];
335
336
            $.each( html.filter( 'link' ), function(){
337
                if( $(this).attr( 'hreflang' ) )
338
                    tags.push( $('<p>').text( $(this).clone().wrap('<p>').parent().html() ).html() );
339
            });
340
341
            if( tags.length > 0 ) crawler_painter.add_row(this.name, [link, tags.join('<br />')] );
342
343
            return true;
344
        }
345
    },
346
347
    {
348
        name: 'orphan_pages',
349
        title: 'ORPHAN PAGES',
350
        headers: ['URL']
351
    }
352
];
353
354
// Every page that fails to load is listed in the 'error_pages' table, which
// is then marked as error.
crawler.on('CRAWL_LOAD_FAILED', function(failed_url){
    var table = 'error_pages';

    crawler_painter.add_row(table, [failed_url]);
    crawler_painter.set_type(table, 'error');
});
358
359
// Once all crawls are done, rebuild the 'orphan_pages' table: a tested page
// that loaded successfully is an orphan when no page other than itself is
// recorded in crawler.linked_from as linking to it.
crawler.on('ALL_CRAWLS_FINISHED', function(){
    crawler_painter.reset_table('orphan_pages', 'success');

    // True when at least one page other than `url` itself links to `url`
    function has_inbound_link(url){
        if( !crawler.linked_from.hasOwnProperty(url) ) return false;

        var sources = crawler.linked_from[url];
        for( var x in sources )
            if( sources[x] != url ) return true;

        return false;
    }

    for( var i in crawler.tested ){
        var url = crawler.tested[i];

        // Failed pages are reported by the error_pages table instead
        if( crawler.failed.indexOf(url) >= 0 ) continue;

        if( has_inbound_link(url) ) continue;

        // Rows belong to the painter, like every other table in this file
        crawler_painter.add_row('orphan_pages', [crawler_painter.create_link(url, url)]);
        crawler_painter.set_type('orphan_pages', 'error');
    }

    return true;
});
381
382
/**
 * Gets the status box for the meta tag being tested
 * Append to the crawler_painter
 *
 * Produces an error when the tag is missing or present more than once, a
 * warning when the text length is outside [min_char, max_char], and a
 * success status otherwise.
 *
 * @param {Array} tags matched tags; only its `length` is read
 * @param {string} tag_name name used in the status messages
 * @param {string} text content of the tag; null / undefined count as empty
 * @param {int} min_char shortest acceptable length (inclusive)
 * @param {int} max_char longest acceptable length (inclusive)
 * @returns {jQuery} whatever `this.create_status` returns
 */
crawler_painter.get_meta_tags_status = function(tags, tag_name, text, min_char, max_char){
    if( tags.length > 1 ){
        return this.create_status('error', 'Multiple '+tag_name+' tags');
    }

    if( tags.length < 1 ){
        return this.create_status('error', 'Missing '+tag_name+' tag');
    }

    // A tag without readable text (e.g. a missing attribute) has length 0
    var len = (text == null) ? 0 : text.length;

    if( len < min_char ){
        return this.create_status('warning', tag_name+' is too short');
    }

    if( len > max_char ){
        return this.create_status('warning', tag_name+' is too long');
    }

    return this.create_status('success', 'OK!');
};
409