const crawler_file_tester = {

    // Entries look like { rule: <regex source>, agent: <user-agent>, original: <raw line> }
    robot_rules: [],

    /**
     * Parse the content of the robots file
     *
     * @param {string} result
     * @throws {Error}
     */
    parse_robots_file: function(result){
        var rules = result.split("\n");
        crawler_painter.add_row('file_tests', [ crawler_painter.create_status('success', 'Robots file loaded') ]);

        var agent = '*';
        for(var r in rules){
            var line = rules[r].replace(/^\s+|\s+$/g, ''); // trim, including any trailing \r
            var low = line.toLowerCase();
            // Skip blanks, comments and directives we don't test (Sitemap, Allow, Crawl-delay)
            if( line.length < 1 || line.indexOf('#') === 0 || low.indexOf('sitemap:') >= 0
                    || low.indexOf('allow:') === 0 || low.indexOf('crawl-delay:') >= 0 ){
                continue;
            }else if( low.indexOf('user-agent:') >= 0 ){
                agent = line.replace(/user-agent:/gi, '').replace(/^\s+|\s+$|\s+(?=\s)/g, '');
            }else if( low.indexOf('disallow:') >= 0 ){
                var path = line
                    .replace(/disallow:/gi, '')           // remove the directive name
                    .replace(/^\s+|\s+$|\s+(?=\s)/g, ''); // remove white space
                if( path.length < 1 ){ continue; }        // an empty Disallow blocks nothing
                var rule = '^' + path
                    .replace(/\./g, '\\.')    // escape dots so they match literally
                    .replace(/\?/g, '\\?')    // escape query string markers
                    .replace(/\|/g, '\\|')    // escape pipes
                    .replace(/\//g, '\\/')    // escape slashes
                    .replace(/\*/g, '(.*?)'); // replace robots wildcards with a non-greedy match-anything
                crawler_file_tester.robot_rules.push({ 'rule': rule, 'agent': agent, 'original': line });
            }else{
                console.log(line);
                throw new Error("Found a rule which we don't understand. Report it to the developer");
            }
        }
    },
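
    // A minimal sketch of the transformation above, using a hypothetical robots
    // file (not part of this module). Given the lines:
    //
    //   User-agent: *
    //   Disallow: /private/*.html
    //
    // parse_robots_file would push roughly:
    //
    //   { rule: '^\\/private\\/(.*?)\\.html', agent: '*', original: 'Disallow: /private/*.html' }
    //
    // and new RegExp('^\\/private\\/(.*?)\\.html').test('/private/old.html')
    // would then return true.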

    /**
     * Check every tested URL and flag any that are blocked by a rule in the robots file
     *
     * @returns {undefined}
     */
    test_blocked_pages: function(){
        for(var t in crawler.tested){
            var url = crawler.tested[t];

            if( crawler.linked_from.hasOwnProperty(url) ) {
                for (var r in this.robot_rules) {
                    // No 'g' flag: test() on a global regex keeps state via lastIndex
                    var regex = new RegExp(this.robot_rules[r]['rule']);
                    if (regex.test('/' + url)) {
                        var link = crawler_painter.create_link(url, url),
                            status = crawler_painter.create_status('error', 'Page has links and is blocked in robots'),
                            agent = ( this.robot_rules[r]['agent'] == '*' ) ? 'ALL BOTS' : this.robot_rules[r]['agent'];
                        crawler_painter.add_row(
                            'blocked_pages',
                            [link, crawler.linked_from[url].join(', '), agent, this.robot_rules[r]['original'], status]);
                    }
                }
            }
        }
    },
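
    // Example (sketch) of the check above, with hypothetical data: if robot_rules
    // holds { rule: '^\\/admin' } and crawler.tested contains 'admin/login',
    // then new RegExp('^\\/admin').test('/admin/login') is true and the URL is
    // reported in the 'blocked_pages' table along with the pages linking to it.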

    /**
     * Set up an AJAX call to fetch a URL through the proxy
     *
     * @param {string} url
     * @param {function} callback
     * @param {function} failed_callback
     */
    get_file_contents: function(url, callback, failed_callback){
        $.ajax({
            'url': crawler.get_proxy('/seotest/getPage?u='+encodeURIComponent(url)+'&agent='+encodeURIComponent(crawler.agent))
        }).done(callback).fail(failed_callback);
    },
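
    // Usage sketch (the URL here is hypothetical):
    //
    //   crawler_file_tester.get_file_contents(
    //       'https://example.com/robots.txt',
    //       function(body){ console.log(body.length + ' bytes'); },
    //       function(){ console.log('fetch failed'); }
    //   );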

    /**
     * Start testing the robots page
     */
    init_robots_tester: function(){
        // Stub: the robots fetch is currently kicked off by the AFTER_INIT handler below.
    }

};

// Register the tests
crawler.on('BEFORE_INIT', function(){
    crawler.regiser_test('file_tests', 'FILE TESTS', ['Status'], false);
    crawler.regiser_test('blocked_pages', 'BLOCKED PAGES', ['URL', 'Linked From', 'Blocked For', 'Blocked By', 'Status'], false);

    crawler_painter.set_type('blocked_pages', 'success');
});

// Start up the file testers
crawler.on('AFTER_INIT', function(){
    crawler_file_tester.get_file_contents(
        crawler.robots_url,
        crawler_file_tester.parse_robots_file,
        function(){ crawler_painter.add_status_row('file_tests', 'error', 'Failed to load robots file'); }
    );
    //crawler_file_tester.init_sitemap_tester();
});

// Test for blocked pages when the crawler finishes
crawler.on('ALL_CRAWLS_FINISHED', function(){
    crawler_file_tester.test_blocked_pages();
});