Passed
Push — develop ( 09225f...2749c8 )
by Dylan
02:47
created

SEOToolboxControllerExtension::crawl_response()   C

Complexity

Conditions 7
Paths 4

Size

Total Lines 32
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 32
rs 6.7272
c 0
b 0
f 0
cc 7
eloc 21
nc 4
nop 0
1
<?php
2
/**
3
 * Plugin: SEOToolbox
4
 * Author: Dylan Grech
5
 * Copyright: 2016
6
 *
7
 * SEOtoolbox Controller Extension decorates the Content Controller
8
 * to add the automated links to a page where needed
9
 *
10
 * @see AutomatedLink
11
 */
12
class SEOToolboxControllerExtension extends Extension {
1 ignored issue
show
Coding Style Compatibility introduced by
PSR1 recommends that each class must be in a namespace of at least one level to avoid collisions.

You can fix this by adding a namespace to your class:

namespace YourVendor;

class YourClass { }

When choosing a vendor namespace, try to pick something that is not too generic to avoid conflicts with other libraries.

Loading history...
13
14
    private $maxLinksPerPage;
15
    private $settings       = null;
16
    private $linkCount      = 0;
17
    private $addLinks       = true;
18
    private $excludeTags    = array();
19
    private $maxLinks       = 0;
20
21
    /**
     * Default action: adds the automated links to the owner and answers
     * crawl requests.
     *
     * A request counts as a crawl request when it carries a non-empty
     * `X-Crawl-Id` header. When the header equals the CrawlID stored in the
     * global settings the crawl response is produced, otherwise the visitor
     * is redirected to the permission-denied page.
     *
     * @return mixed Empty array for regular requests, otherwise the result of
     *               the owner's redirect() (crawl_response() ends the script)
     */
    public function index(){
        $this->addAutomatedLinks();

        // If we have a crawl request check the CrawlID so we're sure we didn't hit another SS site running our module
        $crawl_id = $this->owner->request->getHeader('X-Crawl-Id');
        if( !$crawl_id ){
            return array();
        }

        // get_current() may yield nothing (getSettings() guards against that too),
        // so never dereference it blindly
        $settings = GlobalAutoLinkSettings::get_current();
        $expected = $settings ? (string) $settings->CrawlID : '';

        // Compare as strings with ===: a loose == would treat numeric-looking
        // values such as "1e3" and "1000" as equal
        if( (string) $crawl_id === $expected ){
            return $this->crawl_response();
        }

        return $this->owner->redirect(SEOTestSiteTreeController::getPermissionDeniedPage()->Link());
    }
33
34
    /**
     * Renders the owner for the crawler and ends the request.
     *
     * Every non-empty `HTMLText` db field of the owner is swapped in the
     * rendered output for a marker of the form
     * `[**[FieldName]**[base64 of the field's template output]**]`, so the
     * receiver can detect which fields are being used. Style blocks,
     * script/noscript blocks and HTML comments are stripped from the output
     * to keep the response small.
     *
     * Owners whose class is ErrorPage (or a subclass) are answered with the
     * status line "HTTP/1.0 405 Instance of ErrorPage" and no body.
     *
     * NOTE(review): both branches terminate the script with die(), which the
     * code-quality report flags as an exit expression that hurts testability.
     * It is kept here so the response is still sent exactly where it was.
     *
     * @return void Never returns; the script is terminated
     */
    private function crawl_response(){
        // Error pages are rejected outright, so check before doing any encoding work
        if (in_array($this->owner->ClassName, ClassInfo::subclassesFor('ErrorPage'))) {
            header("HTTP/1.0 405 Instance of ErrorPage");
            die();
        }

        // Encoded version to detect which fields are being used
        $customize = array();
        $dbFields  = Config::inst()->get($this->owner->ClassName, 'db');
        if (is_array($dbFields)) {
            foreach ($dbFields as $field => $type) {
                if (strtolower($type) !== 'htmltext') {
                    continue;
                }

                // Prefer a same-named method on the owner over the raw property
                $data = ($this->owner->hasMethod($field)) ? $this->owner->$field() : $this->owner->$field;
                if (!$data) {
                    continue;
                }

                $tmp = new HTMLText('tmp');
                $tmp->setValue($data);
                $data = base64_encode($tmp->forTemplate());
                $customize[$field] = "[**[$field]**[$data]**]";
            }
        }

        $rendered = (string) $this->owner->customise($customize)->render();

        // Clean out the html before sending it back to minimize response size.
        // The `s` modifier lets `.` span newlines so multi-line blocks are
        // removed as well, and the style pattern runs up to the closing tag so
        // the CSS text does not remain in the output.
        $stripped = preg_replace(array(
            '/<style(.*?)<\/style[>]/is',
            '/<(script|noscript)(.*?)<\/(script|noscript)[>]/is',
            '/<!--(.*?)-->/is',
        ), '', $rendered);

        // preg_replace() returns null on a PCRE error; send the unstripped
        // markup in that case rather than an empty body
        die($stripped === null ? $rendered : $stripped);
    }
66
67
    /**
     * Get the global settings and check if we should be adding
     * links to this page.
     *
     * The settings are fetched once and cached on the extension. On that
     * first load the excluded tags, the page-wide link limit and the
     * addLinks flag are derived from them:
     *  - addLinks is switched off when no settings are available or when the
     *    owner's ClassName is not listed by AllowedIn();
     *  - maxLinks is the integer MaxLinksPerPage, or PHP_INT_MAX when that
     *    value is zero or empty.
     *
     * @return GlobalAutoLinkSettings|false
     */
    private function getSettings() {
        if ($this->settings !== null) {
            return $this->settings;
        }

        $this->settings = GlobalAutoLinkSettings::get_current();
        if (!$this->settings) {
            $this->addLinks = false;
            return false;
        }

        $this->excludeTags = (array) $this->settings->ExcludeTags();

        // Cast the value before the ternary: `(int) $a ? $b : $c` would cast
        // only the condition and store the uncast value
        $limit          = (int) $this->settings->MaxLinksPerPage;
        $this->maxLinks = $limit ? $limit : PHP_INT_MAX;

        // Normalise to an array, as is done for ExcludeTags(), so that a
        // missing list simply disables linking
        $allowedIn = (array) $this->settings->AllowedIn();
        if (!in_array($this->owner->ClassName, $allowedIn)) {
            $this->addLinks = false;
        }

        return $this->settings;
    }
86
87
    /**
     * Goes through all the automated link settings and adds
     * the links where necessary.
     *
     * Nothing happens when the module is disabled, when the owner is a
     * RedirectorPage, or when getSettings() has switched linking off. For
     * every field returned by IncludeInFields() that is parsable, the field
     * content is loaded into a DOM document, its existing anchors are added
     * to the running link count, and - unless the page limit is already
     * reached - the parsed content is written back to both the owner and its
     * data record.
     *
     * @return void
     */
    public function addAutomatedLinks(){
        if( !GlobalAutoLinkSettings::$enabled || $this->owner->class == 'RedirectorPage' ) {
            return;
        }

        $settings = $this->getSettings();
        if( !$this->addLinks ) {
            return;
        }

        foreach( $settings->IncludeInFields() as $field ){
            // Check that the field provided by user exists in this object, is of type HTMLText and has content
            if( !AutomatedLink::isFieldParsable( $this->owner->data(), $field ) ){
                continue;
            }

            // Create dummy object so we can parse the HTML
            $dummy = new HTMLText( $field );
            $dummy->setValue( $this->owner->$field );

            // Entity-encode the content before handing it to the DOM parser
            $content = mb_convert_encoding( $dummy->forTemplate(), 'html-entities', GlobalAutoLinkSettings::$encoding );

            // NOTE(review): the code-quality report flags this DOM-building
            // branch as duplicated elsewhere in the project; a shared helper
            // would be the natural home for it.
            if( class_exists( 'HTML5_Parser' ) ){
                $html5 = HTML5_Parser::parse( $content );
                if( $html5 instanceof DOMNodeList ){
                    // Move every node of the list into a fresh document
                    $dom = new DOMDocument();
                    while( $html5->length > 0 ) {
                        $dom->appendChild( $html5->item(0) );
                    }
                }else{
                    $dom = $html5;
                }
            } else{
                // loadHTML() reports HTML5 or malformed markup through libxml
                // warnings; collect them internally instead of emitting them,
                // then restore the previous error-handling mode
                $previous = libxml_use_internal_errors( true );
                $dom = new DOMDocument();
                $dom->loadHTML( $content );
                libxml_clear_errors();
                libxml_use_internal_errors( $previous );
            }

            // Check current link count and if it's already exceeded do nothing
            $this->linkCount += (int) $dom->getElementsByTagName( 'a' )->length;
            if( $this->linkCount >= $this->maxLinks ) {
                return;
            }

            $parsed = $this->parseField( $dom, $field );
            $this->owner->data()->$field = $parsed;
            $this->owner->$field         = $parsed;
        }
    }
138
139
    /**
     * Parse the provided field and add the necessary links.
     *
     * Works in three steps:
     *  1. every element whose tag name is in the excluded tags is replaced in
     *     the document by a text placeholder (the crc32 of its markup);
     *  2. for each AutomatedLink, in Priority order, occurrences of its
     *     phrase in the body markup are replaced one at a time by a
     *     placeholder for the link markup, honouring the per-link and
     *     per-page limits;
     *  3. all placeholders are swapped back for the markup they stand for.
     *
     * @param DOMDocument $html  Document holding the field content
     * @param string      $field Name of the field being parsed
     * @return string The field markup with the automated links added
     */
    private function parseField( DOMDocument $html, $field ){
        $this->owner->extend( 'beforeParseField', $html, $field );

        // Remove tags from the content that we won't be using
        $excluded = array();
        foreach( $this->excludeTags as $eTag ){
            // Re-query on every pass: each replacement removes one match from the document
            while( ( $tags = $html->getElementsByTagName( $eTag ) ) && $tags->length ){
                $tag   = $tags->item(0);
                $value = $html->saveHTML( $tag );
                $key   = (string) crc32( $value );

                // Convert back children nodes of this node if they were already hashed
                $excluded[$key] = str_replace( array_keys( $excluded ), array_values( $excluded ), $value );

                $tag->parentNode->replaceChild( $html->createTextNode( $key ), $tag );
            }
        }

        // Work on the inner markup of <body>, without the wrapping tags
        $body    = (string) $html->saveHTML( $html->getElementsByTagName('body')->item(0) );
        $content = preg_replace( array( '/\<body\>/is', '/\<\/body\>/is' ), '', $body, 1 );

        // Create the links
        $links = AutomatedLink::get()->sort('Priority');
        foreach( $links as $link ){
            // Check if self-linking is allowed and if current pagetype is allowed
            if( !$link->canBeAdded( $this->owner, $field ) ) continue;

            // Cast the chosen value, not the ternary condition
            $max    = ( $link->MaxLinksPerPage > 0 ) ? (int) $link->MaxLinksPerPage : PHP_INT_MAX;
            $escape = (string) preg_quote( $link->Phrase, '/' );
            $regex  = $link->CaseSensitive ? "/(\b{$escape}\b)/" : "/(\b{$escape}\b)/i";

            // Count the matches
            preg_match_all( $regex, $content, $found );
            $count = ( is_array( $found ) && isset( $found[0] ) ) ? count( $found[0] ) : 0;
            if( $count < 1 ) continue;

            // Links of this kind already placed on the page (by earlier
            // parseField() calls) count against its limit
            if( isset( $this->maxLinksPerPage[ $link->ID ] ) ) {
                $max -= $this->maxLinksPerPage[ $link->ID ];
            } else {
                $this->maxLinksPerPage[ $link->ID ] = 0;
            }

            for( $x = 0; $x < $count; $x++ ){
                // Stop adding links if we reached the link or page limit
                if( $x >= $max || $this->linkCount >= $this->maxLinks ) break;

                // Check if there is anything else to replace else stop
                preg_match( $regex, $content, $match );
                if( !is_array( $match ) || !count( $match ) ) break;

                // Kept in its own variable so the $html document parameter is not overwritten
                $linkHtml = (string) $link->getHTML( $match[0] );
                if( !$linkHtml ) continue;

                // Insert a placeholder rather than the markup itself, so later
                // phrases cannot match inside an already created link
                $key              = (string) crc32( $linkHtml );
                $excluded[ $key ] = $linkHtml;

                $content = preg_replace( $regex, $key, $content, 1 );
                $this->linkCount++;
                $this->maxLinksPerPage[ $link->ID ]++;
            }

            // Stop Adding links if we reached the page limit
            if( $this->linkCount >= $this->maxLinks ) break;
        }

        // Re-add the excluded tags and the generated links
        return str_replace( array_keys( $excluded ), array_values( $excluded ), $content );
    }
214
}
215