1
|
|
|
<?php |
2
|
|
|
/**
 * Plugin: SEOToolbox
 * Author: Dylan Grech
 * Copyright: 2016
 *
 * SEOtoolbox Controller Extension decorates the Content Controller
 * to add the automated links to a page where needed
 *
 * @see AutomatedLink
 */
12
|
|
|
class SEOToolboxControllerExtension extends Extension {

    /**
     * Number of automated links already inserted during this request,
     * keyed by AutomatedLink ID.
     *
     * @var array
     */
    private $maxLinksPerPage = array();

    /**
     * Cached result of GlobalAutoLinkSettings::get_current(); null until loaded.
     *
     * @var GlobalAutoLinkSettings|null
     */
    private $settings = null;

    /**
     * Running link total: anchors already present in the parsed fields
     * plus every automated link inserted so far.
     *
     * @var int
     */
    private $linkCount = 0;

    /**
     * Cleared by getSettings() when no settings exist or when the owner's
     * class is not listed in the settings' AllowedIn().
     *
     * @var bool
     */
    private $addLinks = true;

    /**
     * Tag names whose elements are swapped out for placeholders before
     * phrases are matched (loaded from the settings' ExcludeTags()).
     *
     * @var array
     */
    private $excludeTags = array();

    /**
     * Page-wide link limit; stays 0 until getSettings() has run.
     *
     * @var int
     */
    private $maxLinks = 0;

    /**
     * Adds the automated links and, when the request carries an
     * X-Crawl-Id header, answers the crawl request instead of rendering
     * the page normally.
     *
     * @return array|mixed An empty array for ordinary requests; otherwise
     *                     whatever the owner's redirect() returns (a
     *                     matching crawl request ends the script inside
     *                     crawl_response()).
     */
    public function index(){
        $this->addAutomatedLinks();

        // If we have a crawl request check the CrawlID so we're sure we
        // didn't hit another SS site running our module
        $crawlId = $this->owner->request->getHeader('X-Crawl-Id');
        if( $crawlId ){
            $settings = GlobalAutoLinkSettings::get_current();

            // Strict string comparison: loose == treats numeric-looking
            // strings such as "0e123" and "0e456" as equal. Missing
            // settings are handled as a mismatch instead of a fatal error.
            if( $settings && (string) $crawlId === (string) $settings->CrawlID ){
                return $this->crawl_response();
            }

            return $this->owner->redirect(SEOTestSiteTreeController::getPermissionDeniedPage()->Link());
        }

        return array();
    }

    /**
     * Renders the owner for the crawler and terminates the script.
     *
     * Every non-empty 'db' field of type HTMLText is passed to the template
     * as a marker of the form [**[Field]**[base64 of the field HTML]**] so
     * the receiving side can detect which fields are being used. Subclasses
     * of ErrorPage are answered with a 405 status line and no body.
     *
     * @return void This method never returns; it always ends in die().
     */
    private function crawl_response(){
        // Encoded version to detect which fields are being used
        $customize = array();
        $dbFields  = Config::inst()->get($this->owner->ClassName, 'db');

        if( is_array($dbFields) ){
            foreach( $dbFields as $field => $type ){
                if( strtolower($type) != 'htmltext' ){
                    continue;
                }

                // Prefer a method of the same name over the raw property
                $data = ($this->owner->hasMethod($field)) ? $this->owner->$field() : $this->owner->$field;
                if( !$data ){
                    continue;
                }

                $tmp = new HTMLText('tmp');
                $tmp->setValue($data);
                $data = base64_encode($tmp->forTemplate());
                $customize[$field] = "[**[$field]**[$data]**]";
            }
        }

        if( in_array($this->owner->ClassName, ClassInfo::subclassesFor('ErrorPage')) ){
            header("HTTP/1.0 405 Instance of ErrorPage");
            die();
        }

        // Clean out the html before sending it back to minimize response size.
        // NOTE(review): none of these patterns use the "s" modifier, so "."
        // does not match newlines and multi-line script/comment blocks are
        // left in place; the first pattern removes only the opening <style>
        // tag. Left unchanged because the consumer of this output is not
        // visible here - confirm before tightening.
        $patterns = array(
            '/<style(.*?)[>]/im',
            '/<(script|noscript)(.*?)<\/(script|noscript)[>]/im',
            '/<!--(.*?)-->/im',
        );

        die( preg_replace($patterns, '', $this->owner->customise($customize)->render()) );
    }

    /**
     * Get the global settings and check if we should be adding
     * links to this page.
     *
     * On first successful load this also caches the excluded tag names and
     * the page-wide link limit (a limit of 0 means "no limit"), and clears
     * $addLinks when the owner's class is not listed in AllowedIn().
     *
     * @return GlobalAutoLinkSettings|false False when no settings exist.
     */
    private function getSettings() {
        if( $this->settings === null ){
            $this->settings = GlobalAutoLinkSettings::get_current();
            if( !$this->settings ){
                $this->addLinks = false;
                return false;
            }

            $this->excludeTags = (array) $this->settings->ExcludeTags();

            // Cast first, then choose: "(int) $x ? $x : MAX" casts only the
            // condition and would store the uncast value.
            $pageLimit      = (int) $this->settings->MaxLinksPerPage;
            $this->maxLinks = ( $pageLimit !== 0 ) ? $pageLimit : PHP_INT_MAX;

            if( !in_array($this->owner->ClassName, $this->settings->AllowedIn()) ){
                $this->addLinks = false;
            }
        }

        return $this->settings;
    }

    /**
     * Goes through all the automated link settings and adds
     * the links where necessary.
     *
     * Does nothing when the module is disabled, when the owner is a
     * RedirectorPage, or when getSettings() decided links must not be added.
     * Processing stops for all remaining fields as soon as the running link
     * count reaches the page-wide limit.
     *
     * @return void
     */
    public function addAutomatedLinks(){
        if( !GlobalAutoLinkSettings::$enabled || $this->owner->class == 'RedirectorPage' ){
            return;
        }

        $settings = $this->getSettings();
        if( !$settings || !$this->addLinks ){
            return;
        }

        foreach( $settings->IncludeInFields() as $field ){
            // Check that the field provided by user exists in this object,
            // is of type HTMLText and has content
            if( !AutomatedLink::isFieldParsable( $this->owner->data(), $field ) ){
                continue;
            }

            // Create dummy object so we can parse the HTML
            $dummy = new HTMLText( $field );
            $dummy->setValue( $this->owner->$field );

            // Create DOMDocument Object
            $content = mb_convert_encoding( $dummy->forTemplate(), 'html-entities', GlobalAutoLinkSettings::$encoding );
            $dom     = AutomatedLink::constructDOMDocument( $content );

            // Anchors already present count towards the page limit; if the
            // limit is already reached do nothing more
            $this->linkCount += (int) $dom->getElementsByTagName( 'a' )->length;
            if( $this->linkCount >= $this->maxLinks ){
                return;
            }

            // Write the parsed HTML to both the data record and the controller
            $parsed = $this->parseField( $dom, $field );
            $this->owner->data()->$field = $parsed;
            $this->owner->$field = $parsed;
        }
    }

    /**
     * Replaces every element named in $hash_tags inside $html with a text
     * node holding the CRC32 of the element's serialized HTML.
     *
     * @param DOMDocument $html      Document to modify in place
     * @param array       $hash_tags Tag names to replace
     * @return array Map of placeholder key => original HTML of the element
     */
    private function hashTags( DOMDocument $html, $hash_tags ){
        $excluded = array();

        foreach( $hash_tags as $tagName ){
            // The node list is queried again on every pass and only its
            // first item is handled, because each pass removes that element
            // from the document.
            while( $nodes = $html->getElementsByTagName( $tagName ) ){
                if( !$nodes->length ){
                    break;
                }

                $node     = $nodes->item(0);
                $original = $html->saveHTML( $node );
                $key      = (string) crc32( $original );

                // Convert back children nodes of this node if they were
                // already hashed
                $excluded[$key] = str_replace( array_keys( $excluded ), array_values( $excluded ), $original );

                $node->parentNode->replaceChild( $html->createTextNode( $key ), $node );
            }
        }

        return $excluded;
    }

    /**
     * Adds the passed automated link to $content if possible.
     *
     * Each occurrence of the link's phrase (matched on word boundaries) is
     * replaced in $content by a CRC32 placeholder, until the link's own
     * limit or the page-wide limit is reached. The caller is expected to
     * swap the placeholders for the returned HTML afterwards.
     *
     * @param AutomatedLink $link
     * @param string        $content Modified in place (passed by reference)
     * @return array Map of placeholder key => link HTML that replaces it
     */
    public function addLinkToContent(AutomatedLink $link, &$content){
        $links  = array();
        $phrase = (string) $link->Phrase;

        // An empty phrase would build /(\b\b)/, which matches at every word
        // boundary in the content.
        if( $phrase === '' ){
            return $links;
        }

        // Cast the chosen value, not the condition (casts bind tighter than ?:)
        $max    = ( $link->MaxLinksPerPage > 0 ) ? (int) $link->MaxLinksPerPage : PHP_INT_MAX;
        $escape = preg_quote( $phrase, '/' );
        $regex  = $link->CaseSensitive ? "/(\b{$escape}\b)/" : "/(\b{$escape}\b)/i";

        // Count the matches; nothing to do when the phrase does not occur
        $count = (int) preg_match_all( $regex, $content, $found );
        if( $count < 1 ){
            return $links;
        }

        // Deduct what this link already used in previously parsed fields
        if( isset( $this->maxLinksPerPage[ $link->ID ] ) ){
            $max -= $this->maxLinksPerPage[ $link->ID ];
        } else {
            $this->maxLinksPerPage[ $link->ID ] = 0;
        }

        for( $x = 0; $x < $count; $x++ ){
            // Stop adding links if we reached the link or page limit
            if( $x >= $max || $this->linkCount >= $this->maxLinks ){
                break;
            }

            // Check if there is anything else to replace else stop
            if( !preg_match( $regex, $content, $match ) ){
                break;
            }

            $html = (string) $link->getHTML( $match[0] );
            if( !$html ){
                continue;
            }

            $key           = (string) crc32( $html );
            $links[ $key ] = $html;

            // Replace only the first remaining occurrence
            $content = preg_replace( $regex, $key, $content, 1 );
            $this->linkCount++;
            $this->maxLinksPerPage[ $link->ID ]++;
        }

        return $links;
    }

    /**
     * Parse the provided field and add the necessary links.
     *
     * Excluded tags are first swapped for placeholders so phrases inside
     * them are not matched, then every AutomatedLink (sorted by Priority)
     * that may be added is applied, and finally all placeholders are
     * replaced by their HTML.
     *
     * @param DOMDocument $html  Parsed field content; modified in place
     * @param string      $field Name of the field being parsed
     * @return string The field HTML with automated links inserted
     */
    private function parseField( DOMDocument $html, $field ){
        $this->owner->extend( 'beforeParseField', $html, $field );

        // Remove tags from the content we won't be using
        $excluded = $this->hashTags( $html, $this->excludeTags );

        // Serialize the body element and strip its wrapping <body> tags
        $body    = (string) $html->saveHTML( $html->getElementsByTagName('body')->item(0) );
        $content = preg_replace( array( '/\<body\>/is', '/\<\/body\>/is' ), '', $body, 1 );

        foreach( AutomatedLink::get()->sort('Priority') as $link ){
            if( $this->linkCount >= $this->maxLinks || !$link->canBeAdded( $this->owner, $field ) ){
                continue;
            }

            $links = $this->addLinkToContent( $link, $content );
            if( is_array($links) && count($links) > 0 ){
                // Union keeps the existing placeholder keys
                $excluded = $excluded + $links;
            }
        }

        // Re-add the excluded tags and inserted links
        return str_replace( array_keys( $excluded ), array_values( $excluded ), $content );
    }
}
225
|
|
|
|