|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace Coyote\Services\Parser\Parsers; |
|
4
|
|
|
|
|
5
|
|
|
use Collective\Html\HtmlBuilder; |
|
6
|
|
|
use Coyote\Repositories\Contracts\PageRepositoryInterface as PageRepository; |
|
7
|
|
|
|
|
8
|
|
|
/**
 * Post-processes HTML text to enrich links.
 *
 * The parser works in three passes (see parse()):
 *  1. plain URLs and e-mail addresses are turned into <a> elements,
 *  2. existing <a> elements whose text equals their href are either retitled
 *     with the title of the internal page they point to, or replaced with an
 *     embedded YouTube player,
 *  3. legacy wiki-style accessors such as [[Foo/Bar|title]] are turned into links.
 *
 * Between passes, hashBlock()/hashInline()/unhash() (inherited from Parser) are
 * used around the elements named in each call -- presumably to shield those
 * elements from the following step; confirm against the Parser base class.
 */
class Link extends Parser implements ParserInterface
{
    // Body of a regexp (no delimiters) matching an <a href="...">...</a> element.
    // Group 2 is the href value, group 3 the anchor content.
    const LINK_TAG_REGEXP = "<a\s[^>]*href=(\"??)([^\" >]*?)\\1[^>]*>(.*)<\/a>";

    // Body of a regexp (no delimiters) matching [[path]] or [[path|title]].
    // Group 1 is the path, group 3 the optional title.
    const LINK_INTERNAL_REGEXP = '\[\[(.*?)(\|(.*?))*\]\]';

    // Complete regexp matching an e-mail address preceded by a boundary character.
    // Group 1 is the boundary, group 2 the address.
    const REGEXP_EMAIL = '#(^|[\n \[\]\:<>&;]|\()([a-z0-9&\-_.]+?@[\w\-]+\.(?:[\w\-\.]+\.)?[\w]+)#i';

    /**
     * Repository used to look pages up by their path.
     *
     * @var PageRepository
     */
    private $page;

    /**
     * Host name that identifies a link as internal.
     *
     * @var string
     */
    private $host;

    /**
     * HTML builder used to render the YouTube embed; when null, YouTube links are left untouched.
     *
     * @var HtmlBuilder|null
     */
    private $html;

    /**
     * Formatter that converts plain URLs found in the text.
     *
     * @var UrlFormatter
     */
    private $urlParser;

    /**
     * Link constructor.
     *
     * @param PageRepository $page
     * @param string $host
     * @param HtmlBuilder|null $html
     */
    public function __construct(PageRepository $page, string $host, HtmlBuilder $html = null)
    {
        $this->page = $page;
        $this->host = $host;
        $this->html = $html;
        // NOTE(review): the formatter always receives the container's 'html' service,
        // even when $html was injected -- kept as-is to preserve behaviour.
        $this->urlParser = new UrlFormatter($host, app('html'));
    }

    /**
     * Run all link transformations on the given text.
     *
     * @param string $text
     * @return string
     */
    public function parse($text)
    {
        // Pass 1: make <a> elements out of plain URLs and e-mail addresses.
        $text = $this->hashBlock($text, ['code', 'a']);
        $text = $this->hashInline($text, 'img');

        $text = $this->parseUrl($text);
        $text = $this->parseEmail($text);

        $text = $this->unhash($text);

        // Pass 2: retitle internal links and embed YouTube videos.
        $text = $this->hashBlock($text, 'code');
        $text = $this->hashInline($text, 'img');

        $text = $this->parseLinks($text);

        // Pass 3: parse legacy coyote markup ([[...]]), with <a> elements hashed as well.
        $text = $this->hashBlock($text, 'a');

        $text = $this->parseInternalAccessors($text);

        return $this->unhash($text);
    }

    /**
     * Find every <a> element in the text and hand it to the internal-link and
     * YouTube handlers.
     *
     * @param string $text
     * @return string
     */
    protected function parseLinks($text)
    {
        if (!preg_match_all('/' . self::LINK_TAG_REGEXP . '/siU', $text, $matches, PREG_SET_ORDER)) {
            return $text;
        }

        foreach ($matches as $set) {
            $match = $set[0];
            $link = $set[2];
            $title = $set[3];

            $text = $this->parseInternalLink($text, $match, $link, $title);
            $text = $this->parseYoutubeLinks($text, $match, $link, $title);
        }

        return $text;
    }

    /**
     * Replace an <a> element whose text equals its href with a link titled after
     * the page it points to, provided the URL is internal and the page exists.
     *
     * @param string $text  full text being processed
     * @param string $match the complete <a> element as found in the text
     * @param string $url   href of the element
     * @param string $title anchor content of the element
     * @return string
     */
    protected function parseInternalLink($text, $match, $url, $title)
    {
        if (urldecode($title) !== urldecode($url)) {
            return $text;
        }

        $path = $this->getPathFromInternalUrl($url);

        if ($path === false) {
            return $text;
        }

        $page = $this->page->findByPath($path);

        if ($page) {
            $text = str_replace($match, link_to($url, $page->title), $text);
        }

        return $text;
    }

    /**
     * Replace an <a> element whose text equals its href with an embedded player
     * when the URL is a youtube.com/watch or youtu.be link.
     *
     * Nothing is done when no HtmlBuilder was provided to the constructor.
     *
     * @param string $text  full text being processed
     * @param string $match the complete <a> element as found in the text
     * @param string $url   href of the element
     * @param string $title anchor content of the element
     * @return string
     */
    protected function parseYoutubeLinks($text, $match, $url, $title)
    {
        if ($this->html === null) {
            return $text;
        }

        if (urldecode($title) !== urldecode($url)) {
            return $text;
        }

        $components = parse_url($url);

        if (!$this->isUrl($components)) {
            return $text;
        }

        // get host without "www"
        $host = $this->getHost($components['host']);
        $path = trim($components['path'], '/');

        // parse_url() omits the "query" key when the URL has no query string.
        parse_str($components['query'] ?? '', $query);

        if ($host === 'youtube.com' && $path === 'watch') {
            $videoId = $query['v'] ?? null;

            // "v[]=..." produces an array, which must not reach makeIframe().
            if (!empty($videoId) && is_string($videoId)) {
                parse_str($components['fragment'] ?? '', $fragments);

                // Start time may be given as "#t=..." or as "&t=..."; the fragment wins.
                $time = $fragments['t'] ?? $query['t'] ?? null;

                $text = str_replace(
                    $match,
                    $this->makeIframe($videoId, $this->timeToSeconds($time)),
                    $text
                );
            }
        }

        if ($host === 'youtu.be' && $path !== '') {
            $text = str_replace($match, $this->makeIframe($path, $this->timeToSeconds($query['t'] ?? null)), $text);
        }

        return $text;
    }

    /**
     * Parse "old" coyote links like [[Foo/Bar]] to http://4programmers.net/Foo/Bar
     *
     * A missing page yields a link to "Create/<path>" marked with the "link-broken"
     * class; an existing page yields a link to the page's own path. The title
     * defaults to the last path segment (missing page) or the page title.
     *
     * @param string $text
     * @return string
     */
    protected function parseInternalAccessors($text)
    {
        if (!preg_match_all('/' . self::LINK_INTERNAL_REGEXP . '/i', $text, $matches, PREG_SET_ORDER)) {
            return $text;
        }

        foreach ($matches as $set) {
            $origin = $set[0];

            $path = '/' . str_replace(' ', '_', trim($set[1], '/?&'));

            $title = $set[3] ?? null;
            // Strips the "#fragment" off $path (passed by reference) and returns it.
            $hash = $this->getHashFromPath($path);

            $page = $this->page->findByPath($path);
            $attr = [];

            if (empty($page)) {
                $attr = ['class' => 'link-broken', 'title' => 'Dokument nie istnieje'];
                $path = 'Create' . $path;

                if (empty($title)) {
                    $title = str_replace('_', ' ', last(explode('/', $path)));
                }
            } else {
                $path = $page->path;
                $title = $title ?: $page->title;
            }

            // Compare against '' so that a fragment of "0" is not dropped.
            $text = str_replace($origin, link_to($path . ($hash !== '' ? '#' . $hash : ''), $title, $attr), $text);
        }

        return $text;
    }

    /**
     * Convert plain URLs in the text using the URL formatter.
     *
     * @param string $text
     * @return string
     */
    protected function parseUrl(string $text): string
    {
        return $this->urlParser->parse($text);
    }

    /**
     * Wrap e-mail addresses in mailto: links.
     *
     * @param string $text
     * @return string
     */
    protected function parseEmail(string $text): string
    {
        // preg_replace() returns null on a PCRE error; fall back to the untouched text
        // rather than violating the declared return type.
        return preg_replace(self::REGEXP_EMAIL, "\$1<a href=\"mailto:\$2\">$2</a>", $text) ?? $text;
    }

    /**
     * Convert a YouTube time offset ("90", "45s", "2m10s", "1h2m3s") to seconds.
     *
     * Empty or non-string input yields null. A value that does not follow the
     * h/m/s notation is returned unchanged.
     *
     * @param string|null $time
     * @return null|string
     */
    private function timeToSeconds($time)
    {
        if (!is_string($time) || !$time) {
            return null;
        }

        if (preg_match('/^(?:(\d+)h)?(?:(\d+)m)?(?:(\d+)s?)?$/', $time, $match)) {
            // Groups that did not participate are either '' or absent; both cast to 0.
            $seconds = ((int) ($match[1] ?? 0)) * 3600
                + ((int) ($match[2] ?? 0)) * 60
                + (int) ($match[3] ?? 0);

            return (string) $seconds;
        }

        return $time;
    }

    /**
     * Build the responsive <div><iframe> markup for a YouTube embed.
     *
     * @param string $videoId
     * @param string|null $start offset appended as "?start=" when not null
     * @return string
     */
    private function makeIframe(string $videoId, string $start = null): string
    {
        $iframe = (string) $this->html->tag('iframe', '', [
            'src' => 'https://youtube.com/embed/' . $videoId . ($start !== null ? "?start=$start" : ''),
            'class' => 'embed-responsive-item',
            'allowfullscreen' => 'allowfullscreen'
        ]);

        return (string) $this->html->tag('div', $iframe, ['class' => 'embed-responsive embed-responsive-16by9']);
    }

    /**
     * Get path from url only if it's internal link (false if it's NOT internal link)
     *
     * @example http://4programmers.net/Foo/Bar => /Foo/Bar
     * @param string $url
     * @return string|false
     */
    private function getPathFromInternalUrl($url)
    {
        $components = parse_url($url);

        if (!$this->isUrl($components)) {
            return false;
        }

        // check whether we are dealing with an internal link
        // (host names are case-insensitive, so compare in lower case)
        if (strtolower($this->host) !== $this->getHost($components['host'])) {
            return false;
        }

        return urldecode($components['path']);
    }

    /**
     * Tell whether parse_url() output describes a URL with both a host and a path.
     *
     * @param array|false $components
     * @return bool
     */
    private function isUrl($components)
    {
        if (!is_array($components)) {
            return false;
        }

        return (!empty($components['path']) && !empty($components['host']));
    }

    /**
     * Split the "#fragment" off a path.
     *
     * The path is modified in place: everything from the first "#" onwards is removed.
     *
     * @param string $path passed by reference
     * @return string the HTML-escaped fragment, or an empty string when there is none
     */
    private function getHashFromPath(&$path)
    {
        $hash = '';

        if (($pos = strpos($path, '#')) !== false) {
            $hash = htmlspecialchars(substr($path, $pos + 1));
            $path = substr($path, 0, $pos);
        }

        return $hash;
    }

    /**
     * Get lower-cased host without "www" at the beginning.
     *
     * @param string $host
     * @return string
     */
    private function getHost(string $host): string
    {
        $parts = explode('.', strtolower($host));

        if ($parts[0] === 'www') {
            array_shift($parts);
        }

        return implode('.', $parts);
    }
}
|
332
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.