Helper   A
last analyzed

Complexity

Total Complexity 5

Size/Duplication

Total Lines 36
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 5
eloc 13
dl 0
loc 36
c 0
b 0
f 0
rs 10

2 Methods

Rating   Name   Duplication   Size   Complexity  
A textNormalise() 0 4 1
A getCleanedUrl() 0 16 4
1
<?php declare(strict_types=1);
2
3
namespace Goose\Utils;
4
5
use Goose\Exceptions\MalformedURLException;
6
7
/**
 * Helper
 *
 * Static helpers for URL preparation and whitespace normalisation.
 *
 * @package Goose\Utils
 * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License 2.0
 */
class Helper {
    /**
     * Parse a URL and build its "_escaped_fragment_" variant.
     *
     * Every "#!" occurrence in the URL is replaced with "_escaped_fragment_=",
     * joined with "&" when the URL already carries a non-empty query string and
     * with "?" otherwise.
     *
     * @todo Re-factor result into class
     *
     * @param string $urlToCrawl
     *
     * @return object Object with the properties:
     *                - url:      the URL exactly as passed in
     *                - parts:    the parse_url() result cast to an object
     *                - linkhash: md5 hash of the original URL
     *                - finalUrl: the URL with "#!" rewritten
     *
     * @throws MalformedURLException When parse_url() cannot parse the URL
     */
    public static function getCleanedUrl($urlToCrawl) {
        $parts = parse_url($urlToCrawl);

        if ($parts === false) {
            throw new MalformedURLException($urlToCrawl . ' - is a malformed URL and cannot be processed');
        }

        // Compare against '' instead of relying on truthiness: the string "0" is
        // falsy in PHP, yet "?0" is a real query string that must be extended
        // with "&" rather than being given a second "?".
        $hasQuery = isset($parts['query']) && $parts['query'] !== '';
        $prefix = $hasQuery ? '&' : '?';

        $finalUrl = str_replace('#!', $prefix . '_escaped_fragment_=', $urlToCrawl);

        return (object)[
            'url' => $urlToCrawl,
            'parts' => (object)$parts,
            'linkhash' => md5($urlToCrawl),
            'finalUrl' => $finalUrl,
        ];
    }

    /**
     * Collapse every run of whitespace into a single space and trim the ends.
     *
     * @param string $text
     *
     * @return string
     */
    public static function textNormalise($text) {
        // One or more consecutive whitespace characters (newlines, carriage
        // returns, tabs, spaces) become exactly one space.
        $text = preg_replace('@[\n\r\s\t]+@', " ", $text);

        return trim($text);
    }
}
51