Completed
Push — master ( 74c577...c1ce98 )
by Kirill
06:10
created

UrlFinder::getPattern()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %
Metric Value
dl 0
loc 4
rs 10
cc 1
eloc 2
nc 1
nop 0
1
<?php
2
declare(strict_types = 1);
3
/**
4
 * This file is part of GitterBot package.
5
 *
6
 * @author Serafim <[email protected]>
7
 * @date 28.03.2016 23:19
8
 *
9
 * For the full copyright and license information, please view the LICENSE
10
 * file that was distributed with this source code.
11
 */
12
13
14
namespace Domains\Analyser;
15
16
use Core\Lazy\Fetch;
17
use Domains\Message\Message;
18
use Domains\Message\Url;
19
use Illuminate\Database\Eloquent\Model;
20
21
/**
 * Class UrlFinder
 *
 * Analyser that scans the text of stored messages for URLs and saves every
 * URL it finds as a Url record attached to the message it was found in.
 *
 * @package Domains\Analyser
 */
class UrlFinder implements Analyser
{
    /**
     * Prototype Url model; used to build queries against the urls storage.
     *
     * @var Model
     */
    private $entity;

    /**
     * Table name reported by the Url model (not read anywhere else in this class).
     *
     * @var string
     */
    private $table;

    /**
     * UrlFinder constructor.
     */
    public function __construct()
    {
        $this->entity = new Url;
        $this->table = $this->entity->getTable();
    }

    /**
     * Removes the Url records produced by previous analysis runs.
     *
     * @return $this|Analyser
     * @throws \Exception
     */
    public function clear() : Analyser
    {
        // $this->entity is a fresh, never-persisted model, and Eloquent's
        // instance-level delete() only removes the row of an existing model,
        // so it would delete nothing here. Go through a query builder so the
        // stored records are actually removed.
        $this->entity->newQuery()->delete();

        return $this;
    }

    /**
     * Walks over all messages, extracts the URLs from each message text and
     * stores them through the message's urls() relation.
     *
     * @param \Closure|null $progress Optional callback invoked once per message
     *                                that contained at least one URL, with the
     *                                message, the list of found URLs and the
     *                                iteration key.
     * @return $this|Analyser
     */
    public function analyse(\Closure $progress = null) : Analyser
    {
        $response = new Fetch(Message::query());

        /** @var Message $message */
        foreach ($response as $i => $message) {
            // Cast so that a message without text is skipped instead of
            // raising a TypeError under strict_types and aborting the run.
            $matches = $this->getUrls((string) $message->text);

            if ($matches === []) {
                continue;
            }

            $urls = array_map(static function (string $url) : Url {
                return new Url(['url' => $url]);
            }, $matches);

            $message->urls()->saveMany($urls);

            if ($progress !== null) {
                $progress($message, $matches, $i);
            }
        }

        return $this;
    }

    /**
     * Extracts all URLs matched by getPattern() from the given text.
     *
     * @param string $text
     * @return string[] Matched URLs in order of appearance; empty when none
     *                  were found or the regular expression could not be run.
     */
    private function getUrls(string $text) : array
    {
        // TODO Add support for non-latin domains
        // Current RFC 1738
        $pattern = static::getPattern();

        // The pattern expects whitespace after a URL, so pad the text to let
        // a URL at the very end of the message match as well.
        $found = preg_match_all($pattern, $text . ' ', $matches, PREG_PATTERN_ORDER);

        // false signals a PCRE error (e.g. malformed UTF-8 with the "u" flag),
        // 0 means no URL was found.
        if ($found === false || $found === 0) {
            return [];
        }

        // Guard against an overriding pattern that has no capture group.
        return $matches[1] ?? [];
    }

    /**
     * Returns the regular expression used to detect URLs.
     *
     * Capture group 1 holds the URL itself; the whitespace character that
     * terminates the URL is part of the overall match but not of the group.
     *
     * @return string
     */
    public static function getPattern()
    {
        // scheme://host-label.label[rest] followed by whitespace.
        // The trailing character class is optional so that short hosts such
        // as "http://t.co" match, and the first host label accepts digits
        // and hyphens ("my-site.com", "web2.example.com").
        return '/([a-z]{2,5}:\/\/[a-z0-9\-]+\.[a-z]{2,}[\w\/\?=%#\-&:\$\.\+\!\*]*)\s/iu';
    }
}
108