PageHoover   A
last analyzed

Complexity

Total Complexity 7

Size/Duplication

Total Lines 112
Duplicated Lines 0 %

Importance

Changes 7
Bugs 0 Features 0
Metric Value
eloc 36
c 7
b 0
f 0
dl 0
loc 112
rs 10
wmc 7

2 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 9 1
B downloadPage() 0 63 6
1
<?php
2
3
namespace Afsy\Component;
4
5
use GuzzleHttp\Client as GuzzleClient;
6
use Symfony\Component\DomCrawler\Crawler;
7
use OldSound\RabbitMqBundle\RabbitMq\Producer;
8
9
class PageHoover
{
    /**
     *  HTTP client used to fetch the requested pages.
     *
     *  @var GuzzleClient
     */
    protected $client = null;

    /**
     *  Raw options array, stored exactly as received by the constructor.
     *
     *  @var array
     */
    protected $options = [];

    /**
     *  Prefix prepended to every saved file name. It is concatenated as-is,
     *  so it presumably has to end with a directory separator
     *  (TODO confirm against the service configuration).
     *
     *  @var string
     */
    protected $downloadFolder = null;

    /**
     *  Producer that receives one serialized message per image to download.
     *
     *  @var Producer
     */
    protected $downloadImageProducer = null;

    /**
     *  Main constructor.
     *
     *  @param (GuzzleClient) $client               Guzzle Client
     *  @param (Producer) $downloadImageProducer    Download image producer
     *  @param (array) $options                     Options list, must contain the 'downloadFolder' key
     *
     *  @return (void)
     */
    public function __construct(GuzzleClient $client, Producer $downloadImageProducer, array $options)
    {
        // Initialize
        $this->client = $client;
        $this->options = $options;
        $this->downloadImageProducer = $downloadImageProducer;

        // Initialize options
        $this->downloadFolder = $options['downloadFolder'];
    }

    /**
     *  Download page method.
     *
     *  Fetches the page, saves its body (prefixed with a UTF-8 byte order
     *  mark) as "<downloadFolder><Ymd-His>-<page filename>.htm", then
     *  publishes one serialized message per <img> tag found in the page.
     *  Each message is an array with the keys 'url', 'savePath' and
     *  'savedHtmlFile'.
     *
     *  Exceptions raised by the HTTP client or by the producer are not
     *  caught here.
     *
     *  @param (string) $page       Page to download (url)
     *
     *  @return (boolean) Download status: false when the response status
     *                    code is not 200, true otherwise
     *
     *  @throws \Exception When the page content cannot be written to disk
     */
    public function downloadPage($page)
    {
        // Initialize
        $pageParts = pathinfo($page);
        $saveFile = $this->downloadFolder.date('Ymd-His').'-'.$pageParts['filename'].'.htm';

        // Download page
        $res = $this->client->get($page);

        // Check downloaded content
        if ($res->getStatusCode() !== 200) {
            return false;
        }

        // Get page content
        $pageContent = $res->getBody()->getContents();

        // Save page in downloadFolder ("\xEF\xBB\xBF" is the UTF-8 byte order mark)
        if (file_put_contents($saveFile, "\xEF\xBB\xBF".$pageContent) === false) {
            // Throw error
            throw new \Exception('Error saving file', 1);
        }

        // Initialize crawler
        $crawler = new Crawler($pageContent);

        // Get images list
        $sources = $crawler->filter('img')->each(function (Crawler $image) {
            return $image->attr('src');
        });

        // Download images
        foreach ($sources as $src) {
            // attr() yields null when the tag has no src attribute
            $src = str_replace(' ', '', (string) $src);

            // Nothing to download for a missing or blank src
            if ($src === '') {
                continue;
            }

            // Check extension, anything unexpected is stored as png
            $imgExt = pathinfo($src, PATHINFO_EXTENSION);
            if (!in_array($imgExt, ['png', 'jpg', 'jpeg', 'gif'], true)) {
                $imgExt = 'png';
            }

            // Check host
            $imageUrl = $this->resolveImageUrl($pageParts['dirname'], $src);

            // Create image to publish
            $imgToPublish = [
                'url' => $imageUrl,
                'savePath' => $this->downloadFolder.pathinfo($imageUrl, PATHINFO_FILENAME).'.'.$imgExt,
                'savedHtmlFile' => $saveFile,
            ];

            // Publish image
            $this->downloadImageProducer->publish(serialize($imgToPublish));
        }

        // Return status
        return true;
    }

    /**
     *  Build the url of an image from the directory of the page and the
     *  image source.
     *
     *  A source that already validates as a full url is returned untouched.
     *  Any other source is appended to the page directory, with a "/"
     *  inserted when the source does not start with one, so "img/a.png"
     *  becomes "<pageDir>/img/a.png" instead of "<pageDir>img/a.png".
     *
     *  NOTE(review): root-relative ("/a.png") and protocol-relative
     *  ("//host/a.png") sources are still appended to the page directory.
     *
     *  @param (string) $pageDir    Directory part of the page url
     *  @param (string) $src        Non-empty image source
     *
     *  @return (string) Image url
     */
    private function resolveImageUrl($pageDir, $src)
    {
        if (filter_var($src, FILTER_VALIDATE_URL, FILTER_FLAG_PATH_REQUIRED)) {
            return $src;
        }

        if ($src[0] !== '/') {
            $src = '/'.$src;
        }

        return $pageDir.$src;
    }
}
123