Browser::getHost()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 4
c 0
b 0
f 0
ccs 0
cts 4
cp 0
rs 10
cc 1
eloc 2
nc 1
nop 0
crap 2
1
<?php
2
/**
3
 * AnimeDb package.
4
 *
5
 * @author    Peter Gribanov <[email protected]>
6
 * @copyright Copyright (c) 2011, Peter Gribanov
7
 * @license   http://opensource.org/licenses/GPL-3.0 GPL v3
8
 */
9
10
namespace AnimeDb\Bundle\WorldArtFillerBundle\Service;
11
12
use Guzzle\Http\Client;
13
use Symfony\Component\HttpFoundation\Request;
14
15
/**
 * HTTP browser for the configured host.
 *
 * Wraps a lazily created Guzzle client, downloads pages relative to the
 * configured host, converts them from windows-1251 to UTF-8, repairs the
 * markup with Tidy and optionally loads the result into a DOM document.
 */
class Browser
{
    /**
     * Default HTTP User-Agent.
     *
     * Used when no request is set or the request carries no User-Agent.
     *
     * @var string
     */
    const DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36';

    /**
     * Lazily created HTTP client (see getBrowser()).
     *
     * @var \Guzzle\Http\Client|null
     */
    protected $browser;

    /**
     * Original request whose User-Agent is forwarded to the remote host.
     *
     * @var \Symfony\Component\HttpFoundation\Request|null
     */
    protected $request;

    /**
     * HTTP host.
     *
     * @var string
     */
    protected $host;

    /**
     * Browser timeout.
     *
     * @var int
     */
    protected $timeout;

    /**
     * Browser proxy list.
     *
     * @var array
     */
    protected $proxy_list;

    /**
     * Construct.
     *
     * @param string $host       Base URL handed to the HTTP client
     * @param int    $timeout    Value of the client "timeout" option
     * @param array  $proxy_list Proxies; one is picked at random when the client is created
     */
    public function __construct($host, $timeout, array $proxy_list)
    {
        $this->host = $host;
        $this->proxy_list = $proxy_list;
        $this->timeout = $timeout;
    }

    /**
     * Get host.
     *
     * @return string
     */
    public function getHost()
    {
        return $this->host;
    }

    /**
     * Set request.
     *
     * When a request is given and the HTTP client already exists, the client
     * default headers are replaced so that it sends the User-Agent of that
     * request. Passing null only forgets the request; an existing client keeps
     * the headers it already has.
     *
     * @param \Symfony\Component\HttpFoundation\Request|null $request
     */
    public function setRequest(Request $request = null)
    {
        $this->request = $request;

        // try to set User-Agent from original request
        if ($request && $this->browser) {
            $this->browser->setDefaultHeaders([
                'User-Agent' => $this->getUserAgent(),
            ]);
        }
    }

    /**
     * Get DOMDocument from path.
     *
     * Receives the cleaned content for the path (see getContent()) and loads
     * it into a DOM document.
     *
     * @param string $path
     *
     * @return \DOMDocument|null Null when there is no content or it cannot be loaded
     *
     * @throws \RuntimeException Propagated from getContent()
     */
    public function getDom($path)
    {
        $content = $this->getContent($path);
        if (!$content) {
            return null;
        }

        $dom = new \DOMDocument('1.0', 'utf8');

        return $dom->loadHTML($content) ? $dom : null;
    }

    /**
     * Get content from path.
     *
     * Receives content from the URL, converts it from windows-1251 to UTF-8,
     * cleans it using Tidy and removes the <noembed> and <noindex> blocks.
     *
     * @param string $path
     *
     * @return string|null Cleaned markup, or null when the response is not
     *                     "200 OK", has an empty body, or cannot be converted
     *
     * @throws \RuntimeException When the server answers with an error status
     */
    public function getContent($path)
    {
        /* @var $response \Guzzle\Http\Message\Response */
        $response = $this->getBrowser()->get($path)->send();
        if ($response->isError()) {
            throw new \RuntimeException('Failed to query the server '.$this->host);
        }

        // The status code may be reported as a numeric string, so normalize it
        // before the strict comparison.
        if ((int) $response->getStatusCode() !== 200) {
            return null;
        }

        $html = $response->getBody(true);
        if (!$html) {
            return null;
        }

        // iconv() returns false when the conversion fails; do not feed that to Tidy.
        $html = iconv('windows-1251', 'utf-8', $html);
        if ($html === false) {
            return null;
        }

        // clean content
        $config = [
            'output-xhtml' => true,
            'indent' => true,
            'indent-spaces' => 0,
            'fix-backslash' => true,
            'hide-comments' => true,
            'drop-empty-paras' => true,
            'wrap' => false,
        ];
        $tidy = new \tidy();
        $tidy->parseString($html, $config, 'utf8');
        $tidy->cleanRepair();

        // root() yields no node when Tidy has no parsed document.
        $root = $tidy->root();
        if (!$root) {
            return null;
        }

        return $this->removeIgnoredBlocks($root->value);
    }

    /**
     * Remove the blocks that must be ignored (<noembed> and <noindex>).
     *
     * @param string $html
     *
     * @return string Markup without the ignored blocks; the markup is returned
     *                unchanged when the regular expression engine fails
     */
    protected function removeIgnoredBlocks($html)
    {
        $stripped = preg_replace(
            [
                '/<noembed>.*?<\/noembed>/is',
                '/<noindex>.*?<\/noindex>/is',
            ],
            '',
            $html
        );

        // preg_replace() returns null on a PCRE error; keep the content then.
        return $stripped === null ? $html : $stripped;
    }

    /**
     * Get the User-Agent to send to the remote host.
     *
     * @return string User-Agent of the current request, or the default one
     *                when no request is set or it carries no User-Agent
     */
    protected function getUserAgent()
    {
        if (!$this->request) {
            return self::DEFAULT_USER_AGENT;
        }

        return $this->request->server->get('HTTP_USER_AGENT', self::DEFAULT_USER_AGENT);
    }

    /**
     * Get HTTP browser.
     *
     * Creates and configures the client on first use and reuses it afterwards.
     *
     * @return \Guzzle\Http\Client
     */
    protected function getBrowser()
    {
        if (!($this->browser instanceof Client)) {
            // Configure a local instance first so that a failure half-way
            // never leaves a partially configured client on the object.
            $browser = new Client($this->host);

            // try to set User-Agent from original request
            $browser->setDefaultHeaders(['User-Agent' => $this->getUserAgent()]);

            // configure browser client
            $browser->setDefaultOption('timeout', $this->timeout);
            if (!empty($this->proxy_list)) {
                $browser->setDefaultOption('proxy', $this->proxy_list[array_rand($this->proxy_list)]);
            }

            $this->browser = $browser;
        }

        return $this->browser;
    }
}
187