1
|
|
|
<?php
|
2
|
|
|
|
3
|
|
|
namespace Swader\Diffbot\Abstracts;
|
4
|
|
|
|
5
|
|
|
use Swader\Diffbot\Diffbot;
|
6
|
|
|
use Swader\Diffbot\Traits\DiffbotAware;
|
7
|
|
|
|
8
|
|
|
/**
 * Class Api
 *
 * Shared base for the concrete API classes. It validates and stores the
 * target URL, keeps the timeout, field and option settings, and assembles
 * the request URL that is fetched through the Diffbot instance held in
 * $diffbot.
 *
 * @package Swader\Diffbot\Abstracts
 */
abstract class Api implements \Swader\Diffbot\Interfaces\Api
{
    use DiffbotAware;

    /** @var int Request timeout in milliseconds; 30000 unless changed */
    private $timeout = 30000;

    /** @var string Target URL to be processed, or the literal 'crawl' */
    private $url;

    /** @var string Endpoint URL the request is sent to */
    protected $apiUrl;

    /** @var array Map of optional field name => flag; truthy flags are requested */
    protected $fieldSettings = [];

    /** @var array Map of additional query parameter name => value */
    protected $otherOptions = [];

    /** @var Diffbot Instance supplying the token, HTTP client and entity factory */
    protected $diffbot;

    /**
     * Stores the URL this API instance should process.
     *
     * The literal value 'crawl' is stored untouched. Any other value is cast
     * to string, trimmed, prefixed with "http://" when it carries no scheme,
     * and then checked with FILTER_VALIDATE_URL.
     *
     * @param string $url Target URL, or 'crawl'
     *
     * @throws \InvalidArgumentException If the trimmed URL is shorter than
     *                                   four characters or fails validation
     */
    public function __construct($url)
    {
        // 'crawl' is a marker value, not a URL, so it bypasses validation.
        if (strcmp($url, 'crawl') === 0) {
            $this->url = $url;

            return;
        }

        $candidate = trim((string)$url);
        if (strlen($candidate) < 4) {
            throw new \InvalidArgumentException(
                'URL must be a string of at least four characters in length'
            );
        }

        // Scheme-less input is assumed to be plain HTTP.
        $parts = parse_url($candidate);
        if (!isset($parts['scheme'])) {
            $candidate = 'http://' . $candidate;
        }

        $validated = filter_var($candidate, FILTER_VALIDATE_URL);
        if (!$validated) {
            throw new \InvalidArgumentException(
                'You provided an invalid URL: ' . $candidate
            );
        }

        $this->url = $validated;
    }

    /**
     * Sets the timeout that is sent along with the request as the
     * "timeout" query parameter.
     *
     * Passing null (or nothing) resets the value to 30000. Zero is accepted;
     * only values below zero are rejected.
     *
     * @param int|null $timeout Timeout in milliseconds, or null for 30000
     *
     * @return $this
     *
     * @throws \InvalidArgumentException If the value is not an integer or is
     *                                   negative
     */
    public function setTimeout($timeout = null)
    {
        $milliseconds = ($timeout === null) ? 30000 : $timeout;

        if (!is_int($milliseconds)) {
            throw new \InvalidArgumentException('Parameter is not an integer');
        }

        if ($milliseconds < 0) {
            throw new \InvalidArgumentException(
                'Parameter is negative. Only positive timeouts accepted.'
            );
        }

        $this->timeout = $milliseconds;

        return $this;
    }

    /**
     * Performs a GET request against the URL produced by buildUrl() and
     * passes the response to the entity factory.
     *
     * @return mixed Whatever the entity factory's createAppropriateIterator()
     *               returns for the response
     */
    public function call()
    {
        $client = $this->diffbot->getHttpClient();
        $response = $client->get($this->buildUrl());
        $factory = $this->diffbot->getEntityFactory();

        return $factory->createAppropriateIterator($response);
    }

    /**
     * Assembles the full request URL from the endpoint, token, target URL,
     * timeout, enabled fields and any other options.
     *
     * @return string
     */
    public function buildUrl()
    {
        $query = '';

        // For the 'crawl' marker the token and url parameters are omitted,
        // so the query string then starts directly with '&timeout='.
        if (strcmp($this->url, 'crawl') !== 0) {
            $query = 'token=' . $this->diffbot->getToken()
                . '&url=' . urlencode($this->url);
        }

        $query .= '&timeout=' . $this->timeout;

        // Only fields whose flag is truthy are requested.
        $enabled = [];
        foreach ($this->fieldSettings as $field => $isEnabled) {
            if ($isEnabled) {
                $enabled[] = $field;
            }
        }
        $fieldList = trim(implode(',', $enabled), ',');
        if ($fieldList !== '') {
            $query .= '&fields=' . $fieldList;
        }

        // Remaining options are appended verbatim, without further encoding.
        foreach ($this->otherOptions as $name => $setting) {
            $query .= "&{$name}={$setting}";
        }

        return rtrim($this->apiUrl, '/') . '?' . $query;
    }
}
|
129
|
|
|
|