Job::getJobCompletionTimeUTC()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 5
ccs 3
cts 3
cp 1
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 0
crap 2
1
<?php
2
3
namespace Swader\Diffbot\Abstracts;
4
5
use Swader\Diffbot\Exceptions\DiffbotException;
6
7
abstract class Job extends Entity
8
{
9
10
    /**
     * Name of the crawljob.
     *
     * Reads the 'name' entry of the job data and casts it to a string.
     *
     * @return string
     */
    public function getName()
    {
        $name = $this->data['name'];

        return (string)$name;
    }
18
19
    /**
     * Type of the job.
     *
     * The value of the 'type' entry is returned as-is (no cast); it is
     * expected to always be either "crawl" or "bulk".
     *
     * @return string
     */
    public function getType()
    {
        $type = $this->data['type'];

        return $type;
    }
27
28
    /**
     * Timestamp of job creation.
     *
     * @return int|null The 'jobCreationTimeUTC' entry cast to int, or null
     *                  when that entry is not set
     */
    public function getJobCreationTimeUTC()
    {
        if (!isset($this->data['jobCreationTimeUTC'])) {
            return null;
        }

        return (int)$this->data['jobCreationTimeUTC'];
    }
38
39
    /**
     * Timestamp of job completion.
     *
     * @return int|null The 'jobCompletionTimeUTC' entry cast to int, or null
     *                  when that entry is not set
     */
    public function getJobCompletionTimeUTC()
    {
        if (!isset($this->data['jobCompletionTimeUTC'])) {
            return null;
        }

        return (int)$this->data['jobCompletionTimeUTC'];
    }
49
50
    /**
     * Status information of the job.
     *
     * Known status codes:
     *
     * 0    Job is initializing
     * 1    Job has reached maxRounds limit
     * 2    Job has reached maxToCrawl limit
     * 3    Job has reached maxToProcess limit
     * 4    Next round to start in _____ seconds
     * 5    No URLs were added to the crawl
     * 6    Job paused
     * 7    Job in progress
     * 8    All crawling temporarily paused by root administrator for maintenance.
     * 9    Job has completed and no repeat is scheduled
     *
     * @return array The 'jobStatus' entry, or an empty array when that entry
     *               is not set
     */
    public function getJobStatus()
    {
        if (isset($this->data['jobStatus'])) {
            return $this->data['jobStatus'];
        }

        return [];
    }
71
72
    /**
     * Whether the "job complete" notification has been sent.
     *
     * Reads the 'sentJobDoneNotification' entry and casts it to bool.
     *
     * @return bool
     */
    public function getNotificationSent()
    {
        $sent = $this->data['sentJobDoneNotification'];

        return (bool)$sent;
    }
81
82
    /**
     * Count of objects found by the job.
     *
     * Reads the 'objectsFound' entry and casts it to int.
     *
     * @return int
     */
    public function getObjectsFound()
    {
        $found = $this->data['objectsFound'];

        return (int)$found;
    }
91
92
    /**
     * Count of URLs harvested by the job.
     *
     * Reads the 'urlsHarvested' entry and casts it to int.
     *
     * @return int
     */
    public function getUrlsHarvested()
    {
        $harvested = $this->data['urlsHarvested'];

        return (int)$harvested;
    }
101
102
    /**
     * Summary of page crawling: total attempts, total successes, and
     * successes in the current round.
     *
     * The values are copied uncast from the 'pageCrawlAttempts',
     * 'pageCrawlSuccesses' and 'pageCrawlSuccessesThisRound' entries.
     *
     * @return array With keys 'attempts', 'successes', 'successesThisRound'
     */
    public function getPageCrawlInfo()
    {
        $info = [];
        $info['attempts'] = $this->data['pageCrawlAttempts'];
        $info['successes'] = $this->data['pageCrawlSuccesses'];
        $info['successesThisRound'] = $this->data['pageCrawlSuccessesThisRound'];

        return $info;
    }
116
117
    /**
     * Summary of page processing: total attempts, total successes, and
     * successes in the current round.
     *
     * The values are copied uncast from the 'pageProcessAttempts',
     * 'pageProcessSuccesses' and 'pageProcessSuccessesThisRound' entries.
     *
     * @return array With keys 'attempts', 'successes', 'successesThisRound'
     */
    public function getPageProcessInfo()
    {
        $info = [];
        $info['attempts'] = $this->data['pageProcessAttempts'];
        $info['successes'] = $this->data['pageProcessSuccesses'];
        $info['successesThisRound'] = $this->data['pageProcessSuccessesThisRound'];

        return $info;
    }
131
132
    /**
     * Upper limit on the number of crawl repeats.
     *
     * With the default (maxRounds=0), repeating crawls continue indefinitely.
     * Reads the 'maxRounds' entry and casts it to int.
     *
     * @return int
     */
    public function getMaxRounds()
    {
        $maxRounds = $this->data['maxRounds'];

        return (int)$maxRounds;
    }
142
143
    /**
     * Repeat interval of the crawl, in days, as a floating-point number
     * (e.g. repeat=7.0). Crawls are not repeated by default.
     *
     * Reads the 'repeat' entry and casts it to float.
     *
     * @return float
     */
    public function getRepeat()
    {
        $repeat = $this->data['repeat'];

        return (float)$repeat;
    }
153
154
    /**
     * Number of seconds to wait between each URL crawled from a single IP
     * address. May be an integer or a fraction (e.g., crawlDelay=0.25).
     *
     * Reads the 'crawlDelay' entry and casts it to float.
     *
     * @return float
     */
    public function getCrawlDelay()
    {
        $delay = $this->data['crawlDelay'];

        return (float)$delay;
    }
165
166
    /**
     * Whether the job was configured to respect robots.txt.
     *
     * Reads the 'obeyRobots' entry and casts it to bool.
     *
     * @return bool
     */
    public function getObeyRobots()
    {
        $obey = $this->data['obeyRobots'];

        return (bool)$obey;
    }
175
176
    /**
     * Number of rounds the job has completed so far.
     *
     * Reads the 'roundsCompleted' entry and casts it to int.
     *
     * @return int
     */
    public function getRoundsCompleted()
    {
        $rounds = $this->data['roundsCompleted'];

        return (int)$rounds;
    }
185
186
    /**
     * Timestamp at which the next crawl round is about to start, or 0 if
     * there is none.
     *
     * Reads the 'roundStartTime' entry and casts it to int.
     *
     * @return int
     */
    public function getRoundStartTime()
    {
        $startTime = $this->data['roundStartTime'];

        return (int)$startTime;
    }
195
196
    /**
     * Timestamp of the current time.
     *
     * Reads the 'currentTime' entry and casts it to int.
     *
     * @return int
     */
    public function getCurrentTime()
    {
        $now = $this->data['currentTime'];

        return (int)$now;
    }
205
206
    /**
     * Timestamp of the current time, UTC.
     *
     * Should be the same value as getCurrentTime(). Reads the
     * 'currentTimeUTC' entry and casts it to int.
     *
     * @return int
     */
    public function getCurrentTimeUTC()
    {
        $nowUtc = $this->data['currentTimeUTC'];

        return (int)$nowUtc;
    }
216
217
    /**
     * URL of the API used to process pages found in the crawl.
     *
     * If the job was created with this Diffbot lib, the URL was built
     * automatically from a pre-configured API instance.
     *
     * The API URL will be URL decoded, whereas it is submitted encoded.
     * This method itself only reads the 'apiUrl' entry and casts it to
     * string.
     *
     * @return string
     */
    public function getApiUrl()
    {
        $apiUrl = $this->data['apiUrl'];

        return (string)$apiUrl;
    }
230
231
    /**
     * URL crawl pattern of the job ('urlCrawlPattern' entry, cast to string).
     *
     * @see \Swader\Diffbot\Api\Crawl::setUrlCrawlPattern
     * @return string
     */
    public function getUrlCrawlPattern()
    {
        $pattern = $this->data['urlCrawlPattern'];

        return (string)$pattern;
    }
239
240
    /**
     * URL process pattern of the job ('urlProcessPattern' entry, cast to
     * string).
     *
     * @see \Swader\Diffbot\Api\Crawl::setUrlProcessPattern
     * @return string
     */
    public function getUrlProcessPattern()
    {
        $pattern = $this->data['urlProcessPattern'];

        return (string)$pattern;
    }
248
249
    /**
     * Page process pattern of the job ('pageProcessPattern' entry, cast to
     * string).
     *
     * @see \Swader\Diffbot\Api\Crawl::setPageProcessPattern
     * @return string
     */
    public function getPageProcessPattern()
    {
        $pattern = $this->data['pageProcessPattern'];

        return (string)$pattern;
    }
257
258
    /**
     * URL crawl regular expression of the job.
     *
     * Note the key casing: the value is read from the 'urlCrawlRegEx' entry
     * and cast to string.
     *
     * @see \Swader\Diffbot\Api\Crawl::setUrlCrawlRegex
     *
     * @return string
     */
    public function getUrlCrawlRegex()
    {
        $regex = $this->data['urlCrawlRegEx'];

        return (string)$regex;
    }
267
268
    /**
     * URL process regular expression of the job.
     *
     * Note the key casing: the value is read from the 'urlProcessRegEx'
     * entry and cast to string.
     *
     * @see \Swader\Diffbot\Api\Crawl::setUrlProcessRegex
     *
     * @return string
     */
    public function getUrlProcessRegex()
    {
        $regex = $this->data['urlProcessRegEx'];

        return (string)$regex;
    }
277
278
    /**
     * Maximum hops setting of the job ('maxHops' entry, cast to int).
     *
     * @see \Swader\Diffbot\Api\Crawl::setMaxHops
     *
     * @return int
     */
    public function getMaxHops()
    {
        $maxHops = $this->data['maxHops'];

        return (int)$maxHops;
    }
287
288
    /**
     * Returns the link to the dataset the job produced.
     *
     * Accepted arguments are: "json", "csv" and "debug".
     * It is important to be aware of the difference between the types.
     * See "Retrieving Bulk Data" in link.
     *
     * - "json"  returns the 'downloadJson' entry as-is
     * - "debug" returns the 'downloadUrls' entry as-is
     * - "csv"   is derived from the 'downloadJson' entry by swapping a
     *           trailing ".json" extension for ".csv"
     *
     * @see https://www.diffbot.com/dev/docs/crawl/api.jsp
     *
     * @param string $type One of "json", "debug" or "csv"
     * @return string
     * @throws \InvalidArgumentException When $type is not a supported value
     */
    public function getDownloadUrl($type = "json")
    {
        switch ($type) {
            case "json":
                return $this->data['downloadJson'];
            case "debug":
                return $this->data['downloadUrls'];
            case "csv":
                $jsonUrl = $this->data['downloadJson'];
                $extension = '.json';
                $extensionLength = strlen($extension);

                // rtrim($url, '.json') would treat ".json" as a set of
                // characters and keep eating any trailing '.', 'j', 's', 'o'
                // or 'n' (e.g. "jobs.json" -> "job"). Strip the exact suffix
                // instead, and only when it is really there.
                if (substr($jsonUrl, -$extensionLength) === $extension) {
                    $jsonUrl = substr($jsonUrl, 0, -$extensionLength);
                }

                return $jsonUrl . '.csv';
            default:
                break;
        }

        throw new \InvalidArgumentException(
            'Only json, debug, or csv download link available. You asked for: '
            . $type);
    }
318
319
    /**
     * Email address set to be notified once the job completes.
     *
     * Reads the 'notifyEmail' entry and casts it to string.
     *
     * @return string
     */
    public function getNotifyEmail()
    {
        $email = $this->data['notifyEmail'];

        return (string)$email;
    }
328
329
    /**
     * Webhook set to be pinged once the job completes.
     *
     * Reads the 'notifyWebhook' entry and casts it to string.
     *
     * @return string
     */
    public function getNotifyWebhook()
    {
        $webhook = $this->data['notifyWebhook'];

        return (string)$webhook;
    }
338
}