Passed
Pull Request — master (#13)
by Matthias
02:07
created

WebClient::getOption()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 1
nc 2
nop 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Vaites\ApacheTika\Clients;
4
5
use Exception;
6
7
use Vaites\ApacheTika\Client;
8
use Vaites\ApacheTika\Metadata\Metadata;
9
10
/**
11
 * Apache Tika web client
12
 *
13
 * @author  David Martínez <[email protected]>
14
 * @link    http://wiki.apache.org/tika/TikaJAXRS
15
 * @link    https://tika.apache.org/1.12/formats.html
16
 */
17
class WebClient extends Client
18
{
19
    const MODE = 'web';

    /**
     * Cached responses, used to avoid repeated requests for the same file
     *
     * @var array
     */
    protected $cache = [];

    /**
     * Apache Tika server host
     *
     * @var string
     */
    protected $host = '127.0.0.1';

    /**
     * Apache Tika server port
     *
     * @var int
     */
    protected $port = 9998;

    /**
     * Apache Tika server base URL, null until one is set explicitly
     *
     * @var string|null
     */
    protected $baseUrl = null;

    /**
     * Number of retries on server error
     *
     * @var int
     */
    protected $retries = 3;

    /**
     * Default cURL options
     *
     * @var array
     */
    protected $options =
    [
        CURLINFO_HEADER_OUT    => true,
        CURLOPT_HTTPHEADER     => [],
        CURLOPT_PUT            => true,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => 5,
    ];
69
70
    /**
     * Configure the client
     *
     * Each argument is applied through its setter only when it is not empty: null, '', 0 and []
     * all leave the corresponding default untouched. Finally setDownloadRemote(true) is called.
     *
     * @param   string|null $host
     * @param   int|null    $port
     * @param   array       $options    cURL options, applied through setOptions()
     * @throws  \Exception
     */
    public function __construct($host = null, $port = null, $options = [])
    {
        // an empty host keeps the default one
        if(!empty($host))
        {
            $this->setHost($host);
        }

        // an empty port (including 0) keeps the default one
        if(!empty($port))
        {
            $this->setPort($port);
        }

        if(!empty($options))
        {
            $this->setOptions($options);
        }

        $this->setDownloadRemote(true);
    }
97
98
    /**
     * Get the configured Apache Tika server host
     *
     * @return  null|string
     */
    public function getHost()
    {
        return $this->host;
    }
107
108
    /**
     * Set the Apache Tika server host
     *
     * The value is stored as given, without validation.
     *
     * @param   string  $host
     * @return  $this
     */
    public function setHost($host)
    {
        $this->host = $host;

        return $this;
    }
120
121
    /**
     * Get the configured Apache Tika server port
     *
     * @return  null|int
     */
    public function getPort()
    {
        return $this->port;
    }
130
131
    /**
     * Set the Apache Tika server port
     *
     * The value is stored as given, without validation.
     *
     * @param   int     $port
     * @return  $this
     */
    public function setPort($port)
    {
        $this->port = $port;

        return $this;
    }
143
144
    /**
     * Get the base URL used to reach the server
     *
     * Returns the explicitly configured base URL when there is one, otherwise a plain HTTP URL
     * built from the current host and port.
     *
     * @return string
     */
    public function getBaseUrl()
    {
        // no explicit base URL: fall back to host and port
        if($this->baseUrl === null)
        {
            return "http://{$this->host}:{$this->port}/";
        }

        return $this->baseUrl;
    }
157
158
    /**
     * Set the base URL
     *
     * Once set to a non-null value, getBaseUrl() returns it instead of the URL built from host
     * and port. Passing null restores that fallback.
     *
     * @param   string|null $baseUrl
     * @return  $this
     */
    public function setBaseUrl($baseUrl)
    {
        $this->baseUrl = $baseUrl;

        // fluent interface, consistent with the other setters of this class
        return $this;
    }
167
168
    /**
     * Get how many times a request is retried on server error
     *
     * @return  int
     */
    public function getRetries()
    {
        return $this->retries;
    }
177
178
    /**
     * Set how many times a request is retried on server error
     *
     * The value is stored as given, without validation.
     *
     * @param   int     $retries
     * @return  $this
     */
    public function setRetries($retries)
    {
        $this->retries = $retries;

        return $this;
    }
190
191
    /**
     * Get every configured cURL option
     *
     * @return  array   option values indexed by cURL option constant
     */
    public function getOptions()
    {
        return $this->options;
    }
200
201
    /**
     * Get the value of a single cURL option
     *
     * @param   int     $key    cURL option constant, as used by setOption()
     * @return  mixed   the stored value, or null when the option is not set (or is set to null)
     */
    public function getOption($key)
    {
        if(isset($this->options[$key]))
        {
            return $this->options[$key];
        }

        return null;
    }
211
212
    /**
     * Store a cURL option that will later be applied with curl_setopt()
     *
     * CURLINFO_HEADER_OUT, CURLOPT_PUT and CURLOPT_RETURNTRANSFER are protected and cannot be
     * changed through this method.
     *
     * @link    http://php.net/manual/en/curl.constants.php
     * @link    http://php.net/manual/en/function.curl-setopt.php
     * @param   int     $key    cURL option constant
     * @param   mixed   $value
     * @return  $this
     * @throws  \Exception  when $key is one of the protected options
     */
    public function setOption($key, $value)
    {
        $protected = [CURLINFO_HEADER_OUT, CURLOPT_PUT, CURLOPT_RETURNTRANSFER];

        // compared loosely, so a numeric string such as '54' is rejected too: PHP would store it
        // under the same integer array key as the constant
        $locked = in_array($key, $protected);

        if($locked)
        {
            throw new Exception("Value for cURL option $key cannot be modified", 3);
        }

        $this->options[$key] = $value;

        return $this;
    }
233
234
    /**
     * Store several cURL options at once
     *
     * Every entry goes through setOption(), so the first protected option found aborts the loop
     * with an exception while the entries before it remain applied.
     *
     * @param   array   $options    values indexed by cURL option constant
     * @return  $this
     * @throws  \Exception
     */
    public function setOptions($options)
    {
        foreach($options as $name => $setting)
        {
            $this->setOption($name, $setting);
        }

        return $this;
    }
250
251
    /**
     * Get the value stored for the CURLOPT_TIMEOUT option
     *
     * @return  int
     */
    public function getTimeout()
    {
        $timeout = $this->getOption(CURLOPT_TIMEOUT);

        return $timeout;
    }
260
261
    /**
     * Store the CURLOPT_TIMEOUT option
     *
     * @param   int     $value  cast to int before being stored
     * @return  $this
     * @throws  \Exception
     */
    public function setTimeout($value)
    {
        $seconds = (int) $value;

        $this->setOption(CURLOPT_TIMEOUT, $seconds);

        return $this;
    }
274
275
    /**
     * Configure, make a request and return its results
     *
     * Successful 'lang' and 'meta' responses are cached per file, so repeating one of those
     * requests returns the cached value without contacting the server. A response with status 500
     * is retried up to the configured number of retries, counted separately for every call.
     *
     * @param   string      $type   request type, as accepted by getParameters()
     * @param   string|null $file   local path or remote URL
     * @return  string|\Vaites\ApacheTika\Metadata\Metadata|null    response body, the object built
     *                                                              by Metadata::make() for 'meta'
     *                                                              or null on status 204
     * @throws  \Exception
     */
    public function request($type, $file = null)
    {
        // the cache is keyed by the file exactly as the caller passed it, so that the lookup
        // below and the store further down always agree
        $key = sha1((string) $file);

        if(isset($this->cache[$key][$type]))
        {
            return $this->cache[$key][$type];
        }

        // parameters for cURL request
        list($resource, $headers) = $this->getParameters($type, $file);

        // check the request (the returned file may differ from the one received)
        $file = parent::checkRequest($type, $file);

        // a local counter guarantees that retries are bounded on every call
        $retries = (int) $this->retries;

        do
        {
            // options are rebuilt on each attempt, which also reopens the local file to upload
            $options = $this->getCurlOptions($type, $file);

            // sets headers
            foreach($headers as $header)
            {
                $options[CURLOPT_HTTPHEADER][] = $header;
            }

            $options[CURLOPT_URL] = rtrim($this->getBaseUrl(), '/') . "/$resource";

            // get the response and the HTTP status code
            list($response, $status) = $this->exec($options);
        }
        while($status == 500 && $retries-- > 0);

        // request completed successfully
        if($status == 200)
        {
            if($type === 'meta')
            {
                $response = Metadata::make($response, $file);
            }

            // cache certain responses
            if(in_array($type, ['lang', 'meta'], true))
            {
                $this->cache[$key][$type] = $response;
            }
        }
        // request completed successfully but result is empty
        elseif($status == 204)
        {
            $response = null;
        }
        // any other status code is an error, including a 500 that ran out of retries
        else
        {
            $this->error($status, $resource);
        }

        return $response;
    }
350
351
    /**
     * Make a request to Apache Tika Server
     *
     * @param   array   $options    cURL options indexed by option constant
     * @return  array   trimmed response body and HTTP status code, in that order
     * @throws  \Exception  with the cURL error message and number when the transfer fails
     */
    protected function exec(array $options = [])
    {
        $handle = curl_init();

        // options are set one by one instead of through curl_setopt_array() because of the
        // strange behaviour seen on Windows (issue #8)
        foreach($options as $name => $setting)
        {
            curl_setopt($handle, $name, $setting);
        }

        if(!is_null($this->callback))
        {
            // with a callback the return value of curl_exec() is not used as the response
            $this->response = '';
            curl_exec($handle);
        }
        else
        {
            $this->response = curl_exec($handle);
        }

        // exception if cURL fails
        $errno = curl_errno($handle);

        if($errno)
        {
            throw new Exception(curl_error($handle), $errno);
        }

        $body = trim($this->response);
        $status = curl_getinfo($handle, CURLINFO_HTTP_CODE);

        return [$body, $status];
    }
389
390
    /**
     * Throws an exception for an error status code
     *
     * Statuses 405, 415, 422 and 500 get a specific message and are used as the exception code.
     * Any other status produces an "unexpected response" exception with code 501.
     *
     * @codeCoverageIgnore
     *
     * @param   int       $status
     * @param   string    $resource
     * @throws  \Exception  always
     */
    protected function error($status, $resource)
    {
        $messages =
        [
            405 => 'Method not allowed',
            415 => 'Unsupported media type',
            422 => 'Unprocessable document',
            500 => 'Error while processing document',
        ];

        // known status codes
        if(isset($messages[$status]))
        {
            throw new Exception($messages[$status], (int) $status);
        }

        // unexpected
        throw new Exception("Unexpected response for /$resource ($status)", 501);
    }
428
429
    /**
     * Get the parameters to make the request
     *
     * Maps the request type to the server resource and the extra HTTP headers it needs. When the
     * file looks like a remote URL (it starts with "http") a fileUrl header is added as well.
     *
     * @link    https://wiki.apache.org/tika/TikaJAXRS#Specifying_a_URL_Instead_of_Putting_Bytes
     * @param   string      $type
     * @param   string|null $file
     * @return  array       resource name and list of headers, in that order
     * @throws  \Exception  when the type is unknown
     */
    protected function getParameters($type, $file = null)
    {
        $headers = [];

        if(!empty($file) && preg_match('/^http/', $file))
        {
            $headers[] = "fileUrl:$file";
        }

        switch($type)
        {
            case 'html':
                $resource = 'tika';
                $headers[] = 'Accept: text/html';
                break;

            case 'lang':
                $resource = 'language/stream';
                break;

            case 'mime':
                // the cast avoids passing null to basename() when no file is given
                $name = basename((string) $file);
                $resource = 'detect/stream';
                // disposition parameters are separated by a semicolon (RFC 6266)
                $headers[] = "Content-Disposition: attachment; filename=$name";
                break;

            case 'meta':
                $resource = 'meta';
                $headers[] = 'Accept: application/json';
                break;

            case 'text':
                $resource = 'tika';
                $headers[] = 'Accept: text/plain';
                break;

            case 'text-main':
                $resource = 'tika/main';
                $headers[] = 'Accept: text/plain';
                break;

            // informational requests: the resource has the same name as the type
            case 'detectors':
            case 'parsers':
            case 'mime-types':
            case 'version':
                $resource = $type;
                break;

            default:
                throw new Exception("Unknown type $type");
        }

        return [$resource, $headers];
    }
492
493
    /**
     * Get the cURL options
     *
     * Starts from the options configured on the client and adds what the request needs: a write
     * callback when one is set on the client and, for a readable local file, the handle and
     * size used for the upload. Requests that need no file have CURLOPT_PUT switched off.
     *
     * @param   string      $type
     * @param   string|null $file
     * @return  array       cURL options indexed by option constant
     * @throws  \Exception  when a file is required but cannot be opened
     */
    protected function getCurlOptions($type, $file = null)
    {
        // base options
        $options = $this->options;

        // callback
        if(!is_null($this->callback))
        {
            $callback = $this->callback;

            $options[CURLOPT_WRITEFUNCTION] = function($handler, $data) use($callback)
            {
                $this->response .= $data;

                $callback($data);

                // safe because cURL must receive the number of *bytes* written
                return strlen($data);
            };
        }

        // a remote file needs no extra options
        $remote = $file && preg_match('/^http/', $file);

        if(!$remote)
        {
            // local file options
            if($file && file_exists($file) && is_readable($file))
            {
                $handle = fopen($file, 'r');

                // the file may still fail to open after the checks above
                if($handle === false)
                {
                    throw new Exception("File $file can't be opened");
                }

                $options[CURLOPT_INFILE] = $handle;
                $options[CURLOPT_INFILESIZE] = filesize($file);
            }
            // other options for specific requests
            elseif(in_array($type, ['detectors', 'mime-types', 'parsers', 'version'], true))
            {
                $options[CURLOPT_PUT] = false;
            }
            // file not accessible
            else
            {
                throw new Exception("File $file can't be opened");
            }
        }

        return $options;
    }
546
}
547