Completed
Push — master ( ab76ce...ee0b8d )
by David
22s queued 11s
created

WebClient   F

Complexity

Total Complexity 70

Size/Duplication

Total Lines 559
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 159
dl 0
loc 559
rs 2.8
c 0
b 0
f 0
wmc 70

21 Methods

Rating   Name   Duplication   Size   Complexity  
A getOption() 0 3 2
A getUrl() 0 3 2
A getRetries() 0 3 1
A getOptions() 0 3 1
A setUrl() 0 12 2
A check() 0 8 2
A setRetries() 0 5 1
B __construct() 0 26 7
A setOption() 0 10 2
A setTimeout() 0 5 1
A getPort() 0 3 1
A getTimeout() 0 3 1
A setHost() 0 5 1
A setOptions() 0 8 2
A setPort() 0 5 1
A getHost() 0 3 1
C request() 0 73 12
C getParameters() 0 53 13
A exec() 0 30 4
B getCurlOptions() 0 44 8
A error() 0 27 5

How to fix   Complexity   

Complex Class

Complex classes like WebClient often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use WebClient, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace Vaites\ApacheTika\Clients;
4
5
use Exception;
6
7
use Vaites\ApacheTika\Client;
8
use Vaites\ApacheTika\Metadata\Metadata;
9
10
/**
11
 * Apache Tika web client
12
 *
13
 * @author  David Martínez <[email protected]>
14
 * @link    http://wiki.apache.org/tika/TikaJAXRS
15
 * @link    https://tika.apache.org/1.12/formats.html
16
 */
17
class WebClient extends Client
{
    const MODE = 'web';

    /**
     * Cached responses to avoid multiple requests for the same file
     *
     * Indexed first by the SHA-1 of the file argument and then by request type.
     *
     * @var array
     */
    protected $cache = [];

    /**
     * Apache Tika server host
     *
     * @var string|null
     */
    protected $host = null;

    /**
     * Apache Tika server port (getUrl() falls back to 9998 when empty)
     *
     * @var int|null
     */
    protected $port = null;

    /**
     * Number of retries on server error (HTTP 500)
     *
     * @var int
     */
    protected $retries = 3;

    /**
     * Default cURL options
     *
     * CURLINFO_HEADER_OUT, CURLOPT_PUT and CURLOPT_RETURNTRANSFER cannot be
     * overridden through setOption().
     *
     * @var array
     */
    protected $options =
    [
        CURLINFO_HEADER_OUT    => true,
        CURLOPT_HTTPHEADER     => [],
        CURLOPT_PUT            => true,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => 5
    ];

    /**
     * Configure class and test if server is running
     *
     * When $host is a full URL, host and port are extracted from it; otherwise
     * it is used as the host name as is.
     *
     * @param   string  $host
     * @param   int     $port
     * @param   array   $options    cURL options, see setOption()
     * @throws  \Exception
     */
    public function __construct($host = null, $port = null, $options = [])
    {
        if(is_string($host) && filter_var($host, FILTER_VALIDATE_URL))
        {
            $this->setUrl($host);
        }
        elseif($host)
        {
            $this->setHost($host);
        }

        // an explicit port wins over the one that may come inside the URL
        if(is_numeric($port))
        {
            $this->setPort($port);
        }

        if(!empty($options))
        {
            $this->setOptions($options);
        }

        $this->setDownloadRemote(true);

        if(self::$check === true)
        {
            $this->check();
        }
    }

    /**
     * Get the base URL
     *
     * The scheme is always "http" and the port defaults to 9998 when none is set.
     *
     * @return string
     */
    public function getUrl()
    {
        return sprintf('http://%s:%d', $this->host, $this->port ?: 9998);
    }

    /**
     * Set the host and port using an URL
     *
     * Only the host and port components are used. If the URL cannot be parsed
     * or has no host, the host is set to null (same outcome as before, but
     * without the notice/warning raised by indexing a failed parse_url() result).
     *
     * @param   string  $url
     * @return  $this
     */
    public function setUrl($url)
    {
        $parts = parse_url($url);

        // parse_url() returns false on seriously malformed URLs
        $this->setHost(isset($parts['host']) ? $parts['host'] : null);

        if(isset($parts['port']))
        {
            $this->setPort($parts['port']);
        }

        return $this;
    }

    /**
     * Get the host
     *
     * @return  null|string
     */
    public function getHost()
    {
        return $this->host;
    }

    /**
     * Set the host
     *
     * @param   string  $host
     * @return  $this
     */
    public function setHost($host)
    {
        $this->host = $host;

        return $this;
    }

    /**
     * Get the port
     *
     * @return  null|int
     */
    public function getPort()
    {
        return $this->port;
    }

    /**
     * Set the port
     *
     * @param   int     $port
     * @return  $this
     */
    public function setPort($port)
    {
        $this->port = $port;

        return $this;
    }

    /**
     * Get the number of retries
     *
     * @return  int
     */
    public function getRetries()
    {
        return $this->retries;
    }

    /**
     * Set the number of retries
     *
     * @param   int     $retries
     * @return  $this
     */
    public function setRetries($retries)
    {
        $this->retries = $retries;

        return $this;
    }

    /**
     * Get all the options
     *
     * @return  null|array
     */
    public function getOptions()
    {
        return $this->options;
    }

    /**
     * Get a specified option
     *
     * @param   string  $key
     * @return  mixed   the option value or null when it is not set
     */
    public function getOption($key)
    {
        return isset($this->options[$key]) ? $this->options[$key] : null;
    }

    /**
     * Set a cURL option to be set with curl_setopt()
     *
     * @link    http://php.net/manual/en/curl.constants.php
     * @link    http://php.net/manual/en/function.curl-setopt.php
     * @param   string  $key
     * @param   mixed   $value
     * @return  $this
     * @throws  \Exception  when trying to change an option this client relies on
     */
    public function setOption($key, $value)
    {
        // loose comparison on purpose: a numeric string key such as "54" becomes
        // the integer array key 54 and would otherwise bypass this guard
        if(in_array($key, [CURLINFO_HEADER_OUT, CURLOPT_PUT, CURLOPT_RETURNTRANSFER]))
        {
            throw new Exception("Value for cURL option $key cannot be modified", 3);
        }

        $this->options[$key] = $value;

        return $this;
    }

    /**
     * Set the cURL options
     *
     * @param   array   $options    option => value pairs, each one passed to setOption()
     * @return  $this
     * @throws  \Exception
     */
    public function setOptions($options)
    {
        foreach($options as $key => $value)
        {
            $this->setOption($key, $value);
        }

        return $this;
    }

    /**
     * Get the timeout value for cURL
     *
     * @return  int
     */
    public function getTimeout()
    {
        return $this->getOption(CURLOPT_TIMEOUT);
    }

    /**
     * Set the timeout value for cURL
     *
     * @param   int     $value
     * @return  $this
     * @throws  \Exception
     */
    public function setTimeout($value)
    {
        $this->setOption(CURLOPT_TIMEOUT, (int) $value);

        return $this;
    }

    /**
     * Check if server is running
     *
     * @throws \Exception
     */
    public function check()
    {
        if(self::isChecked() === false)
        {
            // flag first: request() calls check() again and must not recurse
            self::setChecked(true);

            // throws an exception if server is unreachable or can't connect
            $this->request('version');
        }
    }

    /**
     * Configure, make a request and return its results
     *
     * Responses of type "lang" and "meta" are cached per file. A request that
     * ends with HTTP 500 is repeated up to getRetries() times within this call;
     * the retry budget is local to the call, so an earlier failure can never
     * leave later calls with an exhausted (or unbounded) counter.
     *
     * @param   string  $type
     * @param   string  $file
     * @return  string|\Vaites\ApacheTika\Metadata\Metadata|null    Metadata for
     *          "meta" requests, null on HTTP 204, the trimmed body otherwise
     * @throws  \Exception
     */
    public function request($type, $file = null)
    {
        // check if not checked
        $this->check();

        // one key for lookup and storage, taken from the caller's argument
        // NOTE(review): parent::checkRequest() may return a different path; hashing
        // the original argument keeps repeated calls hitting the same cache entry
        $key = sha1((string) $file);

        // check if is cached
        if(isset($this->cache[$key][$type]))
        {
            return $this->cache[$key][$type];
        }

        // parameters for cURL request
        list($resource, $headers) = $this->getParameters($type, $file);

        // check the request
        $file = parent::checkRequest($type, $file);

        $retries = (int) $this->retries;

        do
        {
            // cURL options (rebuilt on every attempt so the upload stream is fresh)
            $options = $this->getCurlOptions($type, $file);

            // sets headers
            foreach($headers as $header)
            {
                $options[CURLOPT_HTTPHEADER][] = $header;
            }

            $options[CURLOPT_URL] = $this->getUrl() . "/$resource";

            // get the response and the HTTP status code
            try
            {
                list($response, $status) = $this->exec($options);
            }
            catch(Exception $exception)
            {
                // do not leave the upload stream open when cURL fails
                $this->closeInputFile($file, $options);

                throw $exception;
            }

            $this->closeInputFile($file, $options);
        }
        while($status == 500 && $retries-- > 0);

        // request completed successfully
        if($status == 200)
        {
            if($type == 'meta')
            {
                $response = Metadata::make($response, $file);
            }

            // cache certain responses
            if(in_array($type, ['lang', 'meta']))
            {
                $this->cache[$key][$type] = $response;
            }
        }
        // request completed successfully but result is empty
        elseif($status == 204)
        {
            $response = null;
        }
        // other status code (including a 500 once retries are exhausted) is an error
        else
        {
            $this->error($status, $resource);
        }

        return $response;
    }

    /**
     * Close the upload stream placed in the cURL options, if there is one
     *
     * @param   string|null $file       file of the current request
     * @param   array       $options    cURL options used for the request
     * @return  void
     */
    protected function closeInputFile($file, array $options)
    {
        // CURLOPT_INFILE is not set for remote files or file-less requests
        if($file && isset($options[CURLOPT_INFILE]) && is_resource($options[CURLOPT_INFILE]))
        {
            fclose($options[CURLOPT_INFILE]);
        }
    }

    /**
     * Make a request to Apache Tika Server
     *
     * @param   array   $options    cURL options, applied one by one
     * @return  array   trimmed response body and HTTP status code, in that order
     * @throws  \Exception  with the cURL error message and number when cURL fails
     */
    protected function exec(array $options = [])
    {
        // cURL init and options
        $curl = curl_init();

        // we avoid curl_setopt_array($curl, $options) because of strange Windows behaviour (issue #8)
        foreach($options as $option => $value)
        {
            curl_setopt($curl, $option, $value);
        }

        // make the request
        if(is_null($this->callback))
        {
            $this->response = curl_exec($curl);
        }
        else
        {
            // the write function set in getCurlOptions() appends to the response
            $this->response = '';
            curl_exec($curl);
        }

        // exception if cURL fails
        if(curl_errno($curl))
        {
            throw new Exception(curl_error($curl), curl_errno($curl));
        }

        // return the response and the status code
        return [trim($this->response), curl_getinfo($curl, CURLINFO_HTTP_CODE)];
    }

    /**
     * Throws an exception for an error status code
     *
     * @codeCoverageIgnore
     *
     * @param   int       $status
     * @param   string    $resource
     * @throws  \Exception  always; the code is the HTTP status for known errors and 501 otherwise
     */
    protected function error($status, $resource)
    {
        switch($status)
        {
            //  method not allowed
            case 405:
                throw new Exception('Method not allowed', 405);

            //  unsupported media type
            case 415:
                throw new Exception('Unsupported media type', 415);

            //  unprocessable entity
            case 422:
                throw new Exception('Unprocessable document', 422);

            // server error
            case 500:
                throw new Exception('Error while processing document', 500);

            // unexpected
            default:
                throw new Exception("Unexpected response for /$resource ($status)", 501);
        }
    }

    /**
     * Get the parameters to make the request
     *
     * @link    https://wiki.apache.org/tika/TikaJAXRS#Specifying_a_URL_Instead_of_Putting_Bytes
     * @param   string  $type
     * @param   string  $file
     * @return  array   server resource path and list of HTTP headers, in that order
     * @throws  \Exception  when the type is unknown
     */
    protected function getParameters($type, $file = null)
    {
        $headers = [];

        // remote files are sent by reference using the fileUrl header
        if(!empty($file) && preg_match('/^http/', $file))
        {
            $headers[] = "fileUrl:$file";
        }

        switch($type)
        {
            case 'html':
                $resource = 'tika';
                $headers[] = 'Accept: text/html';
                break;

            case 'lang':
                $resource = 'language/stream';
                break;

            case 'mime':
                $name = basename($file);
                $resource = 'detect/stream';
                $headers[] = "Content-Disposition: attachment, filename=$name";
                break;

            case 'meta':
                $resource = 'meta';
                $headers[] = 'Accept: application/json';
                break;

            case 'text':
                $resource = 'tika';
                $headers[] = 'Accept: text/plain';
                break;

            case 'text-main':
                $resource = 'tika/main';
                $headers[] = 'Accept: text/plain';
                break;

            // informational endpoints are named after the request type
            case 'detectors':
            case 'parsers':
            case 'mime-types':
            case 'version':
                $resource = $type;
                break;

            default:
                throw new Exception("Unknown type $type");
        }

        return [$resource, $headers];
    }

    /**
     * Get the cURL options
     *
     * Starts from the configured options and adds the write callback (when one
     * is set) and the upload stream for local files.
     *
     * @param   string  $type
     * @param   string  $file
     * @return  array
     * @throws  \Exception  when a file is required but is missing or not readable
     */
    protected function getCurlOptions($type, $file = null)
    {
        // base options
        $options = $this->options;

        // callback
        if(!is_null($this->callback))
        {
            $callback = $this->callback;

            $options[CURLOPT_WRITEFUNCTION] = function($handler, $data) use($callback)
            {
                $this->response .= $data;

                $callback($data);

                // safe because cURL must receive the number of *bytes* written
                return strlen($data);
            };
        }

        // remote files need no extra options
        if($file && preg_match('/^http/', $file))
        {
            return $options;
        }

        // local file options
        if($file && file_exists($file) && is_readable($file))
        {
            $options[CURLOPT_INFILE] = fopen($file, 'r');
            $options[CURLOPT_INFILESIZE] = filesize($file);
        }
        // other options for specific requests
        elseif(in_array($type,  ['detectors', 'mime-types', 'parsers', 'version']))
        {
            $options[CURLOPT_PUT] = false;
        }
        // file not accessible
        else
        {
            throw new Exception("File $file can't be opened");
        }

        return $options;
    }
}
578