Passed
Branch master (ad025b)
by David
01:24
created

WebClient   F

Complexity

Total Complexity 77

Size/Duplication

Total Lines 580
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 167
dl 0
loc 580
rs 2.24
c 0
b 0
f 0
wmc 77

21 Methods

Rating   Name   Duplication   Size   Complexity  
A getOption() 0 3 2
A getUrl() 0 3 2
B request() 0 70 11
A getRetries() 0 3 1
A getOptions() 0 3 1
A setUrl() 0 12 2
A check() 0 8 2
A setRetries() 0 5 1
B __construct() 0 28 7
A setOption() 0 10 2
A setTimeout() 0 5 1
A getPort() 0 3 1
A getTimeout() 0 3 1
A setHost() 0 5 1
A setOptions() 0 8 2
A setPort() 0 5 1
A getHost() 0 3 1
C getParameters() 0 56 16
B exec() 0 39 7
B getCurlOptions() 0 44 8
B error() 0 35 7

How to fix   Complexity   

Complex Class

Complex classes like WebClient often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use WebClient, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace Vaites\ApacheTika\Clients;
4
5
use Exception;
6
7
use Vaites\ApacheTika\Client;
8
9
/**
 * Apache Tika web client
 *
 * Sends documents to an Apache Tika server over HTTP (using cURL) and returns
 * the server responses. Server errors (HTTP 500) are retried a configurable
 * number of times before an exception is thrown.
 *
 * @author  David Martínez <[email protected]>
 * @link    http://wiki.apache.org/tika/TikaJAXRS
 */
class WebClient extends Client
{
    const MODE = 'web';

    /**
     * Cached responses to avoid multiple request for the same file
     *
     * @var array
     */
    protected $cache = [];

    /**
     * Apache Tika server host (null until configured)
     *
     * @var string|null
     */
    protected $host = null;

    /**
     * Apache Tika server port (null means "use the default port", see getUrl())
     *
     * @var int|null
     */
    protected $port = null;

    /**
     * Number of retries on server error
     *
     * @var int
     */
    protected $retries = 3;

    /**
     * Default cURL options
     *
     * @var array
     */
    protected $options =
    [
        CURLINFO_HEADER_OUT    => true,
        CURLOPT_HTTPHEADER     => [],
        CURLOPT_PUT            => true,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => 5
    ];

    /**
     * Configure class and test if server is running
     *
     * @param   string  $host       host name or a full URL (host and port are then taken from the URL)
     * @param   int     $port       server port, ignored unless numeric
     * @param   array   $options    cURL options, see setOption()
     * @param   bool    $check      when true, the server is contacted immediately
     * @throws  \Exception
     */
    public function __construct($host = null, $port = null, $options = [], $check = true)
    {
        parent::__construct();

        if(is_string($host) && filter_var($host, FILTER_VALIDATE_URL))
        {
            $this->setUrl($host);
        }
        elseif($host)
        {
            $this->setHost($host);
        }

        // an explicit port takes precedence over the one found in the URL
        if(is_numeric($port))
        {
            $this->setPort($port);
        }

        if(!empty($options))
        {
            $this->setOptions($options);
        }

        $this->setDownloadRemote(true);

        if($check === true)
        {
            $this->check();
        }
    }

    /**
     * Get the base URL
     *
     * The port defaults to 9998 when none has been set.
     *
     * @return string
     */
    public function getUrl()
    {
        return sprintf('http://%s:%d', $this->host, $this->port ?: 9998);
    }

    /**
     * Set the host and port using an URL
     *
     * Only the host and (when present) the port of the URL are used.
     *
     * @param   string  $url
     * @return  $this
     * @throws  \Exception  if the URL cannot be parsed or has no host
     */
    public function setUrl($url)
    {
        $parts = parse_url($url);

        // parse_url() returns false on seriously malformed URLs and omits "host" on relative ones
        if(!is_array($parts) || !isset($parts['host']))
        {
            throw new Exception("Invalid URL $url");
        }

        $this->setHost($parts['host']);

        if(isset($parts['port']))
        {
            $this->setPort($parts['port']);
        }

        return $this;
    }

    /**
     * Get the host
     *
     * @return  null|string
     */
    public function getHost()
    {
        return $this->host;
    }

    /**
     * Set the host
     *
     * @param   string  $host
     * @return  $this
     */
    public function setHost($host)
    {
        $this->host = $host;

        return $this;
    }

    /**
     * Get the port
     *
     * @return  null|int
     */
    public function getPort()
    {
        return $this->port;
    }

    /**
     * Set the port
     *
     * @param   int     $port
     * @return  $this
     */
    public function setPort($port)
    {
        $this->port = $port;

        return $this;
    }

    /**
     * Get the number of retries
     *
     * @return  int
     */
    public function getRetries()
    {
        return $this->retries;
    }

    /**
     * Set the number of retries
     *
     * @param   int     $retries
     * @return  $this
     */
    public function setRetries($retries)
    {
        $this->retries = $retries;

        return $this;
    }

    /**
     * Get all the options
     *
     * @return  null|array
     */
    public function getOptions()
    {
        return $this->options;
    }

    /**
     * Get an specified option
     *
     * @param   string  $key
     * @return  mixed   the option value, or null when the option is not set
     */
    public function getOption($key)
    {
        return isset($this->options[$key]) ? $this->options[$key] : null;
    }

    /**
     * Set a cURL option to be set with curl_setopt()
     *
     * @link    http://php.net/manual/en/curl.constants.php
     * @link    http://php.net/manual/en/function.curl-setopt.php
     * @param   string  $key
     * @param   mixed   $value
     * @return  $this
     * @throws  \Exception  if the option is one of the protected ones
     */
    public function setOption($key, $value)
    {
        // the comparison is intentionally loose: a numeric string key such as "54" is cast to
        // an integer array key by PHP, so a strict check would let it overwrite a protected option
        if(in_array($key, [CURLINFO_HEADER_OUT, CURLOPT_PUT, CURLOPT_RETURNTRANSFER]))
        {
            throw new Exception("Value for cURL option $key cannot be modified", 3);
        }

        $this->options[$key] = $value;

        return $this;
    }

    /**
     * Set the cURL options
     *
     * @param   array   $options
     * @return  $this
     * @throws  \Exception
     */
    public function setOptions($options)
    {
        foreach($options as $key => $value)
        {
            $this->setOption($key, $value);
        }

        return $this;
    }

    /**
     * Get the timeout value for cURL
     *
     * @return  int
     */
    public function getTimeout()
    {
        return $this->getOption(CURLOPT_TIMEOUT);
    }

    /**
     * Set the timeout value for cURL
     *
     * @param   int     $value
     * @return  $this
     * @throws  \Exception
     */
    public function setTimeout($value)
    {
        $this->setOption(CURLOPT_TIMEOUT, (int) $value);

        return $this;
    }

    /**
     * Check if server is running
     *
     * @throws \Exception
     */
    public function check()
    {
        if($this->isChecked() === false)
        {
            // the flag is set before the request because request() calls check() again
            $this->setChecked(true);

            // throws an exception if server is unreachable or can't connect
            $this->request('version');
        }
    }

    /**
     * Configure, make a request and return its results
     *
     * A response with status 500 is retried up to getRetries() times. Every attempt
     * rebuilds the cURL options so the input file is read again from its start.
     *
     * @param   string  $type
     * @param   string  $file
     * @return  string  the response body (null for an empty 204 response)
     * @throws  \Exception
     */
    public function request($type, $file = null)
    {
        // check if not checked
        $this->check();

        // check if is cached
        if($this->isCached($type, $file))
        {
            return $this->getCachedResponse($type, $file);
        }

        // parameters for cURL request
        list($resource, $headers) = $this->getParameters($type, $file);

        // check the request
        $file = parent::checkRequest($type, $file);

        // the attempt counter is local: it can't leak between files, calls or instances
        $attempts = 0;

        do
        {
            // cURL options
            $options = $this->getCurlOptions($type, $file);

            // sets headers
            foreach($headers as $header)
            {
                $options[CURLOPT_HTTPHEADER][] = $header;
            }

            // cURL init and options
            $options[CURLOPT_URL] = $this->getUrl() . "/$resource";

            // get the response and the HTTP status code
            $failure = null;

            try
            {
                list($response, $status) = $this->exec($options);
            }
            catch(Exception $exception)
            {
                $failure = $exception;
            }

            // reduce memory usage closing the input file, also when the request failed
            if(isset($options[CURLOPT_INFILE]) && is_resource($options[CURLOPT_INFILE]))
            {
                fclose($options[CURLOPT_INFILE]);
            }

            if($failure !== null)
            {
                throw $failure;
            }
        }
        // retry on request failed with error 500
        while($status == 500 && $attempts++ < $this->retries);

        // request completed successfully
        if($status == 200)
        {
            // cache certain responses
            if($this->isCacheable($type))
            {
                $this->cacheResponse($type, $response, $file);
            }
        }
        // request completed successfully but result is empty
        elseif($status == 204)
        {
            $response = null;
        }
        // other status code is an error
        else
        {
            $this->error($status, $resource, $file);
        }

        return $response;
    }

    /**
     * Make a request to Apache Tika Server
     *
     * @param   array   $options    cURL options, as accepted by curl_setopt()
     * @return  array   the trimmed response body and the HTTP status code, in that order
     * @throws  \Exception  if cURL can't be initialized or reports an error
     */
    protected function exec(array $options = [])
    {
        // cURL init
        $curl = curl_init();

        // curl_init() returns a resource before PHP 8.0 and a CurlHandle object since then,
        // so failure is detected by comparing against false instead of using is_resource()
        if($curl === false)
        {
            throw new Exception('Unexpected error');
        }

        // we avoid curl_setopt_array($curl, $options) because strange Windows behaviour (issue #8)
        foreach($options as $option => $value)
        {
            curl_setopt($curl, $option, $value);
        }

        // make the request directly
        if(is_null($this->callback))
        {
            $result = curl_exec($curl);

            // curl_exec() may return a boolean: only a string is a valid response body
            $this->response = is_string($result) ? $result : '';
        }
        // with a callback, the response is appended on each block inside the callback
        else
        {
            $this->response = '';
            curl_exec($curl);
        }

        // exception if cURL fails
        if(curl_errno($curl))
        {
            throw new Exception(curl_error($curl), curl_errno($curl));
        }

        // return the response and the status code
        return [trim($this->response), curl_getinfo($curl, CURLINFO_HTTP_CODE)];
    }

    /**
     * Throws an exception for an error status code
     *
     * This method never returns: every branch throws.
     *
     * @codeCoverageIgnore
     *
     * @param   int       $status
     * @param   string    $resource
     * @param   string    $file
     * @throws  \Exception
     */
    protected function error($status, $resource, $file = null)
    {
        switch($status)
        {
            //  method not allowed
            case 405:
                throw new Exception('Method not allowed', 405);

            //  unsupported media type
            case 415:
                throw new Exception('Unsupported media type', 415);

            //  unprocessable entity
            case 422:
                $message = 'Unprocessable document';

                // using remote files require Tika server to be launched with specific options
                if(!$this->downloadRemote && !empty($file) && preg_match('/^http/', $file))
                {
                    $message .= ' (is server launched using "-enableUnsecureFeatures -enableFileUrl" arguments?)';
                }

                throw new Exception($message, 422);

            // server error
            case 500:
                throw new Exception('Error while processing document', 500);

            // unexpected
            default:
                throw new Exception("Unexpected response for /$resource ($status)", 501);
        }
    }

    /**
     * Get the parameters to make the request
     *
     * @link    https://wiki.apache.org/tika/TikaJAXRS#Specifying_a_URL_Instead_of_Putting_Bytes
     * @param   string  $type
     * @param   string  $file
     * @return  array   the server resource (path without leading slash) and the list of headers, in that order
     * @throws  \Exception  if the type is unknown
     */
    protected function getParameters($type, $file = null)
    {
        $headers = [];

        // remote files are announced to the server through the fileUrl header
        if(!empty($file) && preg_match('/^http/', $file))
        {
            $headers[] = "fileUrl:$file";
        }

        switch($type)
        {
            case 'html':
                $resource = 'tika';
                $headers[] = 'Accept: text/html';
                break;

            case 'lang':
                $resource = 'language/stream';
                break;

            case 'mime':
                $name = basename((string) $file);
                $resource = 'detect/stream';
                // parameters of Content-Disposition are separated with a semicolon (RFC 6266)
                $headers[] = "Content-Disposition: attachment; filename=$name";
                break;

            case 'meta':
            case 'rmeta/html':
            case 'rmeta/ignore':
            case 'rmeta/text':
                $resource = $type;
                $headers[] = 'Accept: application/json';
                break;

            case 'text':
                $resource = 'tika';
                $headers[] = 'Accept: text/plain';
                break;

            case 'text-main':
                $resource = 'tika/main';
                $headers[] = 'Accept: text/plain';
                break;

            case 'detectors':
            case 'parsers':
            case 'mime-types':
            case 'version':
                $resource = $type;
                break;

            default:
                throw new Exception("Unknown type $type");
        }

        return [$resource, $headers];
    }

    /**
     * Get the cURL options
     *
     * When a readable local file is given, it is opened and attached as the request
     * body: the caller is responsible for closing the CURLOPT_INFILE handle.
     *
     * @param   string  $type
     * @param   string  $file
     * @return  array
     * @throws  \Exception  if a file is required but can't be opened
     */
    protected function getCurlOptions($type, $file = null)
    {
        // base options
        $options = $this->options;

        // callback
        if(!is_null($this->callback))
        {
            $callback = $this->callback;

            $options[CURLOPT_WRITEFUNCTION] = function($handler, $data) use($callback)
            {
                $this->response .= $data;

                $callback($data);

                // safe because cURL must receive the number of *bytes* written
                return strlen($data);
            };
        }

        // remote file options
        if($file && preg_match('/^http/', $file))
        {
            // nothing to add: the URL is sent as a header, see getParameters()
        }
        // local file options
        elseif($file && file_exists($file) && is_readable($file))
        {
            // binary mode, the document is uploaded as is
            $handle = fopen($file, 'rb');

            if($handle === false)
            {
                throw new Exception("File $file can't be opened");
            }

            $options[CURLOPT_INFILE] = $handle;
            $options[CURLOPT_INFILESIZE] = filesize($file);
        }
        // other options for specific requests
        elseif(in_array($type, ['detectors', 'mime-types', 'parsers', 'version'], true))
        {
            $options[CURLOPT_PUT] = false;
        }
        // file not accesible
        else
        {
            throw new Exception("File $file can't be opened");
        }

        return $options;
    }
}
597