Passed
Push — master ( 7768f9...daf5ec )
by David
01:16
created

Client::isCached()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 2
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Vaites\ApacheTika;
4
5
use Closure;
6
use Exception;
7
8
use Vaites\ApacheTika\Clients\CLIClient;
9
use Vaites\ApacheTika\Clients\WebClient;
10
11
/**
12
 * Apache Tika client interface
13
 *
14
 * @author  David Martínez <[email protected]>
15
 * @link    http://wiki.apache.org/tika/TikaJAXRS
16
 * @link    https://tika.apache.org/1.10/formats.html
17
 */
18
abstract class Client
19
{
20
    /**
21
     * List of supported Apache Tika versions
22
     *
23
     * @var array
24
     */
25
    protected static $supportedVersions =
26
    [
27
        '1.7', '1.8', '1.9', '1.10', '1.11', '1.12', '1.13', '1.14',
28
        '1.15', '1.16', '1.17', '1.18', '1.19', '1.19.1', '1.20', '1.21'
29
    ];
30
31
    /**
32
     * Verify JAR or server connection on constructor
33
     *
34
     * @var bool
35
     */
36
    protected static $check = true;
37
38
    /**
39
     * Checked flag
40
     *
41
     * @var bool
42
     */
43
    protected static $checked = false;
44
45
    /**
46
     * Response using callbacks
47
     *
48
     * @var string
49
     */
50
    protected $response = null;
51
52
    /**
53
     * Platform (unix or win)
54
     *
55
     * @var string
56
     */
57
    protected $platform = 'unix';
58
59
    /**
60
     * Cached responses to avoid multiple request for the same file.
61
     *
62
     * @var array
63
     */
64
    protected $cache = [];
65
66
    /**
67
     * Callback called on sequential read
68
     *
69
     * @var \Closure
70
     */
71
    protected $callback = null;
72
73
    /**
74
     * Size of chunks for callback
75
     *
76
     * @var int
77
     */
78
    protected $chunkSize = 1048576;
79
80
    /**
81
     * Remote download flag
82
     *
83
     * @var bool
84
     */
85
    protected $downloadRemote = false;
86
87
    /**
     * Initialise the client by detecting the host platform
     *
     * The platform keeps its default value unless PHP exposes the
     * PHP_WINDOWS_VERSION_MAJOR constant, in which case it becomes 'win'.
     */
    public function __construct()
    {
        $onWindows = defined('PHP_WINDOWS_VERSION_MAJOR');

        if($onWindows)
        {
            $this->platform = 'win';
        }
    }
97
98
    /**
     * Get a client instance, choosing the implementation from the first argument
     *
     * A first argument ending in ".jar" yields a CLI client; anything else
     * (including no argument at all) yields a web client.
     *
     * @param   string  $param1     path or host
     * @param   int     $param2     Java binary path or port for web client
     * @param   array   $options    options for cURL request
     * @return  \Vaites\ApacheTika\Clients\CLIClient|\Vaites\ApacheTika\Clients\WebClient
     * @throws  \Exception
     */
    public static function make($param1 = null, $param2 = null, $options = [])
    {
        // read the declared parameter rather than func_get_arg(0): the latter
        // fails when make() is called without arguments, although every
        // parameter has a default value
        if(is_string($param1) && preg_match('/\.jar$/', $param1))
        {
            return new CLIClient($param1, $param2);
        }

        return new WebClient($param1, $param2, $options);
    }
118
119
    /**
     * Get a client instance with the static check flag switched off
     *
     * Disables the flag and then delegates to make() with the same arguments.
     * NOTE(review): the flag is not switched back on here, so it presumably
     * stays disabled for later make() calls as well - confirm this is intended.
     *
     * @param   string  $param1     path or host
     * @param   int     $param2     Java binary path or port for web client
     * @param   array   $options    options for cURL request
     * @return  \Vaites\ApacheTika\Clients\CLIClient|\Vaites\ApacheTika\Clients\WebClient
     * @throws  \Exception
     */
    public static function prepare($param1 = null, $param2 = null, $options = [])
    {
        self::$check = false;

        $client = self::make($param1, $param2, $options);

        return $client;
    }
134
135
    /**
     * Return the callback currently registered on this client
     *
     * @return  \Closure|null   null when no callback has been set
     */
    public function getCallback()
    {
        $current = $this->callback;

        return $current;
    }
144
145
    /**
     * Register the callback (closure or any callable) used on sequential read
     *
     * Closures are stored as they are; other callables are wrapped in a
     * closure that forwards the received chunk to them.
     *
     * @param   mixed   $callback
     * @return  $this
     * @throws  \Exception      when the value is neither a closure nor callable
     */
    public function setCallback($callback)
    {
        // closures can be stored directly
        if($callback instanceof Closure)
        {
            $this->callback = $callback;

            return $this;
        }

        if(!is_callable($callback))
        {
            throw new Exception('Invalid callback');
        }

        // wrap other callables so the stored value is always a closure
        $this->callback = function($chunk) use($callback)
        {
            return call_user_func_array($callback, [$chunk]);
        };

        return $this;
    }
172
173
    /**
     * Return the configured chunk size
     *
     * @return  int
     */
    public function getChunkSize()
    {
        $size = $this->chunkSize;

        return $size;
    }
182
183
    /**
     * Set the chunk size for sequential read
     *
     * Only available when the concrete client declares MODE as 'cli'. The
     * value must be numeric and at least 1 once cast to integer.
     *
     * @param   int     $size
     * @return  $this
     * @throws  \Exception      on web mode or when the size is not a positive number
     */
    public function setChunkSize($size)
    {
        // MODE is expected to be declared by the concrete client class
        $mode = static::MODE;

        if($mode == 'web')
        {
            throw new Exception('Chunk size is not supported on web mode');
        }

        // is_numeric() alone accepts 0, negative and fractional values, none
        // of which is a usable chunk size after the integer cast
        if($mode != 'cli' || !is_numeric($size) || (int) $size < 1)
        {
            throw new Exception("$size is not a valid chunk size");
        }

        $this->chunkSize = (int) $size;

        return $this;
    }
207
208
    /**
     * Tell whether the remote download flag is enabled
     *
     * @return  bool
     */
    public function getDownloadRemote()
    {
        $flag = $this->downloadRemote;

        return $flag;
    }
217
218
    /**
     * Enable or disable the remote download flag
     *
     * @param   bool    $download   any value, stored after a boolean cast
     * @return  $this
     */
    public function setDownloadRemote($download)
    {
        $enabled = (bool) $download;

        $this->downloadRemote = $enabled;

        return $this;
    }
230
231
    /**
     * Get the metadata of a file
     *
     * Issues a 'meta' request for the file and returns its result as is.
     * NOTE(review): request() is documented as returning a string while this
     * method documents a Metadata object - confirm against the concrete clients.
     *
     * @param   string  $file
     * @return  \Vaites\ApacheTika\Metadata\Metadata
     * @throws  \Exception
     */
    public function getMetadata($file)
    {
        $metadata = $this->request('meta', $file);

        return $metadata;
    }
242
243
    /**
     * Detect the language of a file
     *
     * Issues a 'lang' request for the file and returns its result.
     *
     * @param   string  $file
     * @return  string
     * @throws  \Exception
     */
    public function getLanguage($file)
    {
        $language = $this->request('lang', $file);

        return $language;
    }
254
255
    /**
     * Detect the MIME type of a file
     *
     * Issues a 'mime' request for the file and returns its result.
     *
     * @param   string  $file
     * @return  string
     * @throws  \Exception
     */
    public function getMIME($file)
    {
        $mime = $this->request('mime', $file);

        return $mime;
    }
266
267
    /**
     * Extract the contents of a file as HTML
     *
     * When a callback is given it is registered through setCallback() before
     * the 'html' request is made.
     *
     * @param   string  $file
     * @param   mixed   $callback
     * @return  string
     * @throws  \Exception
     */
    public function getHTML($file, $callback = null)
    {
        if($callback !== null)
        {
            $this->setCallback($callback);
        }

        $html = $this->request('html', $file);

        return $html;
    }
284
285
    /**
     * Extract the contents of a file as text
     *
     * When a callback is given it is registered through setCallback() before
     * the 'text' request is made.
     *
     * @param   string  $file
     * @param   mixed   $callback
     * @return  string
     * @throws  \Exception
     */
    public function getText($file, $callback = null)
    {
        if($callback !== null)
        {
            $this->setCallback($callback);
        }

        $text = $this->request('text', $file);

        return $text;
    }
302
303
    /**
     * Extract the main text of a file
     *
     * When a callback is given it is registered through setCallback() before
     * the 'text-main' request is made.
     *
     * @param   string  $file
     * @param   mixed   $callback
     * @return  string
     * @throws  \Exception
     */
    public function getMainText($file, $callback = null)
    {
        if($callback !== null)
        {
            $this->setCallback($callback);
        }

        $mainText = $this->request('text-main', $file);

        return $mainText;
    }
320
321
    /**
     * Returns the supported MIME types
     *
     * Issues a 'mime-types' request, which does not operate on a file.
     *
     * @return  string
     * @throws  \Exception
     */
    public function getSupportedMIMETypes()
    {
        // request() declares two required parameters, so the unused file
        // argument is passed explicitly instead of being omitted
        return $this->request('mime-types', null);
    }
331
332
    /**
     * Returns the available detectors
     *
     * Issues a 'detectors' request, which does not operate on a file.
     *
     * @return  string
     * @throws  \Exception
     */
    public function getAvailableDetectors()
    {
        // request() declares two required parameters, so the unused file
        // argument is passed explicitly instead of being omitted
        return $this->request('detectors', null);
    }
342
343
    /**
     * Returns the available parsers
     *
     * Issues a 'parsers' request, which does not operate on a file.
     *
     * @return  string
     * @throws  \Exception
     */
    public function getAvailableParsers()
    {
        // request() declares two required parameters, so the unused file
        // argument is passed explicitly instead of being omitted
        return $this->request('parsers', null);
    }
353
354
    /**
     * Returns current Tika version
     *
     * Issues a 'version' request, which does not operate on a file.
     *
     * @return  string
     * @throws  \Exception
     */
    public function getVersion()
    {
        // request() declares two required parameters, so the unused file
        // argument is passed explicitly instead of being omitted
        return $this->request('version', null);
    }
364
365
    /**
     * Return the list of Apache Tika versions declared as supported
     *
     * @return array
     */
    public static function getSupportedVersions()
    {
        $versions = self::$supportedVersions;

        return $versions;
    }
374
375
    /**
     * Sets the static checked flag
     *
     * @param   mixed   $checked    any value, stored after a boolean cast
     */
    public static function setChecked($checked)
    {
        $flag = (bool) $checked;

        self::$checked = $flag;
    }
384
385
    /**
     * Return the current value of the static checked flag
     *
     * @return  bool
     */
    public static function isChecked()
    {
        $flag = self::$checked;

        return $flag;
    }
394
395
    /**
     * Check if a response is cached
     *
     * Cache entries are keyed by the SHA-1 hash of the file argument and
     * then by request type.
     *
     * @param   string  $type
     * @param   string  $file
     * @return  bool
     */
    protected function isCached($type, $file)
    {
        $key = sha1($file);

        return isset($this->cache[$key][$type]);
    }
406
407
    /**
     * Get a cached response
     *
     * Looks the entry up by the SHA-1 hash of the file argument and the
     * request type.
     *
     * @param   string  $type
     * @param   string  $file
     * @return  mixed   the cached response, or null when nothing is stored
     */
    protected function getCachedResponse($type, $file)
    {
        $key = sha1($file);

        if(isset($this->cache[$key][$type]))
        {
            return $this->cache[$key][$type];
        }

        return null;
    }
418
419
    /**
     * Check if a request type must be cached
     *
     * Only the 'lang' and 'meta' request types are cacheable.
     *
     * @param   string  $type
     * @return  bool
     */
    protected function isCacheable($type)
    {
        // strict comparison, so values such as 0 or true can never match a
        // type name through type juggling
        return in_array($type, ['lang', 'meta'], true);
    }
429
430
    /**
     * Store a response in the cache
     *
     * The entry is keyed by the SHA-1 hash of the file argument and then by
     * request type; an existing entry for the same pair is overwritten.
     *
     * @param   string  $type
     * @param   mixed   $response
     * @param   string  $file
     * @return  bool    always true
     */
    protected function cacheResponse($type, $response, $file)
    {
        $key = sha1($file);

        $this->cache[$key][$type] = $response;

        return true;
    }
444
445
    /**
     * Checks if a specific version is supported
     *
     * The version is compared as an exact string against the supported list.
     *
     * @param   string  $version
     * @return  bool
     */
    public static function isVersionSupported($version)
    {
        // a loose in_array() compares numeric strings as numbers, which makes
        // '1.1' match '1.10' and '1.2' match '1.20'; compare exact strings instead
        return in_array((string) $version, self::getSupportedVersions(), true);
    }
455
456
    /**
     * Check the request before executing
     *
     * Getter requests (detectors, mime-types, parsers, version) are not
     * checked. Any other request must point to an existing local file or to
     * a remote file whose first response header contains 200. Remote files
     * are downloaded first when the remote download flag is enabled.
     *
     * @param   string  $type
     * @param   string  $file
     * @return  string  the file to use: the given one or the downloaded copy
     * @throws  \Exception      when the file can't be opened or downloaded
     */
    public function checkRequest($type, $file)
    {
        // no checks for getters
        if(in_array($type, ['detectors', 'mime-types', 'parsers', 'version'], true))
        {
            return $file;
        }

        $isRemote = (bool) preg_match('/^http/', $file);

        // invalid local file
        if(!$isRemote)
        {
            if(!file_exists($file))
            {
                throw new Exception("File $file can't be opened");
            }

            return $file;
        }

        // invalid remote file: get_headers() returns false on failure, so the
        // result is verified before its first element is read
        $headers = get_headers($file);

        if(!is_array($headers) || !isset($headers[0]) || !preg_match('/200/', $headers[0]))
        {
            throw new Exception("File $file can't be opened", 2);
        }

        // download remote file if required only for integrated downloader
        if($this->downloadRemote)
        {
            return $this->downloadFile($file);
        }

        return $file;
    }
489
490
    /**
     * Download file to a temporary folder
     *
     * The file is fetched with cURL (5 second timeout) into a temporary file
     * created in the system temporary directory. The temporary file is
     * removed again when the download fails.
     *
     * @link    https://wiki.apache.org/tika/TikaJAXRS#Specifying_a_URL_Instead_of_Putting_Bytes
     * @param   string  $file
     * @return  string  path of the downloaded temporary file
     * @throws  \Exception      on I/O or cURL errors, or on a non 200 response
     */
    protected function downloadFile($file)
    {
        $dest = tempnam(sys_get_temp_dir(), 'TIKA');

        if($dest === false)
        {
            throw new Exception("Temporary file for $file can't be created");
        }

        $fp = fopen($dest, 'w+');

        if($fp === false)
        {
            throw new Exception("$dest can't be opened");
        }

        $ch = curl_init($file);

        // curl_init() returns false on failure
        if($ch === false)
        {
            fclose($fp);
            unlink($dest);

            throw new Exception("$file can't be downloaded");
        }

        curl_setopt($ch, CURLOPT_FILE, $fp);
        curl_setopt($ch, CURLOPT_TIMEOUT, 5);
        curl_exec($ch);

        // collect the outcome first, so both handles are always released
        // before any exception is thrown
        $errno = curl_errno($ch);
        $error = curl_error($ch);
        $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);

        curl_close($ch);
        fclose($fp);

        if($errno)
        {
            unlink($dest);

            throw new Exception($error);
        }

        if($code != 200)
        {
            unlink($dest);

            throw new Exception("$file can't be downloaded", $code);
        }

        return $dest;
    }
530
531
    /**
     * Verify that the client is usable
     *
     * Concrete clients check the Java binary, the JAR path or the server
     * connection here.
     *
     * @return  void
     */
    abstract public function check();
537
538
    /**
     * Configure and make a request and return its results
     *
     * Implemented by the concrete clients. The getter request types used in
     * this class (detectors, mime-types, parsers, version) do not operate on
     * a file.
     *
     * @param   string  $type   request type, e.g. 'meta', 'lang', 'mime', 'html', 'text'
     * @param   string  $file   local path or URL of the file
     * @return  string
     * @throws  \Exception
     */
    abstract public function request($type, $file);
547
}
548