Passed
Push — master ( dcc276...a6d10d )
by David
03:59
created

Client::checkRequest()   C

Complexity

Conditions 12
Paths 6

Size

Total Lines 26
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 10
c 1
b 0
f 0
dl 0
loc 26
rs 6.9666
cc 12
nc 6
nop 2

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace Vaites\ApacheTika;
4
5
use Closure;
6
use Exception;
7
use stdClass;
8
9
use Vaites\ApacheTika\Clients\CLIClient;
10
use Vaites\ApacheTika\Clients\WebClient;
11
use Vaites\ApacheTika\Metadata\Metadata;
12
use Vaites\ApacheTika\Metadata\MetadataInterface;
13
14
/**
15
 * Apache Tika client interface
16
 *
17
 * @author  David Martínez <[email protected]>
18
 * @link    https://tika.apache.org/1.24/formats.html
19
 */
20
abstract class Client
21
{
22
    protected const MODE = null;
23
24
    /**
25
     * Checked flag
26
     *
27
     * @var bool
28
     */
29
    protected $checked = false;
30
31
    /**
32
     * Response using callbacks
33
     *
34
     * @var string
35
     */
36
    protected $response = null;
37
38
    /**
39
     * Platform (unix or win)
40
     *
41
     * @var string
42
     */
43
    protected $platform = null;
44
45
    /**
46
     * Cached responses to avoid multiple requests for the same file.
47
     *
48
     * @var array
49
     */
50
    protected $cache = [];
51
52
    /**
53
     * Text encoding
54
     *
55
     * @var string|null
56
     */
57
    protected $encoding = null;
58
59
    /**
60
     * Callback called on sequential read
61
     *
62
     * @var callable|null
63
     */
64
    protected $callback = null;
65
66
    /**
67
     * Enable or disable appending when using callback
68
     *
69
     * @var bool
70
     */
71
    protected $callbackAppend = true;
72
73
    /**
74
     * Size of chunks for callback
75
     *
76
     * @var int
77
     */
78
    protected $chunkSize = 1048576;
79
80
    /**
81
     * Remote download flag
82
     *
83
     * @var bool
84
     */
85
    protected $downloadRemote = false;
86
87
    /**
88
     * Configure client
89
     */
90
    public function __construct()
91
    {
92
        $this->platform = defined('PHP_WINDOWS_VERSION_MAJOR') ? 'win' : 'unix';
93
    }
94
95
    /**
     * Get a class instance throwing an exception if check fails
     *
     * A first argument ending in ".jar" selects the CLI client; any other value
     * (including null or no argument at all) selects the web client.
     *
     * @param string|null     $param1   path or host
     * @param string|int|null $param2   Java binary path or port for web client
     * @param array           $options  options for cURL request
     * @param bool            $check    check JAR file or server connection
     * @return \Vaites\ApacheTika\Clients\CLIClient|\Vaites\ApacheTika\Clients\WebClient
     * @throws \Exception
     */
    public static function make(?string $param1 = null, $param2 = null, array $options = [], bool $check = true): Client
    {
        // inspect the declared parameter rather than func_get_arg(0): the latter fails
        // when make() is called without arguments and hands null to preg_match()
        if($param1 !== null && preg_match('/\.jar$/', $param1))
        {
            // a string matching /\.jar$/ is never empty, so the path is used as given
            $java = $param2 ? (string) $param2 : null;

            return new CLIClient($param1, $java, $check);
        }

        // empty host/port values are normalized to null before reaching the web client
        $host = $param1 ? (string) $param1 : null;
        $port = $param2 ? (int) $param2 : null;

        return new WebClient($host, $port, $options, $check);
    }
122
123
    /**
124
     * Get a class instance delaying the check
125
     *
126
     * @param string|null $param1 path or host
127
     * @param int|null    $param2 Java binary path or port for web client
128
     * @param array       $options options for cURL request
129
     * @return \Vaites\ApacheTika\Clients\CLIClient|\Vaites\ApacheTika\Clients\WebClient
130
     * @throws \Exception
131
     */
132
    public static function prepare($param1 = null, $param2 = null, $options = []): Client
133
    {
134
        return self::make($param1, $param2, $options, false);
135
    }
136
137
    /**
138
     * Get the encoding
139
     */
140
    public function getEncoding(): ?string
141
    {
142
        return $this->encoding;
143
    }
144
145
    /**
146
     * Set the encoding
147
     *
148
     * @throws \Exception
149
     */
150
    public function setEncoding(string $encoding): self
151
    {
152
        if(!empty($encoding))
153
        {
154
            $this->encoding = $encoding;
155
        }
156
        else
157
        {
158
            throw new Exception('Invalid encoding');
159
        }
160
161
        return $this;
162
    }
163
164
    /**
165
     * Get the callback
166
     */
167
    public function getCallback(): ?callable
168
    {
169
        return $this->callback;
170
    }
171
172
    /**
     * Set the callback (callable or closure) to call on sequential read
     *
     * Closures and array callables are stored as given. Any other callable
     * (function name, "Class::method" string or invokable object) is wrapped in
     * a closure that forwards a single chunk argument to it.
     *
     * @param callable $callback  callable to store
     * @param bool     $append    value stored in the callbackAppend flag
     */
    public function setCallback(callable $callback, bool $append = true): self
    {
        $this->callbackAppend = $append;

        if($callback instanceof Closure || is_array($callback))
        {
            $this->callback = $callback;
        }
        else
        {
            // the callable type hint already guarantees validity, so strings and
            // invokable objects are both accepted here instead of being rejected
            $this->callback = function($chunk) use ($callback)
            {
                return call_user_func_array($callback, [$chunk]);
            };
        }

        return $this;
    }
199
200
    /**
201
     * Get the chunk size
202
     */
203
    public function getChunkSize(): int
204
    {
205
        return $this->chunkSize;
206
    }
207
208
    /**
209
     * Set the chunk size for sequential read
210
     *
211
     * @throws \Exception
212
     */
213
    public function setChunkSize(int $size): self
214
    {
215
        if(static::MODE == 'cli')
0 ignored issues
show
introduced by
The condition static::MODE == 'cli' is always false.
Loading history...
216
        {
217
            $this->chunkSize = $size;
218
        }
219
        else
220
        {
221
            throw new Exception('Chunk size is not supported on web mode');
222
        }
223
224
        return $this;
225
    }
226
227
    /**
228
     * Get the remote download flag
229
     */
230
    public function getDownloadRemote(): bool
231
    {
232
        return $this->downloadRemote;
233
    }
234
235
    /**
236
     * Set the remote download flag
237
     */
238
    public function setDownloadRemote(bool $download): self
239
    {
240
        $this->downloadRemote = (bool) $download;
241
242
        return $this;
243
    }
244
245
    /**
246
     * Gets file metadata
247
     *
248
     * @throws \Exception
249
     */
250
    public function getMetadata(string $file): MetadataInterface
251
    {
252
        $response = $this->parseJsonResponse($this->request('meta', $file));
253
254
        if($response instanceof stdClass === false)
255
        {
256
            throw new Exception("Unexpected metadata response for $file");
257
        }
258
259
        return Metadata::make($response, $file);
260
    }
261
262
    /**
263
     * Gets recursive file metadata where the returned array indexes are the file name.
264
     *
265
     * Example: for a sample.zip with an example.doc file, the returned array looks as if it were defined as:
266
     *
267
     *  [
268
     *      'sample.zip' => new Metadata()
269
     *      'sample.zip/example.doc' => new DocumentMetadata()
270
     *  ]
271
     *
272
     * @link https://cwiki.apache.org/confluence/display/TIKA/TikaServer#TikaServer-RecursiveMetadataandContent
273
     * @throws \Exception
274
     */
275
    public function getRecursiveMetadata(string $file, ?string $format = 'ignore'): array
276
    {
277
        if(in_array($format, ['text', 'html', 'ignore']) === false)
278
        {
279
            throw new Exception("Unknown recursive type (must be text, html, ignore or null)");
280
        }
281
282
        $response = $this->parseJsonResponse($this->request("rmeta/$format", $file));
283
284
        if(is_array($response) === false)
285
        {
286
            throw new Exception("Unexpected metadata response for $file");
287
        }
288
289
        $metadata = [];
290
291
        foreach($response as $item)
292
        {
293
            $name = basename($file);
294
            if(isset($item->{'X-TIKA:embedded_resource_path'}))
295
            {
296
                $name .= $item->{'X-TIKA:embedded_resource_path'};
297
            }
298
299
            $metadata[$name] = Metadata::make($item, $file);
300
        }
301
302
        return $metadata;
303
    }
304
305
    /**
306
     * Detect language
307
     *
308
     * @throws \Exception
309
     */
310
    public function getLanguage(string $file): string
311
    {
312
        return $this->request('lang', $file);
313
    }
314
315
    /**
316
     * Detect MIME type
317
     *
318
     * @throws \Exception
319
     */
320
    public function getMIME(string $file): string
321
    {
322
        return $this->request('mime', $file);
323
    }
324
325
    /**
326
     * Extracts HTML
327
     *
328
     * @throws \Exception
329
     */
330
    public function getHTML(string $file, callable $callback = null, bool $append = true): string
331
    {
332
        if(!is_null($callback))
333
        {
334
            $this->setCallback($callback, $append);
335
        }
336
337
        return $this->request('html', $file);
338
    }
339
340
    /**
341
     * Extracts XHTML
342
     *
343
     * @throws \Exception
344
     */
345
    public function getXHTML(string $file, callable $callback = null, bool $append = true): string
346
    {
347
        if(!is_null($callback))
348
        {
349
            $this->setCallback($callback, $append);
350
        }
351
352
        return $this->request('xhtml', $file);
353
    }
354
355
    /**
356
     * Extracts text
357
     *
358
     * @throws \Exception
359
     */
360
    public function getText(string $file, callable $callback = null, bool $append = true): string
361
    {
362
        if(!is_null($callback))
363
        {
364
            $this->setCallback($callback, $append);
365
        }
366
367
        return $this->request('text', $file);
368
    }
369
370
    /**
371
     * Extracts main text
372
     *
373
     * @throws \Exception
374
     */
375
    public function getMainText(string $file, callable $callback = null, bool $append = true): string
376
    {
377
        if(!is_null($callback))
378
        {
379
            $this->setCallback($callback, $append);
380
        }
381
382
        return $this->request('text-main', $file);
383
    }
384
385
    /**
386
     * Returns current Tika version
387
     *
388
     * @throws \Exception
389
     */
390
    public function getVersion(): string
391
    {
392
        return $this->request('version');
393
    }
394
395
    /**
396
     * Return the list of Apache Tika supported versions
397
     *
398
     * @throws \Exception
399
     */
400
    public function getSupportedVersions(): array
401
    {
402
        static $versions = null;
403
404
        if(is_null($versions))
405
        {
406
            $composer = file_get_contents(dirname(__DIR__) . '/composer.json');
407
408
            if($composer === false)
409
            {
410
                throw new Exception("An error ocurred trying to read package's composer.json file");
411
            }
412
413
            $versions = json_decode($composer, true)['extra']['supported-versions'] ?? null;
414
415
            if(empty($versions))
416
            {
417
                throw new Exception("An error ocurred trying to read package's composer.json file");
418
            }
419
        }
420
421
        return $versions;
422
    }
423
424
    /**
425
     * Sets the checked flag
426
     */
427
    public function setChecked(bool $checked): self
428
    {
429
        $this->checked = (bool) $checked;
430
431
        return $this;
432
    }
433
434
    /**
435
     * Checks if instance is checked
436
     */
437
    public function isChecked(): bool
438
    {
439
        return $this->checked;
440
    }
441
442
    /**
443
     * Check if a response is cached
444
     */
445
    protected function isCached(string $type, string $file): bool
446
    {
447
        return isset($this->cache[sha1($file)][$type]);
448
    }
449
450
    /**
451
     * Get a cached response
452
     *
453
     * @return mixed
454
     */
455
    protected function getCachedResponse(string $type, string $file)
456
    {
457
        return $this->cache[sha1($file)][$type] ?? null;
458
    }
459
460
    /**
461
     * Check if a request type must be cached
462
     */
463
    protected function isCacheable(string $type): bool
464
    {
465
        return in_array($type, ['lang', 'meta']);
466
    }
467
468
    /**
469
     * Caches a response
470
     *
471
     * @param mixed $response
472
     */
473
    protected function cacheResponse(string $type, $response, string $file): bool
474
    {
475
        $this->cache[sha1($file)][$type] = $response;
476
477
        return true;
478
    }
479
480
    /**
481
     * Checks if a specific version is supported
482
     */
483
    public function isVersionSupported(string $version): bool
484
    {
485
        return in_array($version, $this->getSupportedVersions());
486
    }
487
488
    /**
489
     * Check if a mime type is supported
490
     *
491
     * @param string $mime
492
     * @return bool
493
     * @throws \Exception
494
     */
495
    public function isMIMETypeSupported(string $mime): bool
496
    {
497
        return array_key_exists($mime, $this->getSupportedMIMETypes());
498
    }
499
500
    /**
     * Check the request before executing
     *
     * Getter request types and requests without a file are returned untouched.
     * Local files must exist. Remote files (anything starting with "http") must
     * answer with a 200 status line and, when the remote download flag is set,
     * are downloaded so the returned value is the path of the local copy.
     *
     * @param string      $type  request type
     * @param string|null $file  local path or remote URL, if any
     * @return string|null the file to use for the request (possibly a downloaded copy)
     * @throws \Exception if the local file does not exist or the remote file is not reachable
     */
    public function checkRequest(string $type, ?string $file = null): ?string
    {
        // no checks for getters or for requests without a file
        if($file === null || in_array($type, ['detectors', 'mime-types', 'parsers', 'version']))
        {
            return $file;
        }

        if(preg_match('/^http/', $file))
        {
            // invalid remote file
            $headers = get_headers($file);

            if(empty($headers) || !preg_match('/200/', $headers[0]))
            {
                throw new Exception("File $file can't be opened", 2);
            }

            // download remote file if required only for integrated downloader; this used
            // to live in a separate elseif that the remote check above made unreachable
            if($this->downloadRemote)
            {
                $file = $this->downloadFile($file);
            }
        }
        elseif(!file_exists($file))
        {
            // invalid local file
            throw new Exception("File $file can't be opened");
        }

        return $file;
    }
532
533
    /**
534
     * Parse the response returned by Apache Tika
535
     *
536
     * @return mixed
537
     * @throws \Exception
538
     */
539
    protected function parseJsonResponse(string $response)
540
    {
541
        // an empty response throws an error
542
        if(empty($response) || trim($response) == '')
543
        {
544
            throw new Exception('Empty response');
545
        }
546
547
        // decode the JSON response
548
        $json = json_decode($response);
549
550
        // exceptions if metadata is not valid
551
        if(json_last_error())
552
        {
553
            $message = function_exists('json_last_error_msg') ? json_last_error_msg() : 'Error parsing JSON response';
554
555
            throw new Exception($message, json_last_error());
556
        }
557
558
        return $json;
559
    }
560
561
    /**
     * Download file to a temporary folder
     *
     * The file and cURL handles are always released, and the temporary file is
     * removed again when the download fails.
     *
     * @link https://wiki.apache.org/tika/TikaJAXRS#Specifying_a_URL_Instead_of_Putting_Bytes
     * @param string $file  URL to download
     * @return string path of the temporary file holding the downloaded content
     * @throws \Exception if the temporary file can't be created or the download fails
     */
    protected function downloadFile(string $file): string
    {
        $dest = tempnam(sys_get_temp_dir(), 'TIKA');

        if($dest === false)
        {
            throw new Exception("Can't create a temporary file at " . sys_get_temp_dir());
        }

        $fp = fopen($dest, 'w+');

        if($fp === false)
        {
            throw new Exception("$dest can't be opened");
        }

        $ch = curl_init($file);

        if($ch === false)
        {
            fclose($fp);
            unlink($dest);

            throw new Exception("$file can't be downloaded");
        }

        curl_setopt($ch, CURLOPT_FILE, $fp);
        curl_setopt($ch, CURLOPT_TIMEOUT, 5);
        curl_exec($ch);

        // collect the outcome first so both handles can be released before any exception
        $errno = curl_errno($ch);
        $error = curl_error($ch);
        $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);

        curl_close($ch);
        fclose($fp);

        if($errno)
        {
            unlink($dest);

            throw new Exception($error);
        }

        if($code != 200)
        {
            unlink($dest);

            throw new Exception("$file can't be downloaded", $code);
        }

        return $dest;
    }
610
611
    /**
612
     * Must return the supported MIME types
613
     *
614
     * @throws \Exception
615
     */
616
    abstract public function getSupportedMIMETypes(): array;
617
618
    /**
619
     * Must return the available detectors
620
     *
621
     * @throws \Exception
622
     */
623
    abstract public function getAvailableDetectors(): array;
624
625
    /**
626
     * Must return the available parsers
627
     *
628
     * @throws \Exception
629
     */
630
    abstract public function getAvailableParsers(): array;
631
632
    /**
633
     * Check Java binary, JAR path or server connection
634
     */
635
    abstract public function check(): void;
636
637
    /**
638
     * Configure and make a request and return its results.
639
     *
640
     * @throws \Exception
641
     */
642
    abstract public function request(string $type, string $file = null): string;
643
}
644