CLI::getAvailableLanguages()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 2
nc 2
nop 0
dl 0
loc 5
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Bicycle\Tesseract\Bridge;
4
5
use Bicycle\Tesseract\Bridge\CLI\Result;
6
use Bicycle\Tesseract\BridgeInterface;
7
8
/**
 * Bridge that drives tesseract through its command-line binary.
 *
 * Every operation builds a shell command from the configured binary path and
 * runs it with exec(); stderr is merged into stdout (2>&1), so the captured
 * output lines may contain diagnostics as well as the regular output.
 */
class CLI implements BridgeInterface
{
    /** Private copy of the configuration taken at construction time. */
    private Configuration $configuration;

    /**
     * Stores a copy of the configuration and verifies that a CLI binary path
     * has been configured.
     *
     * @param Configuration $configuration
     *
     * @throws Exception\Exception when the configured CLI binary path is empty
     */
    public function __construct(Configuration $configuration)
    {
        // Clone so that later changes to the caller's object cannot affect this bridge.
        $this->configuration = clone $configuration;
        if (empty($this->configuration->getCliBinaryPath())) {
            throw new Exception\Exception('Cannot use CLI without proper cli path');
        }
    }

    /**
     * Runs the binary with "--version" and extracts the version from the
     * first output line.
     *
     * The returned value is everything from the first run of digits/dots to
     * the end of that line; an empty string is returned when there is no
     * output or no match.
     *
     * {@inheritDoc}
     */
    public function getVersion(): string
    {
        $output = $this->executeCommand(['--version'])->getOutputArray();
        if (empty($output)) {
            return '';
        }

        $matches = [];
        preg_match('/[\d\.]+.*$/', (string) $output[0], $matches);

        return $matches[0] ?? '';
    }

    /**
     * Runs the binary with "--list-langs" and returns its output lines
     * without the first one.
     *
     * NOTE(review): the first line is presumably tesseract's
     * "List of available languages" header - confirm against the supported
     * tesseract versions.
     *
     * {@inheritDoc}
     */
    public function getAvailableLanguages(): array
    {
        $output = $this->executeCommand(['--list-langs'])->getOutputArray();

        return empty($output) ? [] : array_slice($output, 1);
    }

    /**
     * Recognizes the text of an image file.
     *
     * Tesseract is asked to write its result to a uniquely named file in the
     * system temp directory; the file is read, stripped of trailing form
     * feeds and always removed afterwards.
     *
     * {@inheritDoc}
     *
     * @throws Exception\InputProblemException        when $filename is not readable
     * @throws Exception\UnavailableLanguageException when a requested language is not available
     * @throws Exception\Exception                    when no readable output file was produced
     */
    public function recognizeFromFile(string $filename, array $languages = []): string
    {
        if (!\is_readable($filename)) {
            throw new Exception\InputProblemException('Cannot read input file');
        }

        $languagesArg = $this->buildLanguagesArgument($languages);

        $tmpOutFile = $this->createTemporaryOutputBase();
        // Tesseract appends ".txt" to the output base name on its own.
        $realTmpOutFile = $tmpOutFile . '.txt';

        $arguments = [
            escapeshellarg($filename),
            escapeshellarg($tmpOutFile),
        ];
        if ($languagesArg !== '') {
            $arguments[] = $languagesArg;
        }

        $result = $this->executeCommand($arguments);

        try {
            $contents = is_file($realTmpOutFile) ? file_get_contents($realTmpOutFile) : false;
            if ($contents === false) {
                // The command failed or wrote nothing: surface its output
                // instead of returning an empty string.
                throw new Exception\Exception(
                    sprintf(
                        'Tesseract did not produce a readable output file. Command output: %s',
                        implode(PHP_EOL, (array) $result->getOutputArray())
                    )
                );
            }

            return rtrim($contents, "\f");
        } finally {
            // Never leave the temporary file behind, even on failure.
            if (is_file($realTmpOutFile)) {
                unlink($realTmpOutFile);
            }
        }
    }

    /**
     * Builds the shell-escaped "-l" argument for the requested languages.
     *
     * @param array $languages language codes; an empty array means "no -l argument"
     *
     * @return string the argument, or an empty string when no language was requested
     *
     * @throws Exception\UnavailableLanguageException when a language is not reported by getAvailableLanguages()
     */
    private function buildLanguagesArgument(array $languages): string
    {
        if (empty($languages)) {
            return '';
        }

        $intersection = array_intersect($languages, $this->getAvailableLanguages());
        if (count($intersection) !== count($languages)) {
            throw new Exception\UnavailableLanguageException(
                sprintf(
                    'Unknown language(s) %s for recognition.',
                    implode(', ', array_diff($languages, $intersection))
                )
            );
        }

        return sprintf('-l %s', escapeshellarg(implode('+', $languages)));
    }

    /**
     * Returns a unique output base path (without extension) inside the system
     * temp directory.
     *
     * The random suffix is hex-encoded so it can never contain a path
     * separator or any other character that is special in file names.
     *
     * @return string
     */
    private function createTemporaryOutputBase(): string
    {
        return sprintf(
            '%s%sphp_tesseract_ocr_%s',
            sys_get_temp_dir(),
            DIRECTORY_SEPARATOR,
            bin2hex(random_bytes(5))
        );
    }

    /**
     * Runs the configured binary with the given arguments and captures its
     * output lines and exit code.
     *
     * @param array $arguments arguments appended verbatim to the command line;
     *                         callers must shell-escape any value that needs it
     *
     * @return Result
     */
    private function executeCommand(array $arguments): Result
    {
        $output = [];
        $resultCode = null;
        // The constructor guarantees the path is not empty. It is escaped so a
        // path containing spaces or shell metacharacters is passed as one word.
        // NOTE(review): assumes the configured value is a bare path without
        // extra arguments - confirm against Configuration.
        $cliPath = escapeshellarg((string) $this->configuration->getCliBinaryPath());
        exec(
            sprintf('%s %s 2>&1', $cliPath, implode(' ', $arguments)),
            $output,
            $resultCode
        );

        return new Result($resultCode, $output);
    }
}
124