1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Bicycle\Tesseract\Bridge; |
4
|
|
|
|
5
|
|
|
use Bicycle\Tesseract\BridgeInterface; |
6
|
|
|
|
7
|
|
|
/**
 * Bridge that talks to the Tesseract C API through the PHP FFI extension.
 *
 * Please note that here we have \FFI class instance instead of FFI\TesseractInterface.
 */
class FFI implements BridgeInterface
{
    /** Private clone of the configuration passed to the constructor. */
    private Configuration $configuration;

    /**
     * I have to use interface here, but actually we have here \FFI class instance. Do not set type for this property!
     *
     * @var FFI\TesseractInterface
     */
    private $ffiInstance;

    /**
     * {@inheritDoc}
     *
     * Reads the C declarations from the configured header file and binds them to the configured shared library.
     *
     * @throws Exception\ExtensionRequiredException When the FFI extension is not loaded.
     * @throws Exception\Exception When the header file is missing, unreadable or empty, when the library
     *                             path is empty, or when FFI cannot bind the library.
     */
    public function __construct(Configuration $configuration)
    {
        if (!extension_loaded('ffi')) {
            throw new Exception\ExtensionRequiredException('FFI extension is required for this functionality');
        }
        // Prevent configuration change in runtime
        $this->configuration = clone $configuration;

        $headerPath = $this->configuration->getCApiHeaderpath();
        // Check readability first: file_get_contents() on a missing file raises an E_WARNING
        // before returning false, and we want our own exception to be the only failure signal.
        $definitions = (!empty($headerPath) && \is_readable($headerPath))
            ? file_get_contents($headerPath)
            : false;
        if (empty($definitions)) {
            throw new Exception\Exception('Cannot use FFI without valid header file');
        }

        $libraryPath = $this->configuration->getSharedLibraryPath();
        if (empty($libraryPath)) {
            throw new Exception\Exception('Problem with connecting library via FFI: empty library path');
        }

        try {
            /** @var FFI\TesseractInterface ffiInstance */
            $this->ffiInstance = \FFI::cdef($definitions, $libraryPath);
        } catch (\FFI\Exception $e) {
            // Keep the original FFI exception attached so the root cause stays visible in traces.
            throw new Exception\Exception(
                sprintf('Problem with connecting library via FFI: %s', $e->getMessage()),
                0,
                $e
            );
        }
    }

    /**
     * {@inheritDoc}
     *
     * @return string The value reported by the library's TessVersion() function.
     */
    public function getVersion(): string
    {
        return $this->ffiInstance->TessVersion();
    }

    /**
     * {@inheritDoc}
     *
     * Initializes a temporary Tesseract API handle without an explicit language and collects the
     * entries of the vector returned by TessBaseAPIGetAvailableLanguagesAsVector.
     *
     * @return string[]
     *
     * @throws Exception\Exception When Tesseract cannot be initialized.
     */
    public function getAvailableLanguages(): array
    {
        $result = [];
        /**
         * @psalm-suppress MixedAssignment
         */
        $baseApiHandle = $this->createBaseApi(null);

        try {
            /** @psalm-suppress MixedAssignment */
            $languages = $this->ffiInstance->TessBaseAPIGetAvailableLanguagesAsVector($baseApiHandle);
            if (!\is_null($languages)) {
                try {
                    $counter = 0;
                    // According to body of TessBaseAPIGetAvailableLanguagesAsVector method,
                    // last element will be nullptr
                    /** @psalm-suppress MixedAssignment */
                    while (!\is_null($language = $languages[$counter++])) {
                        $result[] = \FFI::string($language);
                    }
                } finally {
                    // The vector is allocated by the library and has to be handed back to it.
                    $this->freeTextArray($languages);
                }
            }
        } finally {
            $this->destroyBaseApi($baseApiHandle);
        }

        return $result;
    }

    /**
     * {@inheritDoc}
     *
     * @param string   $filename  Path of the file to recognize; must be readable.
     * @param string[] $languages Language codes to use; defaults to "eng" when empty.
     *
     * @return string Recognized text, or an empty string when page processing fails or yields no text.
     *
     * @throws Exception\InputProblemException        When the input file is not readable.
     * @throws Exception\UnavailableLanguageException When a requested language is not available.
     * @throws Exception\Exception                    When Tesseract cannot be initialized.
     */
    public function recognizeFromFile(string $filename, array $languages = []): string
    {
        if (!\is_readable($filename)) {
            throw new Exception\InputProblemException('Cannot read input file');
        }
        if (empty($languages)) {
            $languages = ['eng'];
        } else {
            $unknownLanguages = array_diff($languages, $this->getAvailableLanguages());
            if (!empty($unknownLanguages)) {
                $exceptionMessage = sprintf(
                    'Unknown language(s) %s for recognition.',
                    implode(', ', $unknownLanguages)
                );
                throw new Exception\UnavailableLanguageException($exceptionMessage);
            }
        }

        /**
         * @psalm-suppress MixedAssignment
         */
        $baseApiHandle = $this->createBaseApi(implode('+', $languages));

        try {
            if (!$this->ffiInstance->TessBaseAPIProcessPages($baseApiHandle, $filename, null, 0, null)) {
                return '';
            }

            /** @psalm-suppress MixedAssignment */
            $nativeText = $this->ffiInstance->TessBaseAPIGetUTF8Text($baseApiHandle);
            if (\is_null($nativeText)) {
                // A NULL pointer comes back from FFI as PHP null; \FFI::string() would reject it.
                return '';
            }

            try {
                // \FFI::string() copies the bytes, so the native buffer can be released afterwards.
                return \FFI::string($nativeText);
            } finally {
                $this->freeText($nativeText);
            }
        } finally {
            $this->destroyBaseApi($baseApiHandle);
        }
    }

    /**
     * Creates a Tesseract base API handle and initializes it for the given language specification.
     *
     * The handle is deleted again before any failure is reported, so callers only ever receive
     * a handle they have to pass to destroyBaseApi().
     *
     * @param string|null $languageSpecification Language codes joined with "+", or null to pass no language.
     *
     * @return mixed Handle returned by TessBaseAPICreate.
     *
     * @throws Exception\Exception When TessBaseAPIInit3 reports a failure.
     */
    private function createBaseApi(?string $languageSpecification)
    {
        /** @psalm-suppress MixedAssignment */
        $baseApiHandle = $this->ffiInstance->TessBaseAPICreate();

        try {
            // Tesseract initialization; a truthy return value means failure.
            if ($this->ffiInstance->TessBaseAPIInit3($baseApiHandle, null, $languageSpecification)) {
                throw new Exception\Exception('Cannot initialize tesseract');
            }
        } catch (\Throwable $e) {
            $this->ffiInstance->TessBaseAPIDelete($baseApiHandle);

            throw $e;
        }

        return $baseApiHandle;
    }

    /**
     * Ends and deletes a handle obtained from createBaseApi().
     *
     * @param mixed $baseApiHandle
     */
    private function destroyBaseApi($baseApiHandle): void
    {
        $this->ffiInstance->TessBaseAPIEnd($baseApiHandle);
        $this->ffiInstance->TessBaseAPIDelete($baseApiHandle);
    }

    /**
     * Releases a text buffer allocated by the library (best effort).
     *
     * @param mixed $text Pointer returned by TessBaseAPIGetUTF8Text.
     */
    private function freeText($text): void
    {
        try {
            $this->ffiInstance->TessDeleteText($text);
        } catch (\FFI\Exception $e) {
            // The configured header may not declare TessDeleteText; in that case the buffer
            // simply stays allocated instead of breaking an otherwise successful call.
        }
    }

    /**
     * Releases a string vector allocated by the library (best effort).
     *
     * @param mixed $textArray Pointer returned by TessBaseAPIGetAvailableLanguagesAsVector.
     */
    private function freeTextArray($textArray): void
    {
        try {
            $this->ffiInstance->TessDeleteTextArray($textArray);
        } catch (\FFI\Exception $e) {
            // The configured header may not declare TessDeleteTextArray; in that case the vector
            // simply stays allocated instead of breaking an otherwise successful call.
        }
    }
}
138
|
|
|
|