Completed
Push — master ( ccf80a...5ce56d )
by WEBEWEB
01:32
created

DerDeserializer::deserializeDocument()   A

Complexity

Conditions 5
Paths 9

Size

Total Lines 28

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 28
rs 9.1608
c 0
b 0
f 0
cc 5
nc 9
nop 1
1
<?php
2
3
/*
4
 * This file is part of the core-library package.
5
 *
6
 * (c) 2020 WEBEWEB
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace WBW\Library\Core\ThirdParty\OcrLad\Serializer;
13
14
use WBW\Library\Core\ThirdParty\OcrLad\Model\Document;
15
use WBW\Library\Core\ThirdParty\OcrLad\Model\Page;
16
use WBW\Library\Core\ThirdParty\OcrLad\Model\Word;
17
18
/**
19
 * DER deserializer.
20
 *
21
 * @author webeweb <https://github.com/webeweb>
22
 * @package WBW\Library\Core\ThirdParty\OcrLad\Serializer
23
 */
24
class DerDeserializer {

    /**
     * DER delimiter.
     *
     * Separates the fields of a header row and of a word row.
     *
     * @var string
     */
    const DER_DELIMITER = ";";

    /**
     * Deserialize a document.
     *
     * The first line of the file is read as the header row and split into page
     * records; every following line is read as a word record. Lines that cannot
     * be converted into a page or a word are skipped.
     *
     * @param string $filename The filename.
     * @return Document|null Returns the document in case of success, null otherwise.
     */
    public static function deserializeDocument($filename) {

        $stream = fopen($filename, "r");
        if (false === $stream) {
            // Without this guard feof() never reports the end of an invalid
            // stream and the read loop below would not terminate.
            return null;
        }

        try {

            $model = new Document();
            $model->setFilename($filename);

            // An empty file has no header row at all.
            $header  = fgets($stream);
            $headers = false === $header ? [] : self::processHeaders($header);

            foreach ($headers as $current) {

                $page = self::deserializePage($current);
                if (null !== $page) {
                    $model->addPage($page->setParent($model));
                }
            }

            // fgets() returns false at the end of the file and on a read error,
            // so the loop always terminates.
            while (false !== ($line = fgets($stream))) {

                $word = self::deserializeWord($line);
                if (null !== $word) {
                    $model->addWord($word);
                }
            }
        } finally {
            // Release the handle even when a model call throws.
            fclose($stream);
        }

        return self::processDocument($model);
    }

    /**
     * Deserialize a page.
     *
     * The raw data must split into exactly six fields. The first five are read,
     * in order, as resolution, width, height, rotation and tag; the sixth is
     * ignored.
     *
     * @param string $rawData The raw data.
     * @return Page|null Returns the page in case of success, null otherwise.
     */
    protected static function deserializePage($rawData) {

        $data = explode(self::DER_DELIMITER, $rawData);
        if (6 !== count($data)) {
            return null;
        }

        $model = new Page();
        // Every non-digit character of the first field is dropped before the conversion.
        $model->setResolution(intval(preg_replace("/[^0-9]/", "", $data[0])));
        $model->setWidth(intval($data[1]));
        $model->setHeight(intval($data[2]));
        $model->setRotation(intval($data[3]));
        $model->setTag(intval($data[4]));

        return $model;
    }

    /**
     * Deserialize a word.
     *
     * The raw data must split into exactly seven fields: content, type and page
     * number ("LB" or "NLB" followed by digits), OCR confidence, x1, y1, x2
     * and y2.
     *
     * @param string $rawData The raw data.
     * @return Word|null Returns the word in case of success, null otherwise.
     */
    protected static function deserializeWord($rawData) {

        $data = explode(self::DER_DELIMITER, $rawData);
        if (7 !== count($data)) {
            return null;
        }

        // A second field without a type and a page number makes the row unusable.
        if (1 !== preg_match("/(LB|NLB).?([0-9]+)/", trim($data[1]), $matches)) {
            return null;
        }

        $model = new Word();
        $model->setContent(trim($data[0]));
        $model->setType($matches[1]);
        $model->setPage(intval($matches[2]));
        // The confidence may use a decimal comma, floatval() needs a decimal point.
        $model->setOcrConfidence(floatval(str_replace(",", ".", $data[2])));
        $model->setX1(floatval($data[3]));
        $model->setY1(floatval($data[4]));
        $model->setX2(floatval($data[5]));
        $model->setY2(floatval($data[6]));

        return $model;
    }

    /**
     * Process the document.
     *
     * Indexes every word of the document and attaches it to the page found at
     * the word's page number minus one.
     *
     * @param Document $document The document.
     * @return Document Returns the document.
     */
    protected static function processDocument(Document $document) {

        foreach ($document->getWords() as $current) {

            $document->index($current);

            $page = $document->getPage($current->getPage() - 1);
            // NOTE(review): assumes getPage() yields null for an unknown index - confirm.
            if (null === $page) {
                continue;
            }

            $page->addWord($current->setParent($page));
        }

        return $document;
    }

    /**
     * Process the headers.
     *
     * Splits the header row into consecutive groups of six fields and joins
     * each complete group back into one raw page record. A trailing group with
     * fewer than six fields is dropped.
     *
     * @param string $rawData The raw data.
     * @return string[] Returns the headers.
     */
    protected static function processHeaders($rawData) {

        $rows = [];

        foreach (array_chunk(explode(self::DER_DELIMITER, $rawData), 6) as $current) {

            if (6 === count($current)) {
                $rows[] = implode(self::DER_DELIMITER, $current);
            }
        }

        return $rows;
    }
}