Completed
Push — master ( fedb3d...ad9766 )
by WEBEWEB
01:28
created

DerDeserializer::paginate()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 19
rs 9.6333
c 0
b 0
f 0
cc 3
nc 3
nop 1
1
<?php
2
3
/*
4
 * This file is part of the core-library package.
5
 *
6
 * (c) 2020 WEBEWEB
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace WBW\Library\Core\ThirdParty\OcrLad\Serializer;
13
14
use WBW\Library\Core\ThirdParty\OcrLad\Model\Document;
15
use WBW\Library\Core\ThirdParty\OcrLad\Model\Page;
16
use WBW\Library\Core\ThirdParty\OcrLad\Model\Word;
17
18
/**
19
 * DER deserializer.
20
 *
21
 * @author webeweb <https://github.com/webeweb>
22
 * @package WBW\Library\Core\ThirdParty\OcrLad\Serializer
23
 */
24
class DerDeserializer {

    /**
     * DER delimiter.
     *
     * Separates the fields of the header line and of every word line.
     *
     * @var string
     */
    const DER_DELIMITER = ";";

    /**
     * Deserialize a document.
     *
     * The first line of the file is the header: it holds one or more page
     * descriptions of six fields each. Every following line is parsed as a word;
     * lines that do not have the expected number of fields are skipped. Once
     * everything is read, the words are distributed over the pages.
     *
     * @param string $filename The filename (a local, readable regular file).
     * @return Document|null Returns the document in case of success, null otherwise.
     */
    public static function deserializeDocument($filename) {

        // Honour the documented "null otherwise" contract instead of handing
        // an invalid stream to fgets().
        if (false === is_string($filename) || false === is_file($filename) || false === is_readable($filename)) {
            return null;
        }

        $stream = fopen($filename, "r");
        if (false === $stream) {
            return null;
        }

        $model = new Document();
        $model->setFilename($filename);

        try {

            // An empty file has no header line: fgets() returns false.
            $header = fgets($stream);
            if (false !== $header) {

                foreach (self::splitHeader($header) as $current) {

                    $page = self::deserializePage($current);
                    if (null !== $page) {
                        $model->addPage($page);
                    }
                }
            }

            // Looping on the fgets() result (rather than feof()) avoids feeding
            // the final "false" read into the word parser.
            while (false !== ($line = fgets($stream))) {

                $word = self::deserializeWord($line);
                if (null !== $word) {
                    $model->addWord($word);
                }
            }
        } finally {
            // Release the handle even if a model call throws.
            fclose($stream);
        }

        return self::paginate($model);
    }

    /**
     * Deserialize a page.
     *
     * Expects exactly six delimited fields. The first five are read as
     * resolution (non-digit characters are stripped first), width, height,
     * rotation and tag; the sixth field is not used.
     *
     * @param string $rawData The raw data.
     * @return Page|null Returns the page in case of success, null otherwise.
     */
    protected static function deserializePage($rawData) {

        $data = explode(self::DER_DELIMITER, $rawData);
        if (6 !== count($data)) {
            return null;
        }

        $model = new Page();
        $model->setResolution(intval(preg_replace("/[^0-9]/", "", $data[0])));
        $model->setWidth(intval($data[1]));
        $model->setHeight(intval($data[2]));
        $model->setRotation(intval($data[3]));
        $model->setTag(intval($data[4]));

        return $model;
    }

    /**
     * Deserialize a word.
     *
     * Expects exactly seven delimited fields: content, type, OCR confidence
     * (a decimal comma is accepted and converted to a dot), then the X1, Y1,
     * X2 and Y2 coordinates.
     *
     * @param string $rawData The raw data.
     * @return Word|null Returns the word in case of success, null otherwise.
     */
    protected static function deserializeWord($rawData) {

        $data = explode(self::DER_DELIMITER, $rawData);
        if (7 !== count($data)) {
            return null;
        }

        $model = new Word();
        $model->setContent(trim($data[0]));
        $model->setType(trim($data[1]));
        $model->setOcrConfidence(floatval(str_replace(",", ".", $data[2])));
        $model->setX1(floatval($data[3]));
        $model->setY1(floatval($data[4]));
        $model->setX2(floatval($data[5]));
        $model->setY2(floatval($data[6]));

        return $model;
    }

    /**
     * Paginate.
     *
     * Walks the words of the document in order and attaches each one to a
     * page. A word whose Y1 is lower than the Y1 of the previous word is
     * treated as the first word of the next page.
     *
     * Words that fall beyond the last page declared by the header are left
     * unattached (they remain available through the document itself) instead
     * of triggering a call on an undefined page.
     *
     * @param Document $document The document.
     * @return Document Returns the document.
     */
    protected static function paginate(Document $document) {

        // The page list is not modified here, so it is fetched once.
        $pages = $document->getPages();

        $last = 0;
        $page = 0;

        foreach ($document->getWords() as $current) {

            $y1 = $current->getY1();

            // Y1 going backwards means the text restarted at the top of a new page.
            if ($y1 < $last) {
                ++$page;
            }

            $last = $y1;

            if (true === isset($pages[$page])) {
                $pages[$page]->addWord($current);
            }
        }

        return $document;
    }

    /**
     * Split a header.
     *
     * Cuts the header line into consecutive groups of six delimited fields
     * and returns each complete group re-joined with the delimiter. Trailing
     * fields that do not fill a whole group are dropped.
     *
     * @param string $rawData The raw data.
     * @return string[] Returns the headers.
     */
    protected static function splitHeader($rawData) {

        $rows = [];

        foreach (array_chunk(explode(self::DER_DELIMITER, $rawData), 6) as $chunk) {

            // Only the last chunk can be incomplete; it is not a page description.
            if (6 === count($chunk)) {
                $rows[] = implode(self::DER_DELIMITER, $chunk);
            }
        }

        return $rows;
    }
}