Group::popMultiUnitValues()   A
last analyzed

Complexity

Conditions 5
Paths 9

Size

Total Lines 29
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 13
nc 9
nop 2
dl 0
loc 29
rs 9.5222
c 0
b 0
f 0
1
<?php
2
declare(strict_types=1);
3
4
namespace MazenTouati\NoEmoji\Entities;
5
6
/**
 * A group is a self-contained object that stores and manipulates the data of a single unicodes group
 *
 * The group's plain text content is scanned line by line; every line that starts with
 * hexadecimal units followed by a ";" contributes one value (the units, separated by spaces).
 *
 * @author Mazen Touati <[email protected]>
 */
class Group
{
    /**
     * The array key for unicodes with single unit of 16 bit
     *
     * @var string
     */
    const SINGLE_UNIT_16_BIT = '16bit';

    /**
     * The array key for unicodes with single unit of 20 bit
     *
     * @var string
     */
    const SINGLE_UNIT_20_BIT = '20bit';

    /**
     * The array key for unicodes with multi unit of 16 bit
     *
     * @var string
     */
    const MULTI_UNIT_16_BIT = '16bit-MultiUnitValues';

    /**
     * The array key for unicodes with multi unit of 20 bit
     *
     * @var string
     */
    const MULTI_UNIT_20_BIT = '20bit-MultiUnitValues';

    /**
     * Group's name or title
     *
     * @var string
     */
    public $name = '';

    /**
     * The content of the group as a plain text
     *
     * @var string
     */
    private $_content = '';

    /**
     * The unicodes that are extracted from the group, indexed by the bits keys declared above
     *
     * @var array
     */
    private $_unicodes = [];

    /**
     * Set of patterns to extract the different unicodes from the group
     *
     * The order matters: it defines the order of the keys produced by extractUnicodes().
     *
     * @var array
     */
    private $_patterns = [
        self::SINGLE_UNIT_20_BIT => '/^([0-9A-F]{5}(?: [0-9A-F]+)*)\s*;/m',
        self::SINGLE_UNIT_16_BIT  => '/^([0-9A-F]{4}(?: [0-9A-F]+)*)\s*;/m',
    ];

    /**
     * @param string $name    Group's name or title
     * @param string $content The content of the group as a plain text
     */
    public function __construct(string $name, string $content)
    {
        $this->name = $name;
        $this->_content = $content;
    }

    /**
     * Unicodes getter
     *
     * @param  string|null $bits When not null, only the data stored under that bits key is returned
     *
     * @return array The group's unicodes (an empty array when the requested key does not exist)
     */
    public function getUnicodes($bits = null)
    {
        if ($bits === null) {
            return $this->_unicodes;
        }

        return $this->_unicodes[$bits] ?? [];
    }

    /**
     * Extracts the unicodes from the group's plain text content
     *
     * The result replaces whatever was previously extracted; keys with no values are omitted.
     *
     * @return Group
     */
    public function extractUnicodes()
    {
        $unicodes = [];

        foreach ($this->_patterns as $bits => $pattern) {
            $matches = [];
            preg_match_all($pattern, $this->_content, $matches);

            // Fall back to an empty list if the capture group is missing (e.g. PCRE failure)
            $extractedUnicodes = $matches[1] ?? [];

            // After this call $extractedUnicodes only holds the single unit values
            $multiUnitUnicodes = $this->popMultiUnitValues($extractedUnicodes, $bits);

            if ($extractedUnicodes !== []) {
                $unicodes[$bits] = $extractedUnicodes;
            }

            if ($multiUnitUnicodes === []) {
                continue;
            }

            if ($bits === self::SINGLE_UNIT_20_BIT) {
                // For the 20 bit multi unit values we transform the multi unit to single unit
                $unicodes[self::MULTI_UNIT_20_BIT] = $this->flattenMultiUnitValues($multiUnitUnicodes);
            } else {
                // The 16 bit multi unit values use an initial unit that is used by ASCII characters
                // so we have to avoid flattening them
                $unicodes[self::MULTI_UNIT_16_BIT] = $multiUnitUnicodes;
            }
        }

        $this->_unicodes = $unicodes;

        return $this;
    }

    /**
     * Removes the unicodes with multi units from the input array and returns an array with the
     * removed values, discarding the modifiers
     *
     * A value is considered "multi unit" when it is longer than one unit, i.e. longer than
     * (bits / 4) hexadecimal digits. A multi unit value is considered a "modifier" (and is dropped
     * from the result) when its first unit is itself one of the remaining single unit values.
     *
     * @param  array  $data Unicodes' array; modified in place so that it only keeps the single
     *                      unit values, re-indexed from 0
     * @param  string $bits The target bit's key; it must start with the bit width (e.g. "16bit")
     *
     * @return array The removed multi unit values, modifiers excluded, indexed from 0
     */
    public function popMultiUnitValues(array &$data, string $bits): array
    {
        // Number of hexadecimal digits of one unit ("16bit" => 4, "20bit" => 5).
        // intdiv() keeps the result an integer whatever the width is.
        $digits = intdiv((int) $bits, 4);

        // Split the values: anything longer than one unit is a multi unit value
        $singleUnitValues = [];
        $multiUnitValues = [];
        foreach ($data as $value) {
            // True when the string has a character past the first unit (length > $digits)
            if (isset($value[$digits])) {
                $multiUnitValues[] = $value;
            } else {
                $singleUnitValues[] = $value;
            }
        }
        $data = $singleUnitValues;

        // Discard the modifiers: exact lookup of the first unit among the single unit values
        $extracted = array_fill_keys($singleUnitValues, true);
        $kept = [];
        foreach ($multiUnitValues as $value) {
            $firstUnit = substr($value, 0, $digits);
            // If the first unit is already extracted then this multi unit value represents a modifier
            if (!isset($extracted[$firstUnit])) {
                $kept[] = $value;
            }
        }

        return $kept;
    }

    /**
     * Returns only the first unit of the given multi unit values
     *
     * Duplicated first units are returned once, in order of first appearance. Units are compared
     * as exact strings, so numeric-looking values such as "10E01" and "100E0" stay distinct.
     *
     * @param array $data Input multi unit values (units separated by a single space)
     *
     * @return array Output single unit values
     */
    public function flattenMultiUnitValues($data): array
    {
        $seen = [];
        $firstUnits = [];

        foreach ($data as $value) {
            $firstUnit = explode(' ', $value)[0];

            // Ignore already existent units
            if (isset($seen[$firstUnit])) {
                continue;
            }

            $seen[$firstUnit] = true;
            $firstUnits[] = $firstUnit;
        }

        return $firstUnits;
    }
}
191