|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace SPSS\Sav\Record; |
|
4
|
|
|
|
|
5
|
|
|
use SPSS\Buffer; |
|
6
|
|
|
use SPSS\Sav\Record; |
|
7
|
|
|
use SPSS\Utils; |
|
8
|
|
|
|
|
9
|
|
|
/**
 * Variable record (record type 2) of an SPSS system file.
 *
 * Holds the width, formats, name, optional label and missing values of one
 * variable, and knows how to read itself from and write itself to a Buffer.
 * When written, a very long string variable additionally emits one variable
 * record per extra segment.
 */
class Variable extends Record
{
    const TYPE = 2;

    /**
     * Number of bytes really stored in each segment of a very long string variable.
     */
    const REAL_VLS_CHUNK = 255;

    /**
     * Number of bytes per segment by which the amount of space for very long string variables is allocated.
     */
    const EFFECTIVE_VLS_CHUNK = 252;

    /**
     * Set to 0 for a numeric variable.
     * For a short string variable or the first part of a long string variable, this is set to the width of the string.
     * For the second and subsequent parts of a long string variable, set to -1, and the remaining fields in the structure are ignored.
     *
     * @var int Variable width.
     */
    public $width;

    /**
     * If the variable has no missing values, set to 0.
     * If the variable has one, two, or three discrete missing values, set to 1, 2, or 3, respectively.
     * If the variable has a range for missing variables, set to -2;
     * if the variable has a range for missing variables plus a single discrete value, set to -3.
     * A long string variable always has the value 0 here.
     * A separate record indicates missing values for long string variables
     *
     * @var int
     * @see \SPSS\Sav\Record\Info\LongStringMissingValues
     */
    public $missingValuesFormat = 0;

    /**
     * Print format for this variable.
     * [decimals, width, format, 0]
     *
     * @var array
     */
    public $print = [0, 0, 0, 0];

    /**
     * Write format for this variable.
     * [decimals, width, format, 0]
     *
     * @var array
     */
    public $write = [0, 0, 0, 0];

    /**
     * The variable name must begin with a capital letter or the at-sign (‘@’).
     * Subsequent characters may also be digits, octothorpes (‘#’), dollar signs (‘$’), underscores (‘_’), or full stops (‘.’).
     * The variable name is padded on the right with spaces.
     *
     * @var string Variable name.
     */
    public $name;

    /**
     * It has length label_len, rounded up to the nearest multiple of 32 bits.
     * The first label_len characters are the variable’s variable label.
     *
     * @var string
     */
    public $label;

    /**
     * It has the same number of 8-byte elements as the absolute value of $missingValuesFormat.
     * Each element is interpreted as a number for numeric variables (with HIGHEST and LOWEST indicated as described in the chapter introduction).
     * For string variables of width less than 8 bytes, elements are right-padded with spaces;
     * for string variables wider than 8 bytes,
     * only the first 8 bytes of each missing value are specified, with the remainder implicitly all spaces.
     * For discrete missing values, each element represents one missing value.
     * When a range is present, the first element denotes the minimum value in the range,
     * and the second element denotes the maximum value in the range.
     * When a range plus a value are present, the third element denotes the additional discrete missing value.
     *
     * @var array
     */
    public $missingValues = [];

    /**
     * Returns true if WIDTH is a very long string width, false otherwise.
     *
     * @param int $width Variable width in bytes.
     * @return bool True when the width exceeds REAL_VLS_CHUNK.
     */
    public static function isVeryLong(int $width): bool
    {
        return $width > self::REAL_VLS_CHUNK;
    }

    /**
     * Populates this record from the buffer.
     *
     * Fields are consumed in this order: width, has-label flag, missing values
     * format, print format, write format, 8-byte name, then the optional label
     * and the optional missing values.
     *
     * @param Buffer $buffer
     */
    public function read(Buffer $buffer)
    {
        $this->width = $buffer->readInt();
        $hasLabel = $buffer->readInt();
        $this->missingValuesFormat = $buffer->readInt();
        $this->print = Utils::intToBytes($buffer->readInt());
        $this->write = Utils::intToBytes($buffer->readInt());
        $this->name = rtrim($buffer->readString(8));

        if ($hasLabel) {
            $labelLength = $buffer->readInt();
            $this->label = $buffer->readString($labelLength, 4);
        }

        if ($this->missingValuesFormat != 0) {
            // The sign only encodes "range" vs "discrete"; the magnitude is the element count.
            $count = abs($this->missingValuesFormat);
            for ($i = 0; $i < $count; $i++) {
                // NOTE(review): write() emits 8-byte strings for string variables (width != 0),
                // but every element is decoded as a double here — confirm whether callers rely on that.
                $this->missingValues[] = $buffer->readDouble();
            }
        }
    }

    /**
     * Serialises this record to the buffer.
     *
     * Writes the record type, the width (capped at 255), the has-label flag,
     * the missing values format, the print/write formats and the 8-byte name,
     * followed by the optional label and missing values. For very long string
     * variables one extra variable record is appended per additional segment.
     *
     * @param Buffer $buffer
     */
    public function write(Buffer $buffer)
    {
        // empty() treats the string "0" as empty, which would silently drop a legitimate label.
        $hasLabel = ! empty($this->label) || $this->label === '0';

        $buffer->writeInt(self::TYPE);
        $buffer->writeInt(min(255, $this->width));
        $buffer->writeInt($hasLabel ? 1 : 0);
        $buffer->writeInt($this->missingValuesFormat);
        $buffer->writeInt(Utils::bytesToInt($this->print));
        $buffer->writeInt(Utils::bytesToInt($this->write));
        $buffer->writeString($this->name, 8);

        if ($hasLabel) {
            // Maxlength is 255 bytes; since we write utf8 a char can be multiple bytes.
            // Start from at most 255 characters, then drop whole characters until the byte budget fits.
            $label = mb_substr($this->label, 0, 255);
            $labelLengthBytes = mb_strlen($label, '8bit');
            while ($labelLengthBytes > 255) {
                // Strip one char, can be multiple bytes
                $label = mb_substr($label, 0, -1);
                $labelLengthBytes = mb_strlen($label, '8bit');
            }
            $buffer->writeInt($labelLengthBytes);
            // The label is stored padded to a multiple of 4 bytes.
            $buffer->writeString($label, Utils::roundUp($labelLengthBytes, 4));
        }

        // TODO: test
        if ($this->missingValuesFormat) {
            foreach ($this->missingValues as $val) {
                // Width 0 marks a numeric variable; anything else is stored as an 8-byte string.
                if ($this->width == 0) {
                    $buffer->writeDouble($val);
                } else {
                    $buffer->writeString($val, 8);
                }
            }
        }

        // Write additional segments for very long string variables.
        if (self::isVeryLong($this->width)) {
            $format = Utils::bytesToInt([0, 1, 1, 0]);

            foreach (Utils::getSegments($this->width) as $i => $segmentWidth) {
                // The first segment is the record already written above.
                if ($i == 0) {
                    continue;
                }

                $buffer->writeInt(self::TYPE);
                $buffer->writeInt($segmentWidth);
                $buffer->writeInt(1); // Has a variable label (written below)
                $buffer->writeInt(0); // No missing values
                $buffer->writeInt($format); // Print format
                $buffer->writeInt($format); // Write format
                $buffer->writeString($this->getSegmentName($i - 1), 8);

                /**
                 * If the long string variable is interpreted correctly SPSS does not show these segments.
                 * If something goes wrong they will be visible, so we provide a label to explain the situation.
                 */
                $segmentLabel = "Segment $i of variable {$this->name}, you should not see this";
                $length = mb_strlen($segmentLabel, '8bit');
                $buffer->writeInt($length);
                $buffer->writeString($segmentLabel, Utils::roundUp($length, 4));
            }
        }
    }

    /**
     * Constructs the name for a segment.
     * A long string variable consists of multiple segment variables.
     * The names for these segment variables need not be unique across the whole SPSS file.
     * The names for these segments must share a prefix of at least length 5 with the base variable name.
     *
     * @param int $seg Index of the segment
     * @return string Name of the segment
     */
    private function getSegmentName($seg = 0)
    {
        // First five characters of the base name followed by the segment index, upper-cased.
        $name = mb_substr($this->name, 0, 5);
        $name .= $seg;

        return mb_strtoupper($name);
    }
}
|
211
|
|
|
|