1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace SPSS\Sav\Record; |
4
|
|
|
|
5
|
|
|
use SPSS\Buffer; |
6
|
|
|
use SPSS\Sav\Record; |
7
|
|
|
use SPSS\Utils; |
8
|
|
|
|
9
|
|
|
/**
 * Variable record (record type 2) of an SPSS system file.
 *
 * One record describes a numeric variable, a string variable, or one
 * segment of a very long string variable. Continuation records (width -1)
 * are written by {@see Variable::writeBlank()}.
 */
class Variable extends Record
{
    const TYPE = 2;

    /**
     * Number of bytes really stored in each segment of a very long string variable.
     */
    const REAL_VLS_CHUNK = 255;

    /**
     * Number of bytes per segment by which the amount of space for very long string variables is allocated.
     */
    const EFFECTIVE_VLS_CHUNK = 252;

    /**
     * Set to 0 for a numeric variable.
     * For a short string variable or the first part of a long string variable, this is set to the width of the string.
     * For the second and subsequent parts of a long string variable, set to -1, and the remaining fields in the structure are ignored.
     *
     * @var int Variable width.
     */
    public $width;

    /**
     * If the variable has no missing values, set to 0.
     * If the variable has one, two, or three discrete missing values, set to 1, 2, or 3, respectively.
     * If the variable has a range for missing variables, set to -2;
     * if the variable has a range for missing variables plus a single discrete value, set to -3.
     * A long string variable always has the value 0 here.
     * A separate record indicates missing values for long string variables.
     *
     * @var int
     * @see \SPSS\Sav\Record\Info\LongStringMissingValues
     */
    public $missingValuesFormat = 0;

    /**
     * Print format for this variable.
     * [decimals, width, format, 0]
     *
     * @var array
     */
    public $print = [0, 0, 0, 0];

    /**
     * Write format for this variable.
     * [decimals, width, format, 0]
     *
     * @var array
     */
    public $write = [0, 0, 0, 0];

    /**
     * The variable name must begin with a capital letter or the at-sign ('@').
     * Subsequent characters may also be digits, octothorpes ('#'), dollar signs ('$'), underscores ('_'), or full stops ('.').
     * The variable name is padded on the right with spaces.
     *
     * @var string Variable name.
     */
    public $name;

    /**
     * It has length label_len, rounded up to the nearest multiple of 32 bits.
     * The first label_len characters are the variable's variable label.
     * Stays null when the record that was read carries no label.
     *
     * @var string|null
     */
    public $label;

    /**
     * It has the same number of 8-byte elements as the absolute value of $missingValuesFormat.
     * Each element is interpreted as a number for numeric variables (with HIGHEST and LOWEST indicated as described in the chapter introduction).
     * For string variables of width less than 8 bytes, elements are right-padded with spaces;
     * for string variables wider than 8 bytes,
     * only the first 8 bytes of each missing value are specified, with the remainder implicitly all spaces.
     * For discrete missing values, each element represents one missing value.
     * When a range is present, the first element denotes the minimum value in the range,
     * and the second element denotes the maximum value in the range.
     * When a range plus a value are present, the third element denotes the additional discrete missing value.
     *
     * @var array
     */
    public $missingValues = [];

    /**
     * Returns true if WIDTH is a very long string width, false otherwise.
     *
     * A width is "very long" when it exceeds REAL_VLS_CHUNK bytes.
     *
     * @param int $width
     * @return bool
     */
    public static function isVeryLong($width)
    {
        return $width > self::REAL_VLS_CHUNK;
    }

    /**
     * Populates this record from the buffer.
     *
     * Fields are consumed in this order: width, has-label flag, missing
     * values format, print format, write format, 8-byte name, then the
     * optional label and the optional missing values.
     *
     * @param Buffer $buffer
     */
    public function read(Buffer $buffer)
    {
        // Reset optional fields so that reading into a reused instance does
        // not keep a stale label or accumulate missing values.
        $this->label = null;
        $this->missingValues = [];

        $this->width = $buffer->readInt();
        $hasLabel = $buffer->readInt();
        $this->missingValuesFormat = $buffer->readInt();
        $this->print = Utils::intToBytes($buffer->readInt());
        $this->write = Utils::intToBytes($buffer->readInt());
        $this->name = rtrim($buffer->readString(8));

        if ($hasLabel) {
            $labelLength = $buffer->readInt();
            // The second argument is presumably the 4-byte alignment of the
            // label field (see $label) - TODO confirm against Buffer.
            $this->label = $buffer->readString($labelLength, 4);
        }

        // The sign of the format only distinguishes discrete values from
        // ranges; its absolute value is the number of 8-byte elements.
        $missingCount = abs($this->missingValuesFormat);
        for ($i = 0; $i < $missingCount; $i++) {
            // Mirror write(): numeric variables store doubles, string
            // variables store 8 bytes right-padded with spaces.
            $this->missingValues[] = $this->width == 0
                ? $buffer->readDouble()
                : rtrim($buffer->readString(8));
        }
    }

    /**
     * Writes this record, followed by the extra segment records when the
     * variable is a very long string.
     *
     * @param Buffer $buffer
     */
    public function write(Buffer $buffer)
    {
        $seg0width = Utils::segmentAllocWidth($this->width, 0);
        // Do not use empty() here: the label "0" is a legitimate label.
        $hasLabel = (string) $this->label !== '';

        $buffer->writeInt(self::TYPE);
        $buffer->writeInt($seg0width);
        $buffer->writeInt($hasLabel ? 1 : 0);
        $buffer->writeInt($this->missingValuesFormat);
        $buffer->writeInt(Utils::bytesToInt($this->print));
        $buffer->writeInt(Utils::bytesToInt($this->write));
        $buffer->writeString($this->name, 8);

        if ($hasLabel) {
            // Max length is 255 bytes. A character can span several bytes,
            // so cut on a character boundary instead of at a raw byte offset.
            $this->writeLabelField($buffer, mb_strcut((string) $this->label, 0, 255));
        }

        // TODO: test
        if ($this->missingValuesFormat) {
            foreach ($this->missingValues as $val) {
                if ($this->width == 0) {
                    $buffer->writeDouble($val);
                } else {
                    $buffer->writeString($val, 8);
                }
            }
        }

        // NOTE(review): continuation records for the first segment are only
        // emitted for very long strings. The original author deliberately
        // disabled them for ordinary long strings (width 9..255); confirm
        // that the writer emits them elsewhere or that readers accept this.

        // Write additional segments for very long string variables.
        if (self::isVeryLong($this->width)) {
            $this->writeBlank($buffer, $seg0width);
            $segmentCount = Utils::widthToSegments($this->width);
            for ($i = 1; $i < $segmentCount; $i++) {
                $segmentWidth = Utils::segmentAllocWidth($this->width, $i);
                $format = Utils::bytesToInt([0, 1, max($segmentWidth, 1), 0]);

                $buffer->writeInt(self::TYPE);
                $buffer->writeInt($segmentWidth);
                // A segment label is always written below, so the flag must
                // always be 1, even when the base variable has no label.
                $buffer->writeInt(1);
                $buffer->writeInt(0); // No missing values
                $buffer->writeInt($format); // Print format
                $buffer->writeInt($format); // Write format
                $buffer->writeString($this->getSegmentName($i - 1), 8);

                // If the long string variable is interpreted correctly SPSS does not show these segments.
                // If something goes wrong they will be visible, so we provide a label to explain the situation.
                $segmentLabel = "Segment $i of variable {$this->name}, you should not see this";
                $this->writeLabelField($buffer, $segmentLabel);

                $this->writeBlank($buffer, $segmentWidth);
            }
        }
    }

    /**
     * Writes the continuation records (width -1) that follow a string
     * segment: one record for every 8 bytes of $width beyond the first 8.
     *
     * @param Buffer $buffer
     * @param int $width Allocated width of the segment being padded out.
     */
    public function writeBlank(Buffer $buffer, $width)
    {
        // assert(self::widthToSegments($width) == 1);

        for ($i = 8; $i < $width; $i += 8) {
            $buffer->writeInt(self::TYPE);
            $buffer->writeInt(-1); // Marks a continuation record
            $buffer->writeInt(0); // No variable label
            $buffer->writeInt(0); // No missing values
            $buffer->writeInt(0x011d01); // Print format
            $buffer->writeInt(0x011d01); // Write format
            // The name field is always 8 bytes; continuation records use a blank name.
            $buffer->write(str_repeat(' ', 8));
        }
    }

    /**
     * Constructs the name for a segment.
     * A long string variable consists of multiple segment variables.
     * The names for these segment variables need not be unique across the whole SPSS file.
     * The names for these segments must share a prefix of at least length 5 with the base variable name.
     *
     * The result is the first 5 characters of the variable name followed by
     * the segment index, converted to upper case.
     *
     * @param int $seg Index of the segment
     * @return string Name of the segment
     */
    public function getSegmentName($seg = 0)
    {
        $prefix = mb_substr($this->name, 0, 5);

        return mb_strtoupper($prefix . $seg);
    }

    /**
     * Writes a label as its byte length followed by the label itself,
     * with the field length rounded up to a multiple of 4 bytes.
     *
     * @param Buffer $buffer
     * @param string $label Label, already truncated to the allowed size.
     */
    private function writeLabelField(Buffer $buffer, $label)
    {
        $lengthBytes = mb_strlen($label, '8bit');
        $buffer->writeInt($lengthBytes);
        $buffer->writeString($label, Utils::roundUp($lengthBytes, 4));
    }
}
234
|
|
|
|