1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace SPSS\Sav\Record; |
4
|
|
|
|
5
|
|
|
use SPSS\Buffer; |
6
|
|
|
use SPSS\Sav\Record; |
7
|
|
|
use SPSS\Utils; |
8
|
|
|
|
9
|
|
|
class Variable extends Record
{
    const TYPE = 2;

    /**
     * Number of bytes really stored in each segment of a very long string variable.
     */
    const REAL_VLS_CHUNK = 255;

    /**
     * Number of bytes per segment by which the amount of space for very long string variables is allocated.
     */
    const EFFECTIVE_VLS_CHUNK = 252;

    /**
     * Set to 0 for a numeric variable.
     * For a short string variable or the first part of a long string variable, this is set to the width of the string.
     * For the second and subsequent parts of a long string variable, set to -1, and the remaining fields in the structure are ignored.
     *
     * @var int Variable width.
     */
    public $width;

    /**
     * If the variable has no missing values, set to 0.
     * If the variable has one, two, or three discrete missing values, set to 1, 2, or 3, respectively.
     * If the variable has a range for missing values, set to -2;
     * if the variable has a range for missing values plus a single discrete value, set to -3.
     * A long string variable always has the value 0 here.
     * A separate record indicates missing values for long string variables.
     *
     * @var int
     * @see \SPSS\Sav\Record\Info\LongStringMissingValues
     */
    public $missingValuesFormat = 0;

    /**
     * Print format for this variable.
     * [decimals, width, format, 0]
     *
     * @var array
     */
    public $print = [0, 0, 0, 0];

    /**
     * Write format for this variable.
     * [decimals, width, format, 0]
     *
     * @var array
     */
    public $write = [0, 0, 0, 0];

    /**
     * The variable name must begin with a capital letter or the at-sign (‘@’).
     * Subsequent characters may also be digits, octothorpes (‘#’), dollar signs (‘$’), underscores (‘_’), or full stops (‘.’).
     * The variable name is padded on the right with spaces.
     *
     * @var string Variable name.
     */
    public $name;

    /**
     * It has length label_len, rounded up to the nearest multiple of 32 bits.
     * The first label_len characters are the variable’s variable label.
     *
     * @var string
     */
    public $label;

    /**
     * It has the same number of 8-byte elements as the absolute value of $missingValuesFormat.
     * Each element is interpreted as a number for numeric variables (with HIGHEST and LOWEST indicated as described in the chapter introduction).
     * For string variables of width less than 8 bytes, elements are right-padded with spaces;
     * for string variables wider than 8 bytes,
     * only the first 8 bytes of each missing value are specified, with the remainder implicitly all spaces.
     * For discrete missing values, each element represents one missing value.
     * When a range is present, the first element denotes the minimum value in the range,
     * and the second element denotes the maximum value in the range.
     * When a range plus a value are present, the third element denotes the additional discrete missing value.
     *
     * @var array
     */
    public $missingValues = [];

    /**
     * Returns true if WIDTH is a very long string width, false otherwise.
     *
     * A width is "very long" when it exceeds REAL_VLS_CHUNK (255) bytes.
     *
     * @param int $width Variable width in bytes.
     * @return bool
     */
    public static function isVeryLong(int $width): bool
    {
        return $width > self::REAL_VLS_CHUNK;
    }

    /**
     * Populates this record from the buffer.
     *
     * Reading starts at the width field; the record type integer that write() emits
     * first is not consumed here. Fields are read in the order: width, has-label flag,
     * missing values format, print format, write format, 8-byte name, then the optional
     * label and the optional missing values.
     *
     * @param Buffer $buffer
     */
    public function read(Buffer $buffer)
    {
        // Start from a clean state so that reading a second record into the same
        // instance neither keeps a stale label nor accumulates missing values.
        $this->label = null;
        $this->missingValues = [];

        $this->width = $buffer->readInt();
        $hasLabel = $buffer->readInt();
        $this->missingValuesFormat = $buffer->readInt();
        $this->print = Utils::intToBytes($buffer->readInt());
        $this->write = Utils::intToBytes($buffer->readInt());
        $this->name = rtrim($buffer->readString(8));

        if ($hasLabel) {
            $labelLength = $buffer->readInt();
            // The label is stored padded to a multiple of 4 bytes.
            $this->label = $buffer->readString($labelLength, 4);
        }

        // The sign of the format only distinguishes discrete values from ranges;
        // its absolute value is the number of 8-byte elements that follow.
        $missingCount = abs($this->missingValuesFormat);
        // Mirror write(): numeric variables (width 0) store doubles, string variables
        // store 8-byte, space-padded strings.
        $isNumeric = (int) $this->width === 0;
        for ($i = 0; $i < $missingCount; $i++) {
            $this->missingValues[] = $isNumeric
                ? $buffer->readDouble()
                : rtrim($buffer->readString(8));
        }
    }

    /**
     * Writes this variable record to the buffer, followed by the extra segment
     * records when the variable is a very long string.
     *
     * @param Buffer $buffer
     */
    public function write(Buffer $buffer)
    {
        // Do not use empty() here: it would also discard the valid label "0".
        $label = (string) $this->label;
        $hasLabel = $label !== '';

        $buffer->writeInt(self::TYPE);
        // The width written for the first record is capped at 255.
        $buffer->writeInt(min(255, $this->width));
        $buffer->writeInt($hasLabel ? 1 : 0);
        $buffer->writeInt($this->missingValuesFormat);
        $buffer->writeInt(Utils::bytesToInt($this->print));
        $buffer->writeInt(Utils::bytesToInt($this->write));
        $buffer->writeString($this->name, 8);

        if ($hasLabel) {
            // Maximum length is 255 bytes. A character can occupy several bytes,
            // so cut by bytes without splitting a multi-byte character.
            $label = mb_strcut($label, 0, 255);
            $labelLengthBytes = mb_strlen($label, '8bit');
            $buffer->writeInt($labelLengthBytes);
            $buffer->writeString($label, Utils::roundUp($labelLengthBytes, 4));
        }

        // TODO: test
        // NOTE(review): every element of $missingValues is written; the count is not
        // checked against abs($missingValuesFormat) - confirm callers keep them in sync.
        if ($this->missingValuesFormat) {
            $isNumeric = $this->width == 0;
            foreach ($this->missingValues as $val) {
                if ($isNumeric) {
                    $buffer->writeDouble($val);
                } else {
                    $buffer->writeString($val, 8);
                }
            }
        }

        // Write additional segments for very long string variables.
        if (self::isVeryLong($this->width)) {
            $format = Utils::bytesToInt([0, 1, 1, 0]);

            foreach (Utils::getSegments($this->width) as $i => $segmentWidth) {
                // The first segment is the record already written above.
                if ($i == 0) {
                    continue;
                }
                $buffer->writeInt(self::TYPE);
                $buffer->writeInt($segmentWidth);
                $buffer->writeInt(1); // Has a variable label (written below)
                $buffer->writeInt(0); // No missing values
                $buffer->writeInt($format); // Print format
                $buffer->writeInt($format); // Write format
                $buffer->writeString($this->getSegmentName($i - 1), 8);

                /**
                 * If the long string variable is interpreted correctly SPSS does not show these segments.
                 * If something goes wrong they will be visible, so we provide a label to explain the situation.
                 */
                $segmentLabel = "Segment $i of variable {$this->name}, you should not see this";
                $length = mb_strlen($segmentLabel, '8BIT');
                $buffer->writeInt($length);
                $buffer->writeString($segmentLabel, Utils::roundUp($length, 4));
            }
        }
    }

    /**
     * Constructs the name for a segment.
     * A long string variable consists of multiple segment variables.
     * The names for these segment variables need not be unique across the whole SPSS file.
     * The names for these segments must share a prefix of at least length 5 with the base variable name.
     *
     * The result is the first 5 characters of the variable name followed by the
     * segment index, upper-cased.
     *
     * @param int $seg Index of the segment
     * @return string Name of the segment
     */
    private function getSegmentName($seg = 0)
    {
        $name = $this->name;
        $name = mb_substr($name, 0, 5);
        $name .= $seg;

        return mb_strtoupper($name);
    }
}
211
|
|
|
|