1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* @package midcom.services |
4
|
|
|
* @author The Midgard Project, http://www.midgard-project.org |
5
|
|
|
* @copyright The Midgard Project, http://www.midgard-project.org |
6
|
|
|
* @license http://www.gnu.org/licenses/lgpl.html GNU Lesser General Public License |
7
|
|
|
*/ |
8
|
|
|
|
9
|
|
|
/** |
10
|
|
|
* This class encapsulates a single indexer document. It is used for both indexing |
11
|
|
|
* and retrieval. |
12
|
|
|
* |
13
|
|
|
* A document consists of a number of fields, each field has different properties |
14
|
|
|
* when handled by the indexer (exact behavior depends, as always, on the indexer |
15
|
|
|
* backend in use). On retrieval, this field information is lost, all fields being |
16
|
|
|
* of the same type (naturally). The core indexer backend supports these field |
17
|
|
|
* types: |
18
|
|
|
* |
19
|
|
|
* - <i>date</i> is a date-wrapped field suitable for use with the Date Filter. |
20
|
|
|
* - <i>keyword</i> is stored and indexed, but not tokenized. |
21
|
|
|
* - <i>unindexed</i> is stored but neither indexed nor tokenized. |
22
|
|
|
* - <i>unstored</i> is not stored, but indexed and tokenized. |
23
|
|
|
* - <i>text</i> is stored, indexed and tokenized. |
24
|
|
|
* |
25
|
|
|
* This class should not be instantiated directly, a new instance of this class |
26
|
|
|
* can be obtained using the midcom_services_indexer class. |
27
|
|
|
* |
28
|
|
|
* A number of predefined fields are available using member fields. These fields |
29
|
|
|
* are all meta-fields. See their individual documentation for details. All fields |
30
|
|
|
* are mandatory unless mentioned otherwise explicitly and, as always, assumed to |
31
|
|
|
* be in the local charset. |
32
|
|
|
* |
33
|
|
|
* Remember, that both date and unstored fields are not available on retrieval. |
34
|
|
|
* For the core fields, all timestamps are stored twice therefore, once as searchable |
35
|
|
|
* field, and once as readable timestamp. |
36
|
|
|
* |
37
|
|
|
* The class will automatically pass all data to the i18n charset conversion functions, |
38
|
|
|
* thus you work using your site's charset like usual. UTF-8 conversion is done |
39
|
|
|
* implicitly. |
40
|
|
|
* |
41
|
|
|
* @package midcom.services |
42
|
|
|
* @see midcom_services_indexer |
43
|
|
|
*/ |
44
|
|
|
class midcom_services_indexer_document |
45
|
|
|
{ |
46
|
|
|
/** |
47
|
|
|
* An associative array containing all fields of the current document. |
48
|
|
|
* |
49
|
|
|
* Each field is indexed by its name (a string). The value is another |
50
|
|
|
* array containing the fields "name", "type" and "content". |
51
|
|
|
* |
52
|
|
|
* @var array |
53
|
|
|
*/ |
54
|
|
|
private $_fields = []; |
55
|
|
|
|
56
|
|
|
/** |
57
|
|
|
* The i18n service, used for charset conversion. |
58
|
|
|
* |
59
|
|
|
* @var midcom_services_i18n |
60
|
|
|
*/ |
61
|
|
|
protected $_i18n; |
62
|
|
|
|
63
|
|
|
/** |
64
|
|
|
* This is the score of this document. Only populated on resultset documents, |
65
|
|
|
* of course. |
66
|
|
|
* |
67
|
|
|
* @var double |
68
|
|
|
*/ |
69
|
|
|
public $score = 0.0; |
70
|
|
|
|
71
|
|
|
/* ------ START OF DOCUMENT FIELDS --------- */ |
72
|
|
|
|
73
|
|
|
/** |
74
|
|
|
* The Resource Identifier of this document. |
75
|
|
|
* |
76
|
|
|
* Must be UTF-8 on assignment already. |
77
|
|
|
* |
78
|
|
|
* This field is mandatory. |
79
|
|
|
* |
80
|
|
|
* @var string |
81
|
|
|
*/ |
82
|
|
|
public $RI = ''; |
83
|
|
|
|
84
|
|
|
/** |
85
|
|
|
* Two letter language code of the document content |
86
|
|
|
* |
87
|
|
|
* This field is optional. |
88
|
|
|
* |
89
|
|
|
* @var string |
90
|
|
|
*/ |
91
|
|
|
public $lang = ''; |
92
|
|
|
|
93
|
|
|
/** |
94
|
|
|
* The GUID of the topic the document is assigned to. |
95
|
|
|
* |
96
|
|
|
* May be empty for non-midgard resources. |
97
|
|
|
* |
98
|
|
|
* This field is mandatory. |
99
|
|
|
* |
100
|
|
|
* @var string GUID |
101
|
|
|
*/ |
102
|
|
|
public $topic_guid = ''; |
103
|
|
|
|
104
|
|
|
/** |
105
|
|
|
* The name of the component responsible for the document. |
106
|
|
|
* |
107
|
|
|
* May be empty for non-midgard resources. |
108
|
|
|
* |
109
|
|
|
* This field is mandatory. |
110
|
|
|
* |
111
|
|
|
* @var string |
112
|
|
|
*/ |
113
|
|
|
public $component = ''; |
114
|
|
|
|
115
|
|
|
/** |
116
|
|
|
* The fully qualified URL to the document, this should be a PermaLink. |
117
|
|
|
* |
118
|
|
|
* This field is mandatory. |
119
|
|
|
* |
120
|
|
|
* @var string |
121
|
|
|
*/ |
122
|
|
|
public $document_url = ''; |
123
|
|
|
|
124
|
|
|
/** |
125
|
|
|
* The time of document creation, this is a UNIX timestamp. |
126
|
|
|
* |
127
|
|
|
* This field is mandatory. |
128
|
|
|
* |
129
|
|
|
* @var int |
130
|
|
|
*/ |
131
|
|
|
public $created = 0; |
132
|
|
|
|
133
|
|
|
/** |
134
|
|
|
* The time of the last document modification, this is a UNIX timestamp. |
135
|
|
|
* |
136
|
|
|
* This field is mandatory. |
137
|
|
|
* |
138
|
|
|
* @var int |
139
|
|
|
*/ |
140
|
|
|
public $edited = 0; |
141
|
|
|
|
142
|
|
|
/** |
143
|
|
|
* The timestamp of indexing. |
144
|
|
|
* |
145
|
|
|
* This field is added automatically and to be considered read-only. |
146
|
|
|
* |
147
|
|
|
* @var int |
148
|
|
|
*/ |
149
|
|
|
public $indexed = 0; |
150
|
|
|
|
151
|
|
|
/** |
152
|
|
|
* The MidgardPerson who created the object. |
153
|
|
|
* |
154
|
|
|
* This is optional. |
155
|
|
|
* |
156
|
|
|
* @var midcom_db_person |
157
|
|
|
*/ |
158
|
|
|
public $creator; |
159
|
|
|
|
160
|
|
|
/** |
161
|
|
|
* The MidgardPerson who modified the object the last time. |
162
|
|
|
* |
163
|
|
|
* This is optional. |
164
|
|
|
* |
165
|
|
|
* @var midcom_db_person |
166
|
|
|
*/ |
167
|
|
|
public $editor; |
168
|
|
|
|
169
|
|
|
/** |
170
|
|
|
* The title of the document |
171
|
|
|
* |
172
|
|
|
* This is mandatory. |
173
|
|
|
* |
174
|
|
|
* @var string |
175
|
|
|
*/ |
176
|
|
|
public $title = ''; |
177
|
|
|
|
178
|
|
|
/** |
179
|
|
|
* The content of the document |
180
|
|
|
* |
181
|
|
|
* This is mandatory. |
182
|
|
|
* |
183
|
|
|
* This field is empty on documents retrieved from the index. |
184
|
|
|
* |
185
|
|
|
* @var string |
186
|
|
|
*/ |
187
|
|
|
public $content = ''; |
188
|
|
|
|
189
|
|
|
/** |
190
|
|
|
* The abstract of the document |
191
|
|
|
* |
192
|
|
|
* This is optional. |
193
|
|
|
* |
194
|
|
|
* @var string |
195
|
|
|
*/ |
196
|
|
|
public $abstract = ''; |
197
|
|
|
|
198
|
|
|
/** |
199
|
|
|
* The author of the document |
200
|
|
|
* |
201
|
|
|
* This is optional. |
202
|
|
|
* |
203
|
|
|
* @var string |
204
|
|
|
*/ |
205
|
|
|
public $author = ''; |
206
|
|
|
|
207
|
|
|
/** |
208
|
|
|
* An additional tag indicating the source of the document for use by the |
209
|
|
|
* component doing the indexing. |
210
|
|
|
* |
211
|
|
|
* This value is not indexed and should not be used by anybody except the |
212
|
|
|
* component doing the indexing. |
213
|
|
|
* |
214
|
|
|
* This is optional. |
215
|
|
|
* |
216
|
|
|
* @var string |
217
|
|
|
*/ |
218
|
|
|
public $source = ''; |
219
|
|
|
|
220
|
|
|
/** |
221
|
|
|
* The full path to the topic that houses the document. |
222
|
|
|
* |
223
|
|
|
* For external resources, this should be either a MidCOM topic, to which this |
224
|
|
|
* resource is associated or some "directory" after which you could filter. |
225
|
|
|
* You may also leave it empty prohibiting it to appear on any topic-specific search. |
226
|
|
|
* |
227
|
|
|
* The value should be fully qualified, as returned by MIDCOM_NAV_FULLURL, including |
228
|
|
|
* a trailing slash, f.x. https://host/path/to/topic/ |
229
|
|
|
* |
230
|
|
|
* This is optional. |
231
|
|
|
* |
232
|
|
|
* @var string |
233
|
|
|
*/ |
234
|
|
|
public $topic_url = ''; |
235
|
|
|
|
236
|
|
|
/** |
237
|
|
|
* The type of the document, set by subclasses and added to the index |
238
|
|
|
* automatically. |
239
|
|
|
* |
240
|
|
|
* The type *must* reflect the original type hierarchy. It is to be set |
241
|
|
|
* using the $this->_set_type call <i>after</i> initializing the base class. |
242
|
|
|
* |
243
|
|
|
* @see is_a() |
244
|
|
|
* @see _set_type() |
245
|
|
|
* @var string |
246
|
|
|
*/ |
247
|
|
|
public $type = ''; |
248
|
|
|
|
249
|
|
|
/** |
250
|
|
|
* This is to have support for #651 without rewriting all components' index methods |
251
|
|
|
* |
252
|
|
|
* If set to false the indexer backend will silently skip this document. |
253
|
|
|
* |
254
|
|
|
* @see http://trac.midgard-project.org/ticket/651 |
255
|
|
|
* @var boolean |
256
|
|
|
*/ |
257
|
|
|
public $actually_index = true; |
258
|
|
|
|
259
|
|
|
/* ------ END OF DOCUMENT FIELDS --------- */ |
260
|
|
|
|
261
|
|
|
/**
 * Set up the document by fetching the i18n service, which is used for all
 * charset conversions done by this class.
 */
public function __construct()
{
    $midcom = midcom::get();
    $this->_i18n = $midcom->i18n;
}
268
|
|
|
|
269
|
|
|
/**
 * Look up the content of a single field.
 *
 * The stored content is UTF-8; it is passed through the i18n service's
 * convert_from_utf8() before being returned.
 *
 * @param string $name The name of the field to read.
 * @return mixed The converted field content, or false (after logging an
 *     info-level debug message) if no field of that name exists.
 */
public function get_field(string $name)
{
    if (array_key_exists($name, $this->_fields)) {
        return $this->_i18n->convert_from_utf8($this->_fields[$name]['content']);
    }

    debug_add("Field {$name} not found in the document.", MIDCOM_LOG_INFO);
    return false;
}
282
|
|
|
|
283
|
|
|
/**
 * Hand out the raw internal field records.
 *
 * Every record is an array with the keys "name", "type" and "content", the
 * content being stored UTF-8 encoded. Intended for the indexer backends,
 * which need the complete field information when indexing; regular callers
 * should use get_field() instead.
 *
 * @return array All field records, indexed by field name.
 */
public function get_fields() : array
{
    return $this->_fields;
}
294
|
|
|
|
295
|
|
|
/**
 * Drop a field from the document.
 *
 * Removing a field that does not exist is a silent no-op.
 *
 * @param string $name The name of the field to remove.
 */
public function remove_field(string $name)
{
    unset($this->_fields[$name]);
}
302
|
|
|
|
303
|
|
|
/**
 * Add a date field. A UNIX timestamp is expected, which is converted to an
 * ISO 8601 UTC timestamp (YYYY-MM-DDTHH:MM:SSZ) before storage.
 *
 * Direct specification of the ISO timestamp is not yet possible due
 * to lacking validation outside the timestamp range.
 *
 * If a field of the same name is already present, it is overwritten
 * silently.
 *
 * @param string $name The field's name.
 * @param int $timestamp The UNIX timestamp to store.
 */
public function add_date(string $name, int $timestamp)
{
    // gmdate() replaces gmstrftime(), which is deprecated as of PHP 8.1; the
    // format below produces the same string as '%Y-%m-%dT%H:%M:%SZ'.
    // The result is plain ASCII and therefore always UTF-8 conformant.
    $this->_add_field($name, 'date', gmdate('Y-m-d\TH:i:s\Z', $timestamp), true);
}
318
|
|
|
|
319
|
|
|
/**
 * Create a normal date field and an unindexed _TS-postfixed timestamp field at the same time.
 *
 * This is useful because date fields are not stored in a readable format;
 * it can't even be determined that they were a date in the first place.
 * The _TS field therefore keeps the original timestamp value for retrieval.
 *
 * @param string $name The field's name, "_TS" is appended for the plain-timestamp field.
 * @param int $timestamp The UNIX timestamp to store in both fields.
 */
public function add_date_pair(string $name, int $timestamp)
{
    $this->add_date($name, $timestamp);
    // add_unindexed() declares a string parameter; cast explicitly instead of
    // relying on implicit int-to-string coercion.
    $this->add_unindexed("{$name}_TS", (string) $timestamp);
}
334
|
|
|
|
335
|
|
|
/**
 * Add a field of type "keyword".
 *
 * The content is converted to UTF-8 by _add_field(). A field of the same
 * name that already exists is overwritten.
 *
 * @param string $name The field's name.
 * @param string $content The field's content.
 */
public function add_keyword(string $name, string $content)
{
    $this->_add_field($name, 'keyword', $content);
}
339
|
|
|
|
340
|
|
|
/**
 * Add a field of type "unindexed".
 *
 * The content is converted to UTF-8 by _add_field(). A field of the same
 * name that already exists is overwritten.
 *
 * @param string $name The field's name.
 * @param string $content The field's content.
 */
public function add_unindexed(string $name, string $content)
{
    $this->_add_field($name, 'unindexed', $content);
}
344
|
|
|
|
345
|
|
|
/**
 * Add a field of type "unstored".
 *
 * The content is reduced to plain text with html2text() before it is
 * handed to _add_field(), which converts it to UTF-8.
 *
 * @param string $name The field's name.
 * @param string $content The field's content, may contain HTML.
 */
public function add_unstored(string $name, string $content)
{
    $plain = $this->html2text($content);
    $this->_add_field($name, 'unstored', $plain);
}
349
|
|
|
|
350
|
|
|
/**
 * Add a field of type "text".
 *
 * The content is reduced to plain text with html2text() before it is
 * handed to _add_field(), which converts it to UTF-8.
 *
 * @param string $name The field's name.
 * @param string $content The field's content, may contain HTML.
 */
public function add_text(string $name, string $content)
{
    $plain = $this->html2text($content);
    $this->_add_field($name, 'text', $plain);
}
354
|
|
|
|
355
|
|
|
/**
 * Add a field of type "result", i.e. a field read back from a search result.
 *
 * This should normally not be called manually; the indexer uses it when
 * building a document out of a search result. No charset conversion is
 * applied to the content.
 *
 * @param string $name The field's name.
 * @param string $content The field's content, which is <b>assumed to be UTF-8 already</b>
 */
public function add_result(string $name, $content)
{
    $already_utf8 = true;
    $this->_add_field($name, 'result', $content, $already_utf8);
}
366
|
|
|
|
367
|
|
|
/**
 * Add a person field.
 *
 * The person is stored as a text field holding its GUID; an empty string is
 * stored when no person (or no GUID) is available.
 *
 * @param string $name The field's name.
 * @param midcom_db_person|null $person The person to reference, if any.
 */
private function add_person(string $name, ?midcom_db_person $person)
{
    $guid = $person->guid ?? '';
    $this->add_text($name, $guid);
}
374
|
|
|
|
375
|
|
|
/**
 * Translate all member variables into the corresponding field records.
 *
 * The backend should call this immediately before indexing. As a side
 * effect, $indexed is set to the current time and $author is filled with
 * the creator's name if it is empty and a creator with a name is set.
 */
public function members_to_fields()
{
    // Stamp the indexing time; it is written out as __INDEXED below.
    $this->indexed = time();

    // Default the author to the creator's name where possible.
    if ($this->author == '') {
        if (isset($this->creator->name)) {
            $this->author = $this->creator->name;
        }
    }

    // __RI is deliberately not written here, the backends take care of it.
    $this->add_unindexed('__LANG', $this->lang);
    $this->add_text('__TOPIC_GUID', $this->topic_guid);
    $this->add_text('__COMPONENT', $this->component);
    $this->add_unindexed('__DOCUMENT_URL', $this->document_url);
    $this->add_text('__TOPIC_URL', $this->topic_url);

    // Timestamps are stored both as date field and as plain *_TS value.
    $this->add_date_pair('__CREATED', $this->created);
    $this->add_date_pair('__EDITED', $this->edited);
    $this->add_date_pair('__INDEXED', $this->indexed);

    $this->add_text('title', $this->title);
    $this->add_unstored('content', $this->content);

    $this->add_unindexed('__SOURCE', $this->source);
    $this->add_person('__CREATOR', $this->creator);
    $this->add_person('__EDITOR', $this->editor);

    $this->add_text('author', $this->author);
    $this->add_text('abstract', $this->abstract);
    $this->add_text('__TYPE', $this->type);
}
412
|
|
|
|
413
|
|
|
/**
 * Populate all relevant members with the respective values after
 * retrieving a document from the index.
 *
 * get_field() returns false for fields that are missing, so every value is
 * cast to the documented type of its member: string members fall back to ''
 * and the timestamp members to 0 instead of being set to false.
 * $creator and $editor are only resolved when a non-empty value was stored.
 */
public function fields_to_members()
{
    $this->RI = (string) $this->get_field('__RI');
    $this->lang = (string) $this->get_field('__LANG');
    $this->topic_guid = (string) $this->get_field('__TOPIC_GUID');
    $this->component = (string) $this->get_field('__COMPONENT');
    $this->document_url = (string) $this->get_field('__DOCUMENT_URL');
    $this->topic_url = (string) $this->get_field('__TOPIC_URL');

    // Date fields are not readable on retrieval, the *_TS twins hold the
    // original UNIX timestamps.
    $this->created = (int) $this->get_field('__CREATED_TS');
    $this->edited = (int) $this->get_field('__EDITED_TS');
    $this->indexed = (int) $this->get_field('__INDEXED_TS');

    $this->title = (string) $this->get_field('title');

    $this->source = (string) $this->get_field('__SOURCE');
    if ($creator = $this->get_field('__CREATOR')) {
        $this->creator = $this->read_person($creator);
    }
    if ($editor = $this->get_field('__EDITOR')) {
        $this->editor = $this->read_person($editor);
    }
    $this->author = (string) $this->get_field('author');
    $this->abstract = (string) $this->get_field('abstract');
    $this->type = (string) $this->get_field('__TYPE');
}
441
|
|
|
|
442
|
|
|
/**
 * Internal helper which actually stores a field record.
 *
 * An existing field of the same name is replaced.
 *
 * @param string $name The field's name, also used as key of the record.
 * @param string $type The field type to record.
 * @param mixed $content The field's content.
 * @param bool $is_utf8 Set to true if the content is UTF-8 already;
 *     otherwise it is run through the i18n service's convert_to_utf8().
 */
protected function _add_field(string $name, string $type, $content, bool $is_utf8 = false)
{
    if (!$is_utf8) {
        $content = $this->_i18n->convert_to_utf8($content);
    }

    $this->_fields[$name] = [
        'name' => $name,
        'type' => $type,
        'content' => $content
    ];
}
453
|
|
|
|
454
|
|
|
/**
 * Convert HTML to plain text (relatively simple).
 *
 * Script blocks (including their content) and all remaining HTML tags are
 * replaced, HTML entities are decoded through the i18n service, and runs
 * of whitespace are collapsed into single spaces before trimming.
 *
 * Tags are replaced with a space rather than an empty string, so that
 * constructs like <li>torben</li><li>nehmer</li> still yield separate words.
 *
 * @param string $text The HTML input.
 * @return string The plain-text representation.
 */
public function html2text(string $text) : string
{
    $patterns = [
        "'\s*<script[^>]*?>.*?</script>\s*'si", // script blocks with their content
        "'\s*<[\/\!]*?[^<>]*?>\s*'si", // any remaining tag
    ];

    // A single replacement string is applied to every pattern in the list.
    $stripped = preg_replace($patterns, ' ', $text);
    $decoded = $this->_i18n->html_entity_decode($stripped);
    $collapsed = preg_replace('/\s+/s', ' ', $decoded);

    return trim($collapsed);
}
477
|
|
|
|
478
|
|
|
/**
 * Checks whether this document is an instance of the given document type.
 *
 * This mirrors the is_a object hierarchy check for MidCOM documents: the
 * check succeeds when $type starts with the given type string.
 *
 * @see $type
 * @see _set_type()
 * @param string $document_type The type (prefix) to test against.
 * @return bool True if $type begins with $document_type.
 */
public function is_a(string $document_type) : bool
{
    $matches = str_starts_with($this->type, $document_type);
    return $matches;
}
491
|
|
|
|
492
|
|
|
/**
 * Sets the type of the object, reflecting the inheritance hierarchy.
 *
 * The first call sets $type directly; every further call appends the new
 * type, separated by an underscore.
 *
 * @see $type
 * @see is_a()
 * @param string $type The type name to set or append.
 */
protected function _set_type(string $type)
{
    $this->type = empty($this->type)
        ? $type
        : "{$this->type}_{$type}";
}
506
|
|
|
|
507
|
|
|
/**
 * Tries to determine the topic GUID, topic URL and component using NAP's
 * reverse-lookup capabilities, based on the GUID held in $source.
 *
 * If the lookup fails, a debug message is logged and nothing is changed;
 * you then have to set the members $topic_guid, $topic_url and $component
 * manually.
 */
protected function process_topic()
{
    $nap = new midcom_helper_nav();
    $entry = $nap->resolve_guid($this->source, true);

    if (!$entry) {
        debug_add("Failed to resolve the topic, skipping autodetection.");
        return;
    }

    // Leaves carry no topic information of their own, use the node they belong to.
    if ($entry[MIDCOM_NAV_TYPE] == 'leaf') {
        $entry = $nap->get_node($entry[MIDCOM_NAV_NODEID]);
    }

    $this->topic_guid = $entry[MIDCOM_NAV_GUID];
    $this->topic_url = $entry[MIDCOM_NAV_FULLURL];
    $this->component = $entry[MIDCOM_NAV_COMPONENT];
}
528
|
|
|
|
529
|
|
|
/**
 * Fills created, edited, author, creator and editor of the document from
 * the metadata of a Midgard object.
 *
 * @param midcom_core_dbaobject $object The object whose metadata is read.
 */
public function read_metadata_from_object(midcom_core_dbaobject $object)
{
    // A non-empty published value takes precedence over the creation date.
    $this->created = $object->metadata->published ?: $object->metadata->created;
    $this->edited = $object->metadata->revised;

    // Author heuristics: prefer the authors list, fall back to the creator.
    $author_source = null;
    if (!empty($object->metadata->authors)) {
        $author_source = $object->metadata->authors;
    } elseif (!empty($object->metadata->creator)) {
        $author_source = $object->metadata->creator;
    }
    if ($author_source !== null) {
        $this->author = $this->read_authorname($author_source);
    }

    if (isset($object->metadata->creator)) {
        $this->creator = $this->read_person($object->metadata->creator);
    }

    if (isset($object->metadata->revisor)) {
        $this->editor = $this->read_person($object->metadata->revisor);
    }
}
553
|
|
|
|
554
|
|
|
/**
 * Load a person by GUID through midcom_db_person::get_cached().
 *
 * @param string $guid The GUID of the person to load.
 * @return midcom_db_person|null The person, or null if loading it raised
 *     a midcom_error.
 */
private function read_person(string $guid) : ?midcom_db_person
{
    $person = null;
    try {
        $person = midcom_db_person::get_cached($guid);
    } catch (midcom_error $e) {
        // Deliberately ignored, the caller gets null for unloadable persons.
    }
    return $person;
}
565
|
|
|
|
566
|
|
|
/**
 * Gets the person name for the given identifier.
 *
 * If the input is an imploded_wrapped list of several identifiers
 * (separated by "|"), the first non-empty one is used.
 *
 * @param string $input A single identifier or a "|"-separated list.
 * @return string The user's name, or an empty string if no identifier is
 *     left after filtering or no name could be determined.
 */
private function read_authorname(string $input) : string
{
    // Check for imploded_wrapped datamanager storage.
    if (str_contains($input, '|')) {
        // Find the first non-empty value in the list and use that.
        $candidates = array_values(array_filter(explode('|', $input)));
        if (!isset($candidates[0])) {
            // Only separators/empty entries: nothing to look up, so do not
            // hand a null identifier to the auth service.
            return '';
        }
        $input = $candidates[0];
    }

    return midcom::get()->auth->get_user($input)->name ?? '';
}
580
|
|
|
} |
581
|
|
|
|
Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.
For example, imagine you have a variable `$accountId` that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the `id` property of an instance of the `Account` class. This class holds a proper account, so the id value must no longer be false.
Either this assignment is in error or a type check should be added for that assignment.