Passed
Push — master ( 2cae91...cd729b )
by Maxence
17:27
created

IndexDocument::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 2
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
declare(strict_types=1);
3
4
5
/**
6
 * FullTextSearch - Full text search framework for Nextcloud
7
 *
8
 * This file is licensed under the Affero General Public License version 3 or
9
 * later. See the COPYING file.
10
 *
11
 * @author Maxence Lange <[email protected]>
12
 * @copyright 2018
13
 * @license GNU AGPL version 3 or any later version
14
 *
15
 * This program is free software: you can redistribute it and/or modify
16
 * it under the terms of the GNU Affero General Public License as
17
 * published by the Free Software Foundation, either version 3 of the
18
 * License, or (at your option) any later version.
19
 *
20
 * This program is distributed in the hope that it will be useful,
21
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 * GNU Affero General Public License for more details.
24
 *
25
 * You should have received a copy of the GNU Affero General Public License
26
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
27
 *
28
 */
29
30
31
namespace OCP\FullTextSearch\Model;
32
33
34
use JsonSerializable;
35
36
/**
 * Class IndexDocument
 *
 * This is one of the main classes of the FullTextSearch, used as a data
 * transfer object. An IndexDocument is created to manage documents around
 * FullTextSearch, during an index and during a search.
 * The uniqueness of an IndexDocument is made by the Id of the Content Provider
 * and the Id of the original document within the Content Provider.
 *
 * We will call original document the source from which the IndexDocument is
 * generated. As an example, an original document can be a file, a mail, ...
 *
 * @since 15.0.0
 *
 * @package OCP\FullTextSearch\Model
 */
class IndexDocument implements JsonSerializable {


	/** Content is stored as-is. */
	const NOT_ENCODED = 0;
	/** Content is stored encoded in base64. */
	const ENCODED_BASE64 = 1;


	/** @var string Id of the original document */
	protected $id = '';

	/** @var string Id of the content provider */
	protected $providerId = '';

	/** @var DocumentAccess|null unset until setAccess() is called */
	protected $access;

	/** @var IIndex|null unset until setIndex() is called */
	protected $index;

	/** @var int */
	protected $modifiedTime = 0;

	/** @var string */
	protected $source = '';

	/** @var array */
	protected $tags = [];

	/** @var array */
	protected $metaTags = [];

	/** @var array sub tags, keyed by the $sub passed to addSubTag() */
	protected $subTags = [];

	/** @var string */
	protected $title = '';

	/** @var string */
	protected $content = '';

	/** @var string */
	protected $hash = '';

	/** @var array part content, keyed by part name */
	protected $parts = [];

	/** @var string */
	protected $link = '';

	/** @var array */
	protected $more = [];

	/** @var array */
	protected $excerpts = [];

	/** @var string */
	protected $score = '';

	/** @var array mixed values, keyed by info name */
	protected $info = [];

	/** @var int one of NOT_ENCODED, ENCODED_BASE64 */
	protected $contentEncoded = 0;


	/**
	 * IndexDocument constructor.
	 *
	 * On creation, we assure the uniqueness of the object using the providerId
	 * and the Id of the original document.
	 *
	 * @since 15.0.0
	 *
	 * @param string $providerId
	 * @param string $documentId
	 */
	public function __construct(string $providerId, string $documentId) {
		$this->providerId = $providerId;
		$this->id = $documentId;
	}


	/**
	 * Returns the Id of the original document.
	 *
	 * @since 15.0.0
	 *
	 * @return string
	 */
	final public function getId(): string {
		return $this->id;
	}


	/**
	 * Returns the Id of the provider.
	 *
	 * @since 15.0.0
	 *
	 * @return string
	 */
	final public function getProviderId(): string {
		return $this->providerId;
	}


	/**
	 * Set the Index related to the IndexDocument.
	 *
	 * @see IIndex
	 *
	 * @since 15.0.0
	 *
	 * @param IIndex $index
	 *
	 * @return IndexDocument
	 */
	final public function setIndex(IIndex $index): IndexDocument {
		$this->index = $index;

		return $this;
	}

	/**
	 * Get the Index.
	 *
	 * The declared return type makes this throw a TypeError when no Index
	 * has been set through setIndex() beforehand.
	 *
	 * @since 15.0.0
	 *
	 * @return IIndex
	 */
	final public function getIndex(): IIndex {
		return $this->index;
	}


	/**
	 * Set the modified time of the original document.
	 *
	 * @since 15.0.0
	 *
	 * @param int $modifiedTime
	 *
	 * @return IndexDocument
	 */
	final public function setModifiedTime(int $modifiedTime): IndexDocument {
		$this->modifiedTime = $modifiedTime;

		return $this;
	}

	/**
	 * Get the modified time of the original document.
	 *
	 * @since 15.0.0
	 *
	 * @return int
	 */
	final public function getModifiedTime(): int {
		return $this->modifiedTime;
	}

	/**
	 * Check if the original document of the IndexDocument is older than $time.
	 *
	 * The comparison is strict: a document modified exactly at $time is not
	 * considered older.
	 *
	 * @since 15.0.0
	 *
	 * @param int $time
	 *
	 * @return bool
	 */
	final public function isOlderThan(int $time): bool {
		return ($this->modifiedTime < $time);
	}


	/**
	 * Set the read rights of the original document using a DocumentAccess.
	 *
	 * @see DocumentAccess
	 *
	 * @since 15.0.0
	 *
	 * @param DocumentAccess $access
	 *
	 * @return IndexDocument
	 */
	final public function setAccess(DocumentAccess $access): IndexDocument {
		$this->access = $access;

		return $this;
	}

	/**
	 * Get the DocumentAccess related to the original document.
	 *
	 * The declared return type makes this throw a TypeError when no
	 * DocumentAccess has been set through setAccess() beforehand.
	 *
	 * @since 15.0.0
	 *
	 * @return DocumentAccess
	 */
	final public function getAccess(): DocumentAccess {
		return $this->access;
	}


	/**
	 * Add a tag to the list.
	 *
	 * @since 15.0.0
	 *
	 * @param string $tag
	 *
	 * @return IndexDocument
	 */
	final public function addTag(string $tag): IndexDocument {
		$this->tags[] = $tag;

		return $this;
	}

	/**
	 * Set the list of tags assigned to the original document.
	 *
	 * @since 15.0.0
	 *
	 * @param array $tags
	 *
	 * @return IndexDocument
	 */
	final public function setTags(array $tags): IndexDocument {
		$this->tags = $tags;

		return $this;
	}

	/**
	 * Get the list of tags assigned to the original document.
	 *
	 * @since 15.0.0
	 *
	 * @return array
	 */
	final public function getTags(): array {
		return $this->tags;
	}


	/**
	 * Add a meta tag to the list.
	 *
	 * @since 15.0.0
	 *
	 * @param string $tag
	 *
	 * @return IndexDocument
	 */
	final public function addMetaTag(string $tag): IndexDocument {
		$this->metaTags[] = $tag;

		return $this;
	}

	/**
	 * Set the list of meta tags assigned to the original document.
	 *
	 * @since 15.0.0
	 *
	 * @param array $tags
	 *
	 * @return IndexDocument
	 */
	final public function setMetaTags(array $tags): IndexDocument {
		$this->metaTags = $tags;

		return $this;
	}

	/**
	 * Get the list of meta tags assigned to the original document.
	 *
	 * @since 15.0.0
	 *
	 * @return array
	 */
	final public function getMetaTags(): array {
		return $this->metaTags;
	}


	/**
	 * Add a sub tag to the list.
	 *
	 * Sub tags are grouped by $sub; adding several tags under the same $sub
	 * keeps all of them, in insertion order.
	 *
	 * @since 15.0.0
	 *
	 * @param string $sub
	 * @param string $tag
	 *
	 * @return IndexDocument
	 */
	final public function addSubTag(string $sub, string $tag): IndexDocument {
		$current = $this->subTags[$sub] ?? [];

		// setSubTags() accepts any array, so an entry may be a bare scalar;
		// promote it to a list instead of discarding it.
		if (!is_array($current)) {
			$current = [$current];
		}

		$current[] = $tag;
		$this->subTags[$sub] = $current;

		return $this;
	}

	/**
	 * Set the list of sub tags assigned to the original document.
	 *
	 * @since 15.0.0
	 *
	 * @param array $tags
	 *
	 * @return IndexDocument
	 */
	final public function setSubTags(array $tags): IndexDocument {
		$this->subTags = $tags;

		return $this;
	}

	/**
	 * Get the list of sub tags assigned to the original document.
	 * If $formatted is true, the result will be formatted in a one
	 * dimensional array where each entry is '<sub>_<tag>'.
	 *
	 * @since 15.0.0
	 *
	 * @param bool $formatted
	 *
	 * @return array
	 */
	final public function getSubTags(bool $formatted = false): array {
		if ($formatted === false) {
			return $this->subTags;
		}

		$flattened = [];
		foreach ($this->subTags as $source => $tags) {
			// The cast tolerates scalar entries provided through setSubTags().
			foreach ((array)$tags as $tag) {
				$flattened[] = $source . '_' . $tag;
			}
		}

		return $flattened;
	}


	/**
	 * Set the source of the original document.
	 *
	 * @since 15.0.0
	 *
	 * @param string $source
	 *
	 * @return IndexDocument
	 */
	final public function setSource(string $source): IndexDocument {
		$this->source = $source;

		return $this;
	}

	/**
	 * Get the source of the original document.
	 *
	 * @since 15.0.0
	 *
	 * @return string
	 */
	final public function getSource(): string {
		return $this->source;
	}


	/**
	 * Set the title of the original document.
	 *
	 * @since 15.0.0
	 *
	 * @param string $title
	 *
	 * @return IndexDocument
	 */
	final public function setTitle(string $title): IndexDocument {
		$this->title = $title;

		return $this;
	}

	/**
	 * Get the title of the original document.
	 *
	 * @since 15.0.0
	 *
	 * @return string
	 */
	final public function getTitle(): string {
		return $this->title;
	}


	/**
	 * Set the content of the document.
	 * $encoded can be NOT_ENCODED or ENCODED_BASE64 if the content is raw or
	 * encoded in base64.
	 *
	 * @since 15.0.0
	 *
	 * @param string $content
	 * @param int $encoded
	 *
	 * @return IndexDocument
	 */
	final public function setContent(string $content, int $encoded = self::NOT_ENCODED): IndexDocument {
		$this->content = $content;
		$this->contentEncoded = $encoded;

		return $this;
	}

	/**
	 * Get the content of the original document.
	 *
	 * @since 15.0.0
	 *
	 * @return string
	 */
	final public function getContent(): string {
		return $this->content;
	}

	/**
	 * Returns the type of the encoding on the content, as given to
	 * setContent() (NOT_ENCODED by default).
	 *
	 * @since 15.0.0
	 *
	 * @return int
	 */
	final public function isContentEncoded(): int {
		return $this->contentEncoded;
	}

	/**
	 * Return the size of the content, in bytes of the stored string.
	 *
	 * @since 15.0.0
	 *
	 * @return int
	 */
	final public function getContentSize(): int {
		return strlen($this->getContent());
	}


	/**
	 * Generate a hash (md5), based on the content of the original document.
	 * The current hash is left untouched when the content is empty.
	 *
	 * @since 15.0.0
	 *
	 * @return IndexDocument
	 */
	final public function initHash(): IndexDocument {
		$content = $this->getContent();
		if ($content !== '') {
			$this->hash = hash('md5', $content);
		}

		return $this;
	}

	/**
	 * Set the hash of the original document.
	 *
	 * @since 15.0.0
	 *
	 * @param string $hash
	 *
	 * @return IndexDocument
	 */
	final public function setHash(string $hash): IndexDocument {
		$this->hash = $hash;

		return $this;
	}

	/**
	 * Get the hash of the original document.
	 *
	 * @since 15.0.0
	 *
	 * @return string
	 */
	final public function getHash(): string {
		return $this->hash;
	}


	/**
	 * Add a part, identified by a string, and its content.
	 * Adding a part under an already used identifier replaces its content.
	 *
	 * It is strongly advised to use alphanumerical chars with no space in the
	 * $part string.
	 *
	 * @since 15.0.0
	 *
	 * @param string $part
	 * @param string $content
	 *
	 * @return IndexDocument
	 */
	final public function addPart(string $part, string $content): IndexDocument {
		$this->parts[$part] = $content;

		return $this;
	}

	/**
	 * Set all parts and their content.
	 *
	 * @since 15.0.0
	 *
	 * @param array $parts
	 *
	 * @return IndexDocument
	 */
	final public function setParts(array $parts): IndexDocument {
		$this->parts = $parts;

		return $this;
	}

	/**
	 * Get all parts of the IndexDocument.
	 *
	 * @since 15.0.0
	 *
	 * @return array
	 */
	final public function getParts(): array {
		return $this->parts;
	}


	/**
	 * Add a link, usable by the frontend.
	 *
	 * @since 15.0.0
	 *
	 * @param string $link
	 *
	 * @return IndexDocument
	 */
	final public function setLink(string $link): IndexDocument {
		$this->link = $link;

		return $this;
	}

	/**
	 * Get the link.
	 *
	 * @since 15.0.0
	 *
	 * @return string
	 */
	final public function getLink(): string {
		return $this->link;
	}


	/**
	 * Set more information that couldn't be set using other method.
	 *
	 * @since 15.0.0
	 *
	 * @param array $more
	 *
	 * @return IndexDocument
	 */
	final public function setMore(array $more): IndexDocument {
		$this->more = $more;

		return $this;
	}

	/**
	 * Get more information.
	 *
	 * @since 15.0.0
	 *
	 * @return array
	 */
	final public function getMore(): array {
		return $this->more;
	}


	/**
	 * Add some excerpt of the content of the original document, usually based
	 * on the search request. The excerpt is cleaned with cleanExcerpt()
	 * before being stored.
	 *
	 * @since 15.0.0
	 *
	 * @param string $excerpt
	 *
	 * @return IndexDocument
	 */
	final public function addExcerpt(string $excerpt): IndexDocument {
		$this->excerpts[] = $this->cleanExcerpt($excerpt);

		return $this;
	}

	/**
	 * Set all excerpts of the content of the original document. Each excerpt
	 * is cleaned with cleanExcerpt(); array keys are preserved.
	 *
	 * @since 15.0.0
	 *
	 * @param array $excerpts
	 *
	 * @return IndexDocument
	 */
	final public function setExcerpts(array $excerpts): IndexDocument {
		$this->excerpts = array_map([$this, 'cleanExcerpt'], $excerpts);

		return $this;
	}

	/**
	 * Get all excerpts of the content of the original document.
	 *
	 * @since 15.0.0
	 *
	 * @return array
	 */
	final public function getExcerpts(): array {
		return $this->excerpts;
	}

	/**
	 * Clean excerpt.
	 *
	 * Replaces, each by a single space, the escaped sequences (a backslash
	 * followed by n, r or t) and the actual new line, carriage return and
	 * tab characters.
	 *
	 * @since 15.0.0
	 *
	 * @param string $excerpt
	 *
	 * @return string
	 */
	final public function cleanExcerpt(string $excerpt): string {
		// Needles are applied in order: escaped sequences first, then the
		// real control characters.
		return str_replace(["\\n", "\\r", "\\t", "\n", "\r", "\t"], ' ', $excerpt);
	}

	/**
	 * Set the score to the result assigned to this document during a search
	 * request.
	 *
	 * @since 15.0.0
	 *
	 * @param string $score
	 *
	 * @return IndexDocument
	 */
	final public function setScore(string $score): IndexDocument {
		$this->score = $score;

		return $this;
	}

	/**
	 * Get the score.
	 *
	 * @since 15.0.0
	 *
	 * @return string
	 */
	final public function getScore(): string {
		return $this->score;
	}


	/**
	 * Set some information about the original document that will be available
	 * to the front-end when displaying search result. (as string)
	 * Because this information will not be indexed, this method can also be
	 * used to manage some data while filling the IndexDocument before its
	 * indexing.
	 *
	 * @since 15.0.0
	 *
	 * @param string $info
	 * @param string $value
	 *
	 * @return IndexDocument
	 */
	final public function setInfo(string $info, string $value): IndexDocument {
		$this->info[$info] = $value;

		return $this;
	}

	/**
	 * Get an information about a document. (string)
	 *
	 * All setInfo*() methods share the same storage; the value is returned
	 * as stored, so it should have been set with setInfo().
	 *
	 * @since 15.0.0
	 *
	 * @param string $info
	 * @param string $default returned when $info was never set
	 *
	 * @return string
	 */
	final public function getInfo(string $info, string $default = ''): string {
		if (!array_key_exists($info, $this->info)) {
			return $default;
		}

		return $this->info[$info];
	}

	/**
	 * Set some information about the original document that will be available
	 * to the front-end when displaying search result. (as array)
	 * Because this information will not be indexed, this method can also be
	 * used to manage some data while filling the IndexDocument before its
	 * indexing.
	 *
	 * @since 15.0.0
	 *
	 * @param string $info
	 * @param array $value
	 *
	 * @return IndexDocument
	 */
	final public function setInfoArray(string $info, array $value): IndexDocument {
		$this->info[$info] = $value;

		return $this;
	}

	/**
	 * Get an information about a document. (array)
	 *
	 * All setInfo*() methods share the same storage; the value is returned
	 * as stored, so it should have been set with setInfoArray().
	 *
	 * @since 15.0.0
	 *
	 * @param string $info
	 * @param array $default returned when $info was never set
	 *
	 * @return array
	 */
	final public function getInfoArray(string $info, array $default = []): array {
		if (!array_key_exists($info, $this->info)) {
			return $default;
		}

		return $this->info[$info];
	}

	/**
	 * Set some information about the original document that will be available
	 * to the front-end when displaying search result. (as int)
	 * Because this information will not be indexed, this method can also be
	 * used to manage some data while filling the IndexDocument before its
	 * indexing.
	 *
	 * @since 15.0.0
	 *
	 * @param string $info
	 * @param int $value
	 *
	 * @return IndexDocument
	 */
	final public function setInfoInt(string $info, int $value): IndexDocument {
		$this->info[$info] = $value;

		return $this;
	}

	/**
	 * Get an information about a document. (int)
	 *
	 * All setInfo*() methods share the same storage; the value is returned
	 * as stored, so it should have been set with setInfoInt().
	 *
	 * @since 15.0.0
	 *
	 * @param string $info
	 * @param int $default returned when $info was never set
	 *
	 * @return int
	 */
	final public function getInfoInt(string $info, int $default = 0): int {
		if (!array_key_exists($info, $this->info)) {
			return $default;
		}

		return $this->info[$info];
	}

	/**
	 * Set some information about the original document that will be available
	 * to the front-end when displaying search result. (as bool)
	 * Because this information will not be indexed, this method can also be
	 * used to manage some data while filling the IndexDocument before its
	 * indexing.
	 *
	 * @since 15.0.0
	 *
	 * @param string $info
	 * @param bool $value
	 *
	 * @return IndexDocument
	 */
	final public function setInfoBool(string $info, bool $value): IndexDocument {
		$this->info[$info] = $value;

		return $this;
	}

	/**
	 * Get an information about a document. (bool)
	 *
	 * All setInfo*() methods share the same storage; the value is returned
	 * as stored, so it should have been set with setInfoBool().
	 *
	 * @since 15.0.0
	 *
	 * @param string $info
	 * @param bool $default returned when $info was never set
	 *
	 * @return bool
	 */
	final public function getInfoBool(string $info, bool $default = false): bool {
		if (!array_key_exists($info, $this->info)) {
			return $default;
		}

		return $this->info[$info];
	}

	/**
	 * Get all info, except the entries whose name starts with an underscore.
	 *
	 * @since 15.0.0
	 *
	 * @return array
	 */
	final public function getInfoAll(): array {
		$visible = [];
		foreach ($this->info as $name => $value) {
			// PHP turns numeric-string array keys into ints; cast back so the
			// prefix check cannot raise a TypeError under strict_types.
			if (substr((string)$name, 0, 1) === '_') {
				continue;
			}

			$visible[$name] = $value;
		}

		return $visible;
	}


	/**
	 * @since 15.0.0
	 *
	 * On some version of PHP, it is better to force destruct the object.
	 * And during the index, the number of generated IndexDocument can be
	 * _huge_.
	 */
	public function __destruct() {
		unset($this->id);
		unset($this->providerId);
		unset($this->access);
		unset($this->index);
		unset($this->modifiedTime);
		unset($this->title);
		unset($this->content);
		unset($this->hash);
		unset($this->parts);
		unset($this->link);
		unset($this->source);
		unset($this->tags);
		unset($this->metaTags);
		unset($this->subTags);
		unset($this->more);
		unset($this->excerpts);
		unset($this->score);
		unset($this->info);
		unset($this->contentEncoded);
	}

	/**
	 * Data exposed when the document is encoded with json_encode().
	 * The content itself and the parts are not included; only the size of
	 * the content is.
	 *
	 * No return type is declared so that subclasses overriding this method
	 * without one keep working; the attribute below (a plain comment for
	 * PHP 7) silences the related deprecation on PHP 8.1+.
	 *
	 * @since 15.0.0
	 *
	 * @return array
	 */
	#[\ReturnTypeWillChange]
	public function jsonSerialize() {
		return [
			'id' => $this->getId(),
			'providerId' => $this->getProviderId(),
			'access' => $this->access,
			'modifiedTime' => $this->getModifiedTime(),
			'title' => $this->getTitle(),
			'link' => $this->getLink(),
			'index' => $this->index,
			'source' => $this->getSource(),
			'info' => $this->getInfoAll(),
			'hash' => $this->getHash(),
			'contentSize' => $this->getContentSize(),
			'tags' => $this->getTags(),
			'metatags' => $this->getMetaTags(),
			'subtags' => $this->getSubTags(),
			'more' => $this->getMore(),
			'excerpts' => $this->getExcerpts(),
			'score' => $this->getScore()
		];
	}

}
974
975