Passed
Push — develop ( 169afe...f2bd80 )
by Jens
02:39
created

DocumentTokenizer::getTokens()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 0
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * User: jensk
4
 * Date: 21-2-2017
5
 * Time: 10:38
6
 */
7
8
namespace library\search;
9
10
11
use library\storage\Document;
12
use library\storage\JsonStorage;
13
14
/**
 * Builds a token vector for a single document.
 *
 * On construction the document's title, its fields, its bricks and its
 * dynamic bricks are run through CharacterFilter and Tokenizer, and the
 * resulting token counts are collected per field name. The collected vector
 * is available through getTokens().
 */
class DocumentTokenizer
{
	/**
	 * Field types whose values are tokenized; all other types are skipped.
	 *
	 * @var array
	 */
	private static $textFieldTypes = array('String', 'Text', 'Rich Text');

	/**
	 * The document being tokenized.
	 *
	 * @var Document
	 */
	protected $document;

	/**
	 * Collected tokens, shaped as: field name => (token => count).
	 *
	 * @var array
	 */
	protected $tokenVector = array();

	/**
	 * Storage used to look up document type and brick definitions.
	 *
	 * @var JsonStorage
	 */
	protected $storage;

	/**
	 * Tokenizer constructor. Tokenization runs immediately.
	 *
	 * @param \library\storage\Document $document
	 * @param JsonStorage $storage
	 *
	 * @throws \Exception When a field has no matching definition (see getFieldType()).
	 */
	public function __construct(Document $document, JsonStorage $storage)
	{
		$this->document = $document;
		$this->storage = $storage;
		$this->tokenize();
	}

	/**
	 * Execute tokenization of all document fields
	 *
	 * @throws \Exception When a field has no matching definition.
	 */
	private function tokenize()
	{
		$this->tokenizeTitle();
		$this->tokenizeFields();
		$this->tokenizeBricks();
		$this->tokenizeDynamicBricks();

		// Drop entries that ended up falsy (e.g. an empty per-field vector).
		$this->tokenVector = array_filter($this->tokenVector);

		// Sorts the per-field vectors in descending order while keeping the
		// field names as keys. The elements are arrays, so PHP's array
		// comparison rules decide the order.
		// NOTE(review): this orders whole fields, not the tokens inside a
		// field - confirm that this is the intended ordering.
		arsort($this->tokenVector);
	}

	/**
	 * Tokenize the document title and store the result under the 'title' field.
	 */
	private function tokenizeTitle()
	{
		$filteredString = new CharacterFilter($this->document->title);
		$tokenizer = new Tokenizer($filteredString);
		$this->addTokenVectorToVector($tokenizer->getTokenVector(), 'title');
	}

	/**
	 * Tokenize every field of the document, using the document type
	 * definition from storage to determine each field's type.
	 *
	 * @throws \Exception When a field has no matching definition.
	 */
	private function tokenizeFields()
	{
		$fields = $this->document->fields;
		$documentDefinition = $this->storage->getDocumentTypeBySlug($this->document->documentTypeSlug);
		foreach ($fields as $fieldName => $field) {
			$fieldType = $this->getFieldType($fieldName, $documentDefinition);
			$this->tokenizeField($field, $fieldName, $fieldType);
		}
	}

	/**
	 * Tokenize all values of one field and add them to the token vector.
	 *
	 * @param array|\Traversable $field     The values of the field
	 * @param string             $fieldName Key under which the tokens are stored
	 * @param string             $fieldType Type of the field as given by its definition
	 */
	private function tokenizeField($field, $fieldName, $fieldType)
	{
		// Only index fields that contain text. The type is the same for every
		// value, so it is checked once up front; the comparison is strict so
		// that a non-string type can never loosely match a text type.
		if (!in_array($fieldType, self::$textFieldTypes, true)) {
			return;
		}

		foreach ($field as $value) {
			$filteredString = new CharacterFilter($value);
			$tokenizer = new Tokenizer($filteredString);
			$this->addTokenVectorToVector($tokenizer->getTokenVector(), $fieldName);
		}
	}

	/**
	 * Tokenize every brick of the document.
	 *
	 * @throws \Exception When a brick field has no matching definition.
	 */
	private function tokenizeBricks()
	{
		$bricks = $this->document->bricks;
		foreach ($bricks as $brickSlug => $brick) {
			$this->tokenizeBrick($brick, $brickSlug);
		}
	}

	/**
	 * Tokenize the fields of a single brick. Tokens are stored under
	 * "<brickSlug>__<fieldName>".
	 *
	 * @param object $brick     Brick exposing `fields` and `type`
	 * @param string $brickSlug Prefix for the field names of this brick
	 *
	 * @throws \Exception When a brick field has no matching definition.
	 */
	private function tokenizeBrick($brick, $brickSlug)
	{
		$fields = $brick->fields;
		$brickDefinition = $this->storage->getBrickBySlug($brick->type);
		foreach ($fields as $fieldName => $field) {
			$fieldType = $this->getFieldType($fieldName, $brickDefinition);
			$this->tokenizeField($field, $brickSlug . '__' . $fieldName, $fieldType);
		}
	}

	/**
	 * Tokenize every dynamic brick of the document. Each brick gets the
	 * prefix "dynamicBricks__<type><key>".
	 *
	 * @throws \Exception When a brick field has no matching definition.
	 */
	private function tokenizeDynamicBricks()
	{
		$dynamicBricks = $this->document->dynamicBricks;
		foreach ($dynamicBricks as $key => $brick) {
			$this->tokenizeBrick($brick, 'dynamicBricks__' . $brick->type . $key);
		}
	}

	/**
	 * Get the collected token vector.
	 *
	 * @return array field name => (token => count)
	 */
	public function getTokens()
	{
		return $this->tokenVector;
	}

	/**
	 * Add a token to the existing tokenvector
	 *
	 * Tokens for which empty() is true are ignored; note that this includes
	 * the empty string as well as the token "0".
	 *
	 * @param string|int $token
	 * @param string     $field
	 * @param int        $count
	 */
	private function addTokenToVector($token, $field, $count = 1)
	{
		if (empty($token)) {
			return;
		}

		if (isset($this->tokenVector[$field][$token])) {
			$this->tokenVector[$field][$token] += $count;
		} else {
			$this->tokenVector[$field][$token] = $count;
		}
	}

	/**
	 * Add a complete token vector to the existing one.
	 *
	 * @param array|\Traversable $tokenVector token => count
	 * @param string             $field       Field the tokens belong to
	 */
	private function addTokenVectorToVector($tokenVector, $field)
	{
		foreach ($tokenVector as $token => $count) {
			$this->addTokenToVector($token, $field, $count);
		}
	}

	/**
	 * Get the type for a field
	 *
	 * Looks for the definition whose slug equals the field name and returns
	 * its type.
	 *
	 * @param string $fieldName
	 * @param object $documentDefinition Definition exposing a `fields` list
	 *
	 * @return mixed
	 * @throws \Exception When no definition matches the field name.
	 */
	private function getFieldType($fieldName, $documentDefinition)
	{
		foreach ($documentDefinition->fields as $fieldTypeDefinition) {
			if ($fieldTypeDefinition->slug === $fieldName) {
				return $fieldTypeDefinition->type;
			}
		}

		throw new \Exception('Unknown field type for field ' . $fieldName . ' in document ' . $this->document->path);
	}
}