Passed
Push — master ( 9c6499...c22bc5 )
by Jens
04:52 queued 02:21
created

DocumentTokenizer   A

Complexity

Total Complexity 24

Size/Duplication

Total Lines 148
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 6

Importance

Changes 0
Metric Value
dl 0
loc 148
rs 10
c 0
b 0
f 0
wmc 24
lcom 1
cbo 6

12 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 6 1
A tokenize() 0 9 1
A tokenizeTitle() 0 6 1
A tokenizeFields() 0 9 2
A tokenizeField() 0 11 3
A tokenizeBricks() 0 9 3
A tokenizeBrick() 0 9 2
A tokenizeDynamicBricks() 0 7 2
A getTokens() 0 4 1
A addTokenToVector() 0 10 3
A addTokenVectorToVector() 0 6 2
A getFieldType() 0 10 3
1
<?php
2
/**
3
 * User: jensk
4
 * Date: 21-2-2017
5
 * Time: 10:38
6
 */
7
8
namespace CloudControl\Cms\search;
9
10
11
use CloudControl\Cms\storage\Document;
12
use CloudControl\Cms\storage\Storage;
13
14
/**
 * Builds a token vector for a single document.
 *
 * The title, the regular fields, the bricks and the dynamic bricks of the
 * document are tokenized on construction. The result is available through
 * getTokens() as array(fieldName => array(token => count)).
 */
class DocumentTokenizer
{
	/**
	 * The document being tokenized.
	 *
	 * @var Document
	 */
	protected $document;

	/**
	 * Token counts grouped by field: array(fieldName => array(token => count)).
	 *
	 * @var array
	 */
	protected $tokenVector = array();

	/**
	 * Storage used to look up document type and brick definitions.
	 *
	 * @var Storage
	 */
	protected $storage;

	/**
	 * Tokenizer constructor. Tokenizes the document immediately.
	 *
	 * @param \CloudControl\Cms\storage\Document $document
	 * @param Storage                   $storage
	 *
	 * @throws \Exception When a field has no matching definition (see getFieldType()).
	 */
	public function __construct(Document $document, Storage $storage)
	{
		$this->document = $document;
		$this->storage = $storage;
		$this->tokenize();
	}

	/**
	 * Execute tokenization of all document fields
	 *
	 * @throws \Exception
	 */
	private function tokenize()
	{
		$this->tokenizeTitle();
		$this->tokenizeFields();
		$this->tokenizeBricks();
		$this->tokenizeDynamicBricks();

		// Drop empty entries, then sort the per-field vectors in descending
		// order; arsort() keeps the field names as keys.
		$this->tokenVector = array_filter($this->tokenVector);
		arsort($this->tokenVector);
	}

	/**
	 * Tokenize the document title and store the result under the 'title' field.
	 */
	private function tokenizeTitle()
	{
		$filteredString = new CharacterFilter($this->document->title);
		$tokenizer = new Tokenizer($filteredString);
		$this->addTokenVectorToVector($tokenizer->getTokenVector(), 'title');
	}

	/**
	 * Tokenize every field of the document, using the document type
	 * definition to determine each field's type.
	 *
	 * @throws \Exception
	 */
	private function tokenizeFields()
	{
		$documentDefinition = $this->storage
			->getDocumentTypes()
			->getDocumentTypeBySlug($this->document->documentTypeSlug);

		foreach ($this->document->fields as $fieldName => $field) {
			$fieldType = $this->getFieldType($fieldName, $documentDefinition);
			$this->tokenizeField($field, $fieldName, $fieldType);
		}
	}

	/**
	 * Tokenize all values of one field and add them to the token vector.
	 *
	 * @param mixed  $field     Iterable collection of values of the field
	 * @param string $fieldName Key under which the tokens are stored
	 * @param mixed  $fieldType Type of the field as found in its definition
	 */
	private function tokenizeField($field, $fieldName, $fieldType)
	{
		// Only index fields that contain text. The type does not change per
		// value, so it is checked once; the comparison is strict so that a
		// non-string type can never loosely match one of the names.
		if (!in_array($fieldType, array('String', 'Text', 'Rich Text'), true)) {
			return;
		}

		foreach ($field as $value) {
			$filteredString = new CharacterFilter($value);
			$tokenizer = new Tokenizer($filteredString);
			$this->addTokenVectorToVector($tokenizer->getTokenVector(), $fieldName);
		}
	}

	/**
	 * Tokenize the bricks of the document, which are grouped by brick slug.
	 *
	 * @throws \Exception
	 */
	private function tokenizeBricks()
	{
		foreach ($this->document->bricks as $brickSlug => $bricksForSlug) {
			foreach ($bricksForSlug as $brick) {
				$this->tokenizeBrick($brick, $brickSlug);
			}
		}
	}

	/**
	 * Tokenize every field of a single brick. Tokens are stored under
	 * "<brickSlug>__<fieldName>".
	 *
	 * @param mixed  $brick     Brick exposing ->fields and ->type
	 * @param string $brickSlug Prefix for the field names of this brick
	 *
	 * @throws \Exception
	 */
	private function tokenizeBrick($brick, $brickSlug)
	{
		$brickDefinition = $this->storage->getBricks()->getBrickBySlug($brick->type);

		foreach ($brick->fields as $fieldName => $field) {
			$fieldType = $this->getFieldType($fieldName, $brickDefinition);
			$this->tokenizeField($field, $brickSlug . '__' . $fieldName, $fieldType);
		}
	}

	/**
	 * Tokenize the dynamic bricks of the document. The field prefix is
	 * 'dynamicBricks__' followed by the brick type and the brick's key.
	 *
	 * @throws \Exception
	 */
	private function tokenizeDynamicBricks()
	{
		foreach ($this->document->dynamicBricks as $key => $brick) {
			$this->tokenizeBrick($brick, 'dynamicBricks__' . $brick->type . $key);
		}
	}

	/**
	 * Get the token vector built for the document.
	 *
	 * @return array array(fieldName => array(token => count))
	 */
	public function getTokens()
	{
		return $this->tokenVector;
	}

	/**
	 * Add a token to the existing tokenvector
	 * @param     		$token
	 * @param string    $field
	 * @param int 		$count
	 */
	private function addTokenToVector($token, $field, $count = 1)
	{
		// NOTE: empty() also rejects the token "0" (and 0), not only ''/null.
		if (empty($token)) {
			return;
		}

		if (isset($this->tokenVector[$field][$token])) {
			$this->tokenVector[$field][$token] += $count;
		} else {
			$this->tokenVector[$field][$token] = $count;
		}
	}

	/**
	 * Add a complete token vector to the existing one.
	 * @param $tokenVector array(token => count)
	 * @param $field
	 */
	private function addTokenVectorToVector($tokenVector, $field)
	{
		foreach ($tokenVector as $token => $count) {
			$this->addTokenToVector($token, $field, $count);
		}
	}

	/**
	 * Get the type for a field
	 * @param $fieldName
	 * @param $documentDefinition Definition exposing ->fields, each with ->slug and ->type
	 * @return mixed
	 * @throws \Exception When no field definition has a slug equal to $fieldName
	 */
	private function getFieldType($fieldName, $documentDefinition)
	{
		foreach ($documentDefinition->fields as $fieldTypeDefinition) {
			if ($fieldTypeDefinition->slug === $fieldName) {
				return $fieldTypeDefinition->type;
			}
		}

		throw new \Exception('Unknown field type for field ' . $fieldName . ' in document ' . $this->document->path);
	}
}