buildSearchTermsForEntity()   B
last analyzed

Complexity

Conditions 6
Paths 4

Size

Total Lines 19
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 6

Importance

Changes 3
Bugs 0 Features 0
Metric Value
c 3
b 0
f 0
dl 0
loc 19
ccs 15
cts 15
cp 1
rs 8.8571
cc 6
eloc 10
nc 4
nop 1
crap 6
1
<?php
2
3
namespace Wikibase\EntityStore\MongoDB;
4
5
use Deserializers\Deserializer;
6
use Deserializers\Exceptions\DeserializationException;
7
use MongoBinData;
8
use Serializers\Serializer;
9
use Wikibase\DataModel\Entity\EntityDocument;
10
use Wikibase\DataModel\Entity\EntityId;
11
use Wikibase\DataModel\Entity\EntityIdParser;
12
use Wikibase\DataModel\Entity\EntityIdParsingException;
13
use Wikibase\DataModel\Entity\Item;
14
use Wikibase\DataModel\Entity\Property;
15
use Wikibase\EntityStore\EntityStore;
16
use Wikibase\EntityStore\EntityStoreOptions;
17
use Wikibase\EntityStore\FeatureNotSupportedException;
18
19
/**
 * Internal class
 *
 * Converts between entities and the array documents stored in MongoDB:
 * serializes an entity and enriches it with the "_id", "sterms" and "sclaims"
 * fields, and rebuilds entities / entity ids from stored documents.
 *
 * @licence GPLv2+
 * @author Thomas Pellissier Tanon
 */
class MongoDBDocumentBuilder {

	/**
	 * @var string[] entity types declared as supported
	 */
	public static $SUPPORTED_ENTITY_TYPES = [
		Item::ENTITY_TYPE,
		Property::ENTITY_TYPE
	];

	/**
	 * @var string[] DataValue types of main snaks that are added to the "sclaims" field
	 */
	public static $SUPPORTED_DATAVALUE_TYPES = [
		'string',
		'time',
		'wikibase-entityid'
	];

	/**
	 * @var Serializer
	 */
	private $entitySerializer;

	/**
	 * @var Deserializer
	 */
	private $entityDeserializer;

	/**
	 * @var EntityIdParser
	 */
	private $entityIdParser;

	/**
	 * @var EntityStoreOptions
	 */
	private $options;

	/**
	 * @param Serializer $entitySerializer
	 * @param Deserializer $entityDeserializer
	 * @param EntityIdParser $entityIdParser
	 * @param EntityStoreOptions $options
	 */
	public function __construct(
		Serializer $entitySerializer,
		Deserializer $entityDeserializer,
		EntityIdParser $entityIdParser,
		EntityStoreOptions $options
	) {
		$this->entitySerializer = $entitySerializer;
		$this->entityDeserializer = $entityDeserializer;
		$this->entityIdParser = $entityIdParser;
		$this->options = $options;
	}

	/**
	 * Serializes the entity, filters its terms by the configured languages and
	 * adds the "_id", "sterms" and "sclaims" fields.
	 *
	 * @param EntityDocument $entityDocument
	 * @return array
	 */
	public function buildDocumentForEntity( EntityDocument $entityDocument ) {
		return $this->addIndexedDataToSerialization(
			$this->filterLanguages( $this->entitySerializer->serialize( $entityDocument ) )
		);
	}

	/**
	 * Adds the "_id" (copy of "id"), "sterms" and "sclaims" fields to the serialization.
	 *
	 * @param array $serialization
	 * @return array
	 */
	private function addIndexedDataToSerialization( array $serialization ) {
		$serialization['_id'] = $serialization['id'];
		$serialization['sterms'] = $this->buildSearchTermsForEntity( $serialization );
		$serialization['sclaims'] = $this->buildSearchClaimsForEntity( $serialization );

		return $serialization;
	}

	/**
	 * Keeps only the labels, descriptions and aliases whose key is one of the
	 * languages listed in the EntityStore::OPTION_LANGUAGES option.
	 * A null option disables the filtering.
	 *
	 * @param array $serialization
	 * @return array
	 */
	private function filterLanguages( array $serialization ) {
		$languagesOption = $this->options->getOption( EntityStore::OPTION_LANGUAGES );

		if( $languagesOption === null ) {
			return $serialization;
		}

		// Flipped so that the language codes become keys usable by array_intersect_key
		$languages = array_flip( $languagesOption );
		foreach( [ 'labels', 'descriptions', 'aliases' ] as $termKind ) {
			if( array_key_exists( $termKind, $serialization ) ) {
				$serialization[$termKind] = array_intersect_key( $serialization[$termKind], $languages );
			}
		}

		return $serialization;
	}

	/**
	 * Builds the "sterms" field: for each language, the list of cleaned
	 * labels and aliases of the entity.
	 *
	 * @param array $serialization
	 * @return array map from language code to a list of MongoBinData
	 */
	private function buildSearchTermsForEntity( array $serialization ) {
		$searchTerms = [];

		if( array_key_exists( 'labels', $serialization ) ) {
			foreach( $serialization['labels'] as $label ) {
				$searchTerms[$label['language']][] = $this->cleanTextForSearch( $label['value'] );
			}
		}

		if( array_key_exists( 'aliases', $serialization ) ) {
			foreach( $serialization['aliases'] as $aliasGroup ) {
				foreach( $aliasGroup as $alias ) {
					$searchTerms[$alias['language']][] = $this->cleanTextForSearch( $alias['value'] );
				}
			}
		}

		return $searchTerms;
	}

	/**
	 * Normalizes a term (lower case, trimmed) and returns the raw binary MD5
	 * hash of the result wrapped in a MongoBinData.
	 *
	 * @param string $text
	 * @return MongoBinData
	 */
	public function cleanTextForSearch( $text ) {
		$text = mb_strtolower( $text, 'UTF-8' ); //TODO: said to be very slow
		$text = trim( $text );

		return new MongoBinData( md5( $text, true ), MongoBinData::GENERIC );
	}

	/**
	 * Builds the "sclaims" field from the main snaks of the entity claims.
	 *
	 * @param array $serialization
	 * @return array map from DataValue type to a list of "property-value" strings
	 */
	private function buildSearchClaimsForEntity( array $serialization ) {
		if( !array_key_exists( 'claims', $serialization ) ) {
			return [];
		}

		$searchClaims = [];

		foreach( $serialization['claims'] as $claimBag ) {
			foreach( $claimBag as $claim ) {
				$this->addSnakToSearchClaims( $claim['mainsnak'], $searchClaims );
			}
		}

		return $searchClaims;
	}

	/**
	 * Appends the "property-value" string of the snak to $searchClaims.
	 * Snaks that are not of type "value" or whose DataValue type is not
	 * supported are ignored.
	 *
	 * @param array $snak
	 * @param array $searchClaims modified in place
	 */
	private function addSnakToSearchClaims( array $snak, array &$searchClaims ) {
		if( $snak['snaktype'] !== 'value' ) {
			return;
		}

		$valueType = $snak['datavalue']['type'];
		if( !$this->isSupportedDataValueType( $valueType ) ) {
			return;
		}

		$searchClaims[$valueType][] = $snak['property'] . '-' . $this->buildSearchedDataValue( $snak['datavalue'] );
	}

	/**
	 * @param mixed $type
	 * @return bool
	 */
	private function isSupportedDataValueType( $type ) {
		// Strict comparison: a loose one would match non-string values such as 0 on PHP < 8
		return in_array( $type, self::$SUPPORTED_DATAVALUE_TYPES, true );
	}

	/**
	 * Builds the string representation of a DataValue used in the "sclaims" field.
	 *
	 * @param array $dataValue
	 * @return string
	 * @throws FeatureNotSupportedException if the DataValue type is not supported
	 */
	private function buildSearchedDataValue( array $dataValue ) {
		$value = $dataValue['value'];

		switch( $dataValue['type'] ) {
			case 'string':
				return $this->buildSearchedStringValue( $value );
			case 'time':
				return $value['time'];
			case 'wikibase-entityid':
				return $this->buildSearchedEntityIdValue( $value );
			default:
				throw new FeatureNotSupportedException( 'Not supported DataValue type: ' . $dataValue['type'] );
		}
	}

	/**
	 * Does an hash if needed to keep string size lower than MongoDB index key limit
	 *
	 * Values of at most 32 bytes are returned unchanged, longer ones are
	 * replaced by their hexadecimal MD5 hash (32 characters).
	 *
	 * @param string $value
	 * @return string
	 */
	public function buildSearchedStringValue( $value ) {
		// strlen is intentionally byte based: the limit to respect is a size, not a character count
		if( strlen( $value ) <= 32 ) {
			return $value;
		}

		return md5( $value );
	}

	/**
	 * Builds the serialized entity id ("Q42", "P31"...) from an entity id value.
	 *
	 * @param array $value
	 * @return string
	 * @throws FeatureNotSupportedException if the entity type is neither "item" nor "property"
	 */
	private function buildSearchedEntityIdValue( array $value ) {
		switch( $value['entity-type'] ) {
			case 'item':
				return 'Q' . $value['numeric-id'];
			case 'property':
				return 'P' . $value['numeric-id'];
			default:
				throw new FeatureNotSupportedException( 'Unknown entity type: ' . $value['entity-type'] );
		}
	}

	/**
	 * Deserializes a document into an entity.
	 *
	 * @param array $document
	 * @return EntityDocument|null null if the document could not be deserialized
	 */
	public function buildEntityForDocument( array $document ) {
		try {
			return $this->entityDeserializer->deserialize( $document );
		} catch( DeserializationException $exception ) {
			return null;
		}
	}

	/**
	 * Parses the "_id" field of a document into an entity id.
	 *
	 * @param array $document
	 * @return EntityId
	 * @throws EntityIdParsingException if the document has no "_id" field or if it could not be parsed
	 */
	public function buildEntityIdForDocument( array $document ) {
		if( !array_key_exists( '_id', $document ) ) {
			throw new EntityIdParsingException( 'The document does not have an "_id" field' );
		}

		return $this->entityIdParser->parse( $document['_id'] );
	}
}
244