1 | <?php |
||
25 | class MongoDBDocumentBuilder { |
||
26 | |||
/**
 * Entity type identifiers declared as supported: the Item and Property entity types.
 */
27 | public static $SUPPORTED_ENTITY_TYPES = [ |
||
28 | Item::ENTITY_TYPE, |
||
29 | Property::ENTITY_TYPE |
||
30 | ]; |
||
31 | |||
/**
 * Data value type names declared as supported.
 *
 * NOTE(review): presumably the list that isSupportedDataValueType() checks before a snak
 * is added to the search claims - confirm against that method's body.
 */
32 | public static $SUPPORTED_DATAVALUE_TYPES = [ |
||
33 | 'string', |
||
34 | 'time', |
||
35 | 'wikibase-entityid' |
||
36 | ]; |
36 | ]; |
||
37 | |||
38 | /** |
||
39 | * @var Serializer |
||
40 | */ |
||
41 | private $entitySerializer; |
||
42 | |||
43 | /** |
||
44 | * @var Deserializer |
||
45 | */ |
||
46 | private $entityDeserializer; |
||
47 | |||
48 | /** |
||
49 | * @var EntityIdParser |
||
50 | */ |
||
51 | private $entityIdParser; |
||
52 | |||
53 | /** |
||
54 | * @var EntityStoreOptions |
||
55 | */ |
||
56 | private $options; |
||
57 | |||
/**
 * Stores the collaborators this builder works with.
 *
 * @param Serializer $entitySerializer used to turn an entity into its array serialization
 * @param Deserializer $entityDeserializer kept for later use by this builder
 * @param EntityIdParser $entityIdParser kept for later use by this builder
 * @param EntityStoreOptions $options store options; the languages option is read when filtering terms
 */
public function __construct(
    Serializer $entitySerializer,
    Deserializer $entityDeserializer,
    EntityIdParser $entityIdParser,
    EntityStoreOptions $options
) {
    // The assignments are independent of each other; order does not matter.
    $this->options = $options;
    $this->entityIdParser = $entityIdParser;
    $this->entityDeserializer = $entityDeserializer;
    $this->entitySerializer = $entitySerializer;
}
|
75 | |||
/**
 * Builds the document array for an entity.
 *
 * The entity is serialized, its terms are restricted to the configured languages,
 * and the indexed fields are then added to the result.
 *
 * @param EntityDocument $entityDocument
 * @return array
 */
public function buildDocumentForEntity( EntityDocument $entityDocument ) {
    $serialized = $this->entitySerializer->serialize( $entityDocument );
    $languageFiltered = $this->filterLanguages( $serialized );

    return $this->addIndexedDataToSerialization( $languageFiltered );
}
||
85 | |||
/**
 * Adds the fields used for lookups to an entity serialization.
 *
 * Sets '_id' to the value of 'id', 'sterms' to the search terms and 'sclaims' to the
 * search claims. Each builder receives the serialization as it stands at that point,
 * so the order of the steps is kept as is.
 *
 * @param array $serialization
 * @return array the serialization with '_id', 'sterms' and 'sclaims' set
 */
private function addIndexedDataToSerialization( array $serialization ) {
    $serialization['_id'] = $serialization['id'];

    $searchTerms = $this->buildSearchTermsForEntity( $serialization );
    $serialization['sterms'] = $searchTerms;

    $searchClaims = $this->buildSearchClaimsForEntity( $serialization );
    $serialization['sclaims'] = $searchClaims;

    return $serialization;
}
||
93 | |||
/**
 * Restricts labels, descriptions and aliases to the languages configured in the options.
 *
 * When the languages option is null the serialization is returned untouched.
 *
 * @param array $serialization
 * @return array
 */
private function filterLanguages( array $serialization ) {
    $allowedLanguages = $this->options->getOption( EntityStore::OPTION_LANGUAGES );

    if( $allowedLanguages !== null ) {
        // Flip so that the language codes become keys usable by array_intersect_key.
        $allowedAsKeys = array_flip( $allowedLanguages );

        foreach( [ 'labels', 'descriptions', 'aliases' ] as $section ) {
            if( array_key_exists( $section, $serialization ) ) {
                $serialization[$section] = array_intersect_key( $serialization[$section], $allowedAsKeys );
            }
        }
    }

    return $serialization;
}
||
114 | |||
/**
 * Collects the cleaned search terms of an entity, grouped by language.
 *
 * Labels are processed first, then every alias of every alias group. Each value is
 * passed through cleanTextForSearch() and appended under its 'language' key.
 *
 * @param array $serialization
 * @return array map of language => list of cleaned terms
 */
private function buildSearchTermsForEntity( array $serialization ) {
    $labels = array_key_exists( 'labels', $serialization ) ? $serialization['labels'] : [];
    $aliasGroups = array_key_exists( 'aliases', $serialization ) ? $serialization['aliases'] : [];

    $termsByLanguage = [];

    foreach( $labels as $term ) {
        $termsByLanguage[$term['language']][] = $this->cleanTextForSearch( $term['value'] );
    }

    foreach( $aliasGroups as $group ) {
        foreach( $group as $term ) {
            $termsByLanguage[$term['language']][] = $this->cleanTextForSearch( $term['value'] );
        }
    }

    return $termsByLanguage;
}
||
134 | |||
/**
 * Normalizes a term and hashes it for search.
 *
 * The text is lowercased as UTF-8 and trimmed; the raw binary MD5 digest of the result
 * is then wrapped in a MongoBinData of the generic subtype.
 *
 * @param string $text
 * @return MongoBinData binary MD5 digest of the normalized text (not a plain string)
 */
public function cleanTextForSearch( $text ) {
    // TODO: mb_strtolower is said to be very slow
    $normalized = trim( mb_strtolower( $text, 'UTF-8' ) );

    return new MongoBinData( md5( $normalized, true ), MongoBinData::GENERIC );
}
||
145 | |||
146 | 3 | private function buildSearchClaimsForEntity( array $serialization ) { |
|
161 | |||
/**
 * Appends the search key of a snak to the search claims, grouped by data value type.
 *
 * Only snaks of type 'value' whose data value type passes isSupportedDataValueType()
 * are added; every other snak leaves $searchClaims untouched. The key is the property
 * id, a '-' separator, and the result of buildSearchedDataValue().
 *
 * @param array $snak
 * @param array &$searchClaims map of data value type => list of keys, modified in place
 */
private function addSnakToSearchClaims( array $snak, array &$searchClaims ) {
    if( $snak['snaktype'] === 'value' ) {
        $dataValue = $snak['datavalue'];
        $type = $dataValue['type'];

        if( $this->isSupportedDataValueType( $type ) ) {
            $searchClaims[$type][] = $snak['property'] . '-' . $this->buildSearchedDataValue( $dataValue );
        }
    }
}
|
174 | |||
175 | 2 | private function isSupportedDataValueType( $type ) { |
|
178 | |||
179 | 2 | private function buildSearchedDataValue( array $dataValue ) { |
|
193 | |||
194 | /** |
||
195 | * Hashes the value if needed to keep the string size below the MongoDB index key limit |
||
196 | * |
||
197 | * @param string $value |
||
198 | * @return string |
||
199 | */ |
||
200 | 2 | public function buildSearchedStringValue( $value ) { |
|
207 | |||
208 | 2 | private function buildSearchedEntityIdValue( array $value ) { |
|
218 | |||
219 | /** |
||
220 | * @param array $document |
||
221 | * @return EntityDocument|null |
||
222 | */ |
||
223 | 3 | public function buildEntityForDocument( array $document ) { |
|
230 | |||
231 | /** |
||
232 | * @param array $document |
||
233 | * @return EntityId |
||
234 | * @throws EntityIdParsingException |
||
235 | */ |
||
236 | 3 | public function buildEntityIdForDocument( array $document ) { |
|
243 | } |
||
244 |