Completed
Push — master ( e1228c...abc836 )
by Jordi
04:46
created

addZCTextIndex()   A

Complexity

Conditions 3

Size

Total Lines 28
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 23
dl 0
loc 28
rs 9.328
c 0
b 0
f 0
cc 3
nop 2
1
# -*- coding: utf-8 -*-
2
#
3
# This file is part of SENAITE.CORE.
4
#
5
# SENAITE.CORE is free software: you can redistribute it and/or modify it under
6
# the terms of the GNU General Public License as published by the Free Software
7
# Foundation, version 2.
8
#
9
# This program is distributed in the hope that it will be useful, but WITHOUT
10
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12
# details.
13
#
14
# You should have received a copy of the GNU General Public License along with
15
# this program; if not, write to the Free Software Foundation, Inc., 51
16
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17
#
18
# Copyright 2018-2019 by its authors.
19
# Some rights reserved, see README and LICENSE.
20
21
from bika.lims import logger
22
from bika.lims.catalog.analysis_catalog import \
23
    bika_catalog_analysis_listing_definition
24
from bika.lims.catalog.analysisrequest_catalog import \
25
    bika_catalog_analysisrequest_listing_definition
26
from bika.lims.catalog.autoimportlogs_catalog import \
27
    bika_catalog_autoimportlogs_listing_definition
28
from bika.lims.catalog.report_catalog import bika_catalog_report_definition
29
from bika.lims.catalog.worksheet_catalog import \
30
    bika_catalog_worksheet_listing_definition
31
from Products.CMFCore.utils import getToolByName
32
33
34
def getCatalogDefinitions():
    """
    Builds a single dictionary with the definitions of all the catalogs.

    The definitions are merged in a fixed order (analysis requests, analyses,
    auto-import logs, worksheets and reports). If two definitions share a
    catalog id, the one merged later replaces the earlier one.

    :returns: the merged catalogs definitions
    :rtype: dict
    """
    # The order of this tuple is the merge order
    definitions = (
        bika_catalog_analysisrequest_listing_definition,
        bika_catalog_analysis_listing_definition,
        bika_catalog_autoimportlogs_listing_definition,
        bika_catalog_worksheet_listing_definition,
        bika_catalog_report_definition,
    )
    merged = {}
    for definition in definitions:
        merged.update(definition)
    return merged
51
52
53
def getCatalog(instance, field='UID'):
    """
    Returns the catalog that stores objects of instance passed in type.
    If an object is indexed by more than one catalog, the first match
    will be returned.

    :param instance: A single object
    :type instance: ATContentType
    :param field: Not used by this function. Kept so that callers that pass
        it keep working.
    :returns: The first catalog mapped to the portal type of the object
        passed in. The 'portal_catalog' tool is used when the portal type
        has no catalog mapped. None is returned when the UID of the object
        is found inside any entry of the request's 'workflow_skiplist'.
    """
    uid = instance.UID()
    request = instance.REQUEST
    if 'workflow_skiplist' in request and \
            any(entry.find(uid) > -1
                for entry in request['workflow_skiplist']):
        return None

    # grab the first catalog we are indexed in.
    at = getToolByName(instance, 'archetype_tool')
    plone = instance.portal_url.getPortalObject()
    catalog_name = 'portal_catalog'
    if instance.portal_type in at.catalog_map:
        mapped = at.catalog_map[instance.portal_type]
        # A type mapped to an empty list (or to an empty name) falls back to
        # the default catalog instead of failing with an IndexError
        if mapped and mapped[0]:
            catalog_name = mapped[0]
    return getToolByName(plone, catalog_name)
77
78
79
def setup_catalogs(
        portal, catalogs_definition=None,
        force_reindex=False, catalogs_extension=None, force_no_reindex=False):
    """
    Setup the given catalogs. Redefines the map between content types and
    catalogs and then checks the indexes and metacolumns, if one index/column
    doesn't exist in the catalog_definition any more it will be
    removed, otherwise, if a new index/column is found, it will be created.

    :param portal: The Plone's Portal object
    :param catalogs_definition: a dictionary with the following structure
        {
            CATALOG_ID: {
                'types':   ['ContentType', ...],
                'indexes': {
                    'UID': 'FieldIndex',
                    ...
                },
                'columns': [
                    'Title',
                    ...
                ]
            }
        }
        When empty or None, the result of getCatalogDefinitions() is used.
    :type catalogs_definition: dict
    :param force_reindex: Force to reindex the catalogs even if there's no need
    :type force_reindex: bool
    :param catalogs_extension: An extension for the primary catalogs
        definition. Same dict structure as param catalogs_definition. Allows
        to add columns and indexes required by Bika-specific add-ons.
    :type catalogs_extension: dict
    :param force_no_reindex: Force reindexing NOT to happen.
    :type force_no_reindex: bool
    :returns: the ids of the catalogs flagged to be cleaned and rebuilt
    :rtype: list
    """
    # If not given catalogs_definition, use the LIMS one
    if not catalogs_definition:
        catalogs_definition = getCatalogDefinitions()

    # Merge the catalogs definition of the extension with the primary
    # catalog definition
    definition = _merge_catalog_definitions(catalogs_definition,
                                            catalogs_extension or {})

    # Mapping content types in catalogs. The returned list holds the ids of
    # the catalogs whose type mapping has changed
    archetype_tool = getToolByName(portal, 'archetype_tool')
    clean_and_rebuild = _map_content_types(archetype_tool, definition)

    # Indexing
    for cat_id, cat_definition in definition.items():
        reindex = _setup_catalog(portal, cat_id, cat_definition)
        if (reindex or force_reindex) and cat_id not in clean_and_rebuild:
            # add the catalog if it has not been added before
            clean_and_rebuild.append(cat_id)

    # Reindex the catalogs which needs it
    if not force_no_reindex:
        _cleanAndRebuildIfNeeded(portal, clean_and_rebuild)
    return clean_and_rebuild
139
140
141
def _merge_catalog_definitions(dict1, dict2):
142
    """
143
    Merges two dictionaries that represent catalogs definitions. The first
144
    dictionary contains the catalogs structure by default and the second dict
145
    contains additional information. Usually, the former is the Bika LIMS
146
    catalogs definition and the latter is the catalogs definition of an add-on
147
    The structure of each dict as follows:
148
        {
149
            CATALOG_ID: {
150
                'types':   ['ContentType', ...],
151
                'indexes': {
152
                    'UID': 'FieldIndex',
153
                    ...
154
                },
155
                'columns': [
156
                    'Title',
157
                    ...
158
                ]
159
            }
160
        }
161
162
    :param dict1: The dictionary to be used as the main template (defaults)
163
    :type dict1: dict
164
    :param dict2: The dictionary with additional information
165
    :type dict2: dict
166
    :returns: A merged dict with the same structure as the dicts passed in
167
    :rtype: dict
168
    """
169
    if not dict2:
170
        return dict1.copy()
171
172
    outdict = {}
173
    # Use dict1 as a template
174
    for k, v in dict1.items():
175
        if k not in dict2 and isinstance(v, dict):
176
            outdict[k] = v.copy()
177
            continue
178
        if k not in dict2 and isinstance(v, list):
179
            outdict[k] = v[:]
180
            continue
181
        if k == 'indexes':
182
            sdict1 = v.copy()
183
            sdict2 = dict2[k].copy()
184
            sdict1.update(sdict2)
185
            outdict[k] = sdict1
186
            continue
187
        if k in ['types', 'columns']:
188
            list1 = v
189
            list2 = dict2[k]
190
            outdict[k] = list(set(list1 + list2))
191
            continue
192
        if isinstance(v, dict):
193
            sdict1 = v.copy()
194
            sdict2 = dict2[k].copy()
195
            outdict[k] = _merge_catalog_definitions(sdict1, sdict2)
196
197
    # Now, add the rest of keys from dict2 that don't exist in dict1
198
    for k, v in dict2.items():
199
        if k in outdict:
200
            continue
201
        outdict[k] = v.copy()
202
    return outdict
203
204
205
def _map_content_types(archetype_tool, catalogs_definition):
206
    """
207
    Updates the mapping for content_types against catalogs
208
    :archetype_tool: an archetype_tool object
209
    :catalogs_definition: a dictionary like
210
        {
211
            CATALOG_ID: {
212
                'types':   ['ContentType', ...],
213
                'indexes': {
214
                    'UID': 'FieldIndex',
215
                    ...
216
                },
217
                'columns': [
218
                    'Title',
219
                    ...
220
                ]
221
            }
222
        }
223
    """
224
    # This will be a dictionari like {'content_type':['catalog_id', ...]}
225
    ct_map = {}
226
    # This list will contain the atalog ids to be rebuild
227
    to_reindex = []
228
    # getting the dictionary of mapped content_types in the catalog
229
    map_types = archetype_tool.catalog_map
230
    for catalog_id in catalogs_definition.keys():
231
        catalog_info = catalogs_definition.get(catalog_id, {})
232
        # Mapping the catalog with the defined types
233
        types = catalog_info.get('types', [])
234
        for t in types:
235
            tmp_l = ct_map.get(t, [])
236
            tmp_l.append(catalog_id)
237
            ct_map[t] = tmp_l
238
    # Mapping
239
    for t in ct_map.keys():
240
        catalogs_list = ct_map[t]
241
        # Getting the previus mapping
242
        perv_catalogs_list = archetype_tool.catalog_map.get(t, [])
243
        # If the mapping has changed, update it
244
        set1 = set(catalogs_list)
245
        set2 = set(perv_catalogs_list)
246
        if set1 != set2:
247
            archetype_tool.setCatalogsByType(t, catalogs_list)
248
            # Adding to reindex only the catalogs that have changed
249
            to_reindex = to_reindex + list(set1 - set2) + list(set2 - set1)
250
    return to_reindex
251
252
253
def _setup_catalog(portal, catalog_id, catalog_definition):
    """
    Makes the indexes and metadata columns of a catalog match the catalog
    definition passed in: the missing ones are added and those that are not
    in the definition are removed.
    :portal: the Plone site object
    :catalog_id: a string as the catalog id
    :catalog_definition: a dictionary like
        {
            'types':   ['ContentType', ...],
            'indexes': {
                'UID': 'FieldIndex',
                ...
            },
            'columns': [
                'Title',
                ...
            ]
        }
    :returns: True if at least one index or column has been added or
        removed. False otherwise, also when the catalog tool is not found.
    """
    catalog = getToolByName(portal, catalog_id, None)
    if catalog is None:
        logger.warning('Could not find the %s tool.' % (catalog_id))
        return False

    needs_reindex = False

    # Add the indexes from the definition
    index_types = catalog_definition.get('indexes', {})
    for index_id in index_types:
        if _addIndex(catalog, index_id, index_types[index_id]):
            needs_reindex = True

    # Remove the indexes of the catalog that are not in the definition
    for index_id in list(set(catalog.indexes()) - set(index_types)):
        if _delIndex(catalog, index_id):
            needs_reindex = True

    # Add the metadata columns from the definition
    wanted_columns = catalog_definition.get('columns', [])
    for column in wanted_columns:
        if _addColumn(catalog, column):
            needs_reindex = True

    # Remove the columns of the catalog that are not in the definition
    for column in list(set(catalog.schema()) - set(wanted_columns)):
        if _delColumn(catalog, column):
            needs_reindex = True

    return needs_reindex
305
306
307
def _addIndex(catalog, index, indextype):
308
    """
309
    This function indexes the index element into the catalog if it isn't yet.
310
    :catalog: a catalog object
311
    :index: an index id as string
312
    :indextype: the type of the index as string
313
    :returns: a boolean as True if the element has been indexed and it returns
314
    False otherwise.
315
    """
316
    if index not in catalog.indexes():
317
        try:
318
            if indextype == 'ZCTextIndex':
319
                addZCTextIndex(catalog, index)
320
            else:
321
                catalog.addIndex(index, indextype)
322
            logger.info('Catalog index %s added to %s.' % (index, catalog.id))
323
            return True
324
        except:
325
            logger.error(
326
                'Catalog index %s error while adding to %s.'
327
                % (index, catalog.id))
328
    return False
329
330
331
def _addColumn(cat, col):
332
    """
333
    This function adds a metadata column to the acatalog.
334
    :cat: a catalog object
335
    :col: a column id as string
336
    :returns: a boolean as True if the element has been added and
337
        False otherwise
338
    """
339
    # First check if the metadata column already exists
340
    if col not in cat.schema():
341
        try:
342
            cat.addColumn(col)
343
            logger.info('Column %s added to %s.' % (col, cat.id))
344
            return True
345
        except:
346
            logger.error(
347
                'Catalog column %s error while adding to %s.' % (col, cat.id))
348
    return False
349
350
351
def _delIndex(catalog, index):
352
    """
353
    This function desindexes the index element from the catalog.
354
    :catalog: a catalog object
355
    :index: an index id as string
356
    :returns: a boolean as True if the element has been desindexed and it
357
    returns False otherwise.
358
    """
359
    if index in catalog.indexes():
360
        try:
361
            catalog.delIndex(index)
362
            logger.info(
363
                'Catalog index %s deleted from %s.' % (index, catalog.id))
364
            return True
365
        except:
366
            logger.error(
367
                'Catalog index %s error while deleting from %s.'
368
                % (index, catalog.id))
369
    return False
370
371
372
def _delColumn(cat, col):
373
    """
374
    This function deletes a metadata column of the acatalog.
375
    :cat: a catalog object
376
    :col: a column id as string
377
    :returns: a boolean as True if the element has been removed and
378
        False otherwise
379
    """
380
    # First check if the metadata column already exists
381
    if col in cat.schema():
382
        try:
383
            cat.delColumn(col)
384
            logger.info('Column %s deleted from %s.' % (col, cat.id))
385
            return True
386
        except:
387
            logger.error(
388
                'Catalog column %s error while deleting from %s.'
389
                % (col, cat.id))
390
    return False
391
392
393
def addZCTextIndex(catalog, index_name):
    """
    Adds an index of type ZCTextIndex to the catalog. A lexicon with id
    'Lexicon' is added to the catalog first, since the index refers to it.
    :catalog: a catalog object. If None, a warning is logged and nothing
        else is done.
    :index_name: the id of the index to add, as string
    """
    if catalog is None:
        # The catalog must not be concatenated to the message here: it is
        # None, and str + None raises a TypeError
        logger.warning('Could not find the catalog tool.')
        return

    # Create lexicon to be able to add ZCTextIndex
    wordSplitter = Empty()
    wordSplitter.group = 'Word Splitter'
    wordSplitter.name = 'Unicode Whitespace splitter'
    caseNormalizer = Empty()
    caseNormalizer.group = 'Case Normalizer'
    caseNormalizer.name = 'Unicode Case Normalizer'
    stopWords = Empty()
    stopWords.group = 'Stop Words'
    stopWords.name = 'Remove listed and single char words'
    elem = [wordSplitter, caseNormalizer, stopWords]
    zc_extras = Empty()
    zc_extras.index_type = 'Okapi BM25 Rank'
    zc_extras.lexicon_id = 'Lexicon'

    try:
        catalog.manage_addProduct['ZCTextIndex'].manage_addLexicon(
            'Lexicon', 'Lexicon', elem)
    except Exception:
        # Best effort: the index is added below even if the lexicon could
        # not be created (presumably because it already exists - TODO
        # confirm). KeyboardInterrupt/SystemExit are not swallowed.
        logger.warning('Could not add ZCTextIndex to '+str(catalog))

    catalog.addIndex(index_name, 'ZCTextIndex', zc_extras)
421
422
423
def _cleanAndRebuildIfNeeded(portal, cleanrebuild):
    """
    Rebuild the given catalogs.
    :portal: the Plone portal object
    :cleanrebuild: a list with catalog ids
    """
    for cat in cleanrebuild:
        # A default value is passed in so a missing tool is reported with a
        # warning instead of raising, and the remaining catalogs are still
        # processed
        catalog = getToolByName(portal, cat, None)
        # Compare against None rather than testing truthiness: a catalog
        # object may evaluate as false (e.g. if it defines __len__ and has
        # no entries yet), and it still has to be rebuilt
        if catalog is None:
            logger.warning("Catalog '%s' not found" % cat)
            continue
        catalog.clearFindAndRebuild()
435
436
437
class Empty:
    """
    Plain attribute container. Instances are created without any attribute
    and get them assigned afterwards, so they can be handed to other objects
    as a parameter.
    """
443