Changeset 15739 for main/waeup.kofa


Ignore:
Timestamp:
2 Nov 2019, 01:16:36 (5 years ago)
Author:
uli
Message:

Add helpers to reindex catalogs.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/waeup.kofa/trunk/src/waeup/kofa/utils/helpers.py

    r15627 r15739  
    3535from zope.interface import implementedBy
    3636from zope.interface.interface import Method, Attribute
     37from zope.intid.interfaces import IIntIds
    3738from zope.schema import getFieldNames
    3839from zope.schema.fieldproperty import FieldProperty
     
    4142from zope.pluggableauth.interfaces import IAuthenticatorPlugin
    4243from zope.formlib.widget import renderElement
     44
    4345
    4446BUFSIZE = 8 * 1024
     
    918920    parser.feed(html_code)
    919921    return parser.form_vars
     922
     923
     924def get_catalog_docids(cat):
     925    """Get all docids for a given catalog `cat`.
     926
     927    Catalogs store the ids of objects they index. Get all of these object ids.
     928    This function works at least for catalogs that provide field- and text
     929    indexes only.
     930    """
     931    result = []
     932    for index in cat.values():
     933        try:
     934            # FieldIndexes
     935            result.extend(list(index._rev_index.keys()))
     936        except AttributeError:
     937            # TextIndexes
     938            result.extend(list(index.index._docwords.keys()))
     939    return set(result)
     940
     941
     942def reindex_cat(cat):
     943    """Reindex all objects stored in a catalog `cat`.
     944
     945    Regular catalogs try to reindex all stored object ids of a ZODB when asked
     946    to reindex all contents. That can be overkill. This function reindexes only
     947    those objects, that were already stored in a catalog. It was tested for
     948    catalogs with at least 650000 objects.
     949
     950    Please note, that reindexing catalgos, can take a considerable amount of
     951    time. 100.000 objects took about 12 minutes to reindex on a 16 GB machine.
     952    """
     953    d1 = datetime.datetime.now()
     954    print("Collecting doc ids...")
     955    uidutil = getUtility(IIntIds, context=cat)
     956    uids = get_catalog_docids(cat)
     957    print("Found %s entries..." % len(uids))
     958    for docid in uids:
     959        ob = uidutil.getObject(docid)
     960        cat.index_doc(docid, ob)
     961    d2 = datetime.datetime.now()
     962    print("Finished. %s" % (d2 - d1))
Note: See TracChangeset for help on using the changeset viewer.