source: main/waeup.kofa/trunk/src/waeup/kofa/documents/batching.py @ 17774

Last change on this file since 17774 was 17763, checked in by Henrik Bettermann, 8 months ago

DocumentProcessorBase: import state and history

  • Property svn:keywords set to Id
File size: 6.6 KB
Line 
1## $Id: batching.py 17763 2024-05-11 19:55:45Z henrik $
2##
3## Copyright (C) 2014 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""Batch processing components for document objects.
19
20Batch processors eat CSV files to add, update or remove large numbers
21of certain kinds of objects at once.
22"""
23import grok
24import unicodecsv as csv  # XXX: csv ops should move to dedicated module.
25from time import time
26from ast import literal_eval
27from datetime import datetime
28from zope.i18n import translate
29from zope.interface import Interface
30from zope.schema import getFields
31from zope.component import queryUtility, getUtility, createObject
32from zope.event import notify
33from zope.catalog.interfaces import ICatalog
34from hurry.workflow.interfaces import IWorkflowState, IWorkflowInfo
35from waeup.kofa.interfaces import (IObjectHistory,
36    IBatchProcessor, FatalCSVError, IObjectConverter, IUserAccount,
37    IGNORE_MARKER)
38from waeup.kofa.interfaces import IKofaUtils
39from waeup.kofa.interfaces import MessageFactory as _
40from waeup.kofa.documents.interfaces import (
41    IPDFDocument, IHTMLDocument, IRESTDocument)
42from waeup.kofa.utils.batching import BatchProcessor
43
44
class DocumentProcessorBase(BatchProcessor):
    """This is the base class for all kinds of document processors.

    The `checkConversion` method checks whether `class_name` in a row
    corresponds with the processor chosen. This is to avoid accidentally
    wrong imports.

    Besides the schema fields of `iface`, the columns listed in
    `additional_fields` are accepted. `updateEntry` treats two of them
    specially: a non-empty `state` value is written straight to the
    workflow state of the document (via `IWorkflowState.setState`, no
    workflow transition is fired here) and a non-empty `history` value
    replaces the entire object history.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    grok.baseclass()

    # To be set by concrete processors.
    util_name = None
    name = None
    iface = None

    location_fields = ['document_id',]
    additional_fields = ['class_name', 'state', 'history']

    # Dotted factory name, e.g. 'waeup.PDFDocument'; set by subclasses.
    factory_name = None

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of `additional_fields` plus the
        names of all schema fields of `iface`.
        """
        field_names = set(self.additional_fields)
        field_names.update(getFields(self.iface).keys())
        return sorted(field_names)

    def parentsExist(self, row, site):
        """Tell whether `site` has a 'documents' key.
        """
        return 'documents' in site.keys()

    def entryExists(self, row, site):
        """Tell whether the documents catalog finds an object with the
        `document_id` given in `row`.
        """
        document_id = row.get('document_id', None)
        cat = queryUtility(ICatalog, name='documents_catalog')
        results = list(cat.searchResults(document_id=(document_id, document_id)))
        return bool(results)

    def getParent(self, row, site):
        """Return the documents container of `site`.
        """
        return site['documents']

    def getEntry(self, row, site):
        """Return the document addressed by `row` or ``None`` if there
        is no such document.
        """
        if not self.entryExists(row, site):
            return None
        parent = self.getParent(row, site)
        return parent.get(row['document_id'])

    def addEntry(self, obj, row, site):
        """Add `obj` to the documents container.
        """
        parent = self.getParent(row, site)
        parent.addDocument(obj)
        return

    def delEntry(self, row, site):
        """Remove the document addressed by `row` (if it exists) and
        log the removal.
        """
        document = self.getEntry(row, site)
        parent = self.getParent(row, site)
        if document is not None:
            grok.getSite().logger.info(
                '%s - Document removed' % document.document_id)
            del parent[document.document_id]
        return

    def updateEntry(self, obj, row, site, filename):
        """Update obj to the values given in row.

        Regular values are applied by the base class. Afterwards the
        `history` and `state` columns, if present in `row`, are handled
        here and removed from `row`. Values equal to `IGNORE_MARKER` or
        the empty string leave history and state untouched. Finally the
        update is written to the site logger.
        """
        items_changed = super(DocumentProcessorBase, self).updateEntry(
            obj, row, site, filename)
        # Replace entire history
        if 'history' in row:
            new_history = row['history']
            if new_history not in (IGNORE_MARKER, ''):
                history = IObjectHistory(obj)
                # The cell holds the Python literal of the message list;
                # literal_eval parses it without executing code.
                history._annotations[
                    history.history_key] = literal_eval(new_history)
                items_changed += ('%s=%s, ' % ('history', new_history))
            row.pop('history')
        # Update state
        if 'state' in row:
            state = row['state']
            if state not in (IGNORE_MARKER, ''):
                # The state is set directly, no transition is fired.
                IWorkflowState(obj).setState(state)
                msg = _("State '${a}' set", mapping = {'a':state})
                history = IObjectHistory(obj)
                history.addMessage(msg)
                items_changed += ('%s=%s, ' % ('state', state))
            row.pop('state')
        # Log actions...
        location_field = self.location_fields[0]
        grok.getSite().logger.info(
            '%s - %s - %s - updated: %s'
            % (self.name, filename, row[location_field], items_changed))
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the ``(errs, inv_errs, conv_dict)`` triple of the base
        class. In `create` mode a ``('class_name', 'wrong processor')``
        error is appended if the `class_name` of the row is not the
        `factory_name` of this processor without its 'waeup.' prefix.
        """
        errs, inv_errs, conv_dict = super(
            DocumentProcessorBase, self).checkConversion(row, mode=mode)
        # We need to check if the class_name corresponds with the
        # processor chosen. This is to avoid accidentally wrong imports.
        if mode == 'create':
            class_name = row.get('class_name', None)
            # Remove the prefix explicitly. str.strip('waeup.') would
            # strip any of the characters 'w', 'a', 'e', 'u', 'p', '.'
            # from both ends and could mangle other factory names.
            prefix = 'waeup.'
            expected = self.factory_name
            if expected.startswith(prefix):
                expected = expected[len(prefix):]
            if class_name != expected:
                errs.append(('class_name','wrong processor'))
        return errs, inv_errs, conv_dict
155
156
class PDFDocumentProcessor(DocumentProcessorBase):
    """Batch processor which handles IPDFDocument objects.

    Registered under the name given in `util_name`.
    """
    iface = IPDFDocument
    factory_name = 'waeup.PDFDocument'
    name = _('Public PDF Document Processor')

    util_name = 'pdfdocumentprocessor'
    grok.name(util_name)
167
168
class HTMLDocumentProcessor(DocumentProcessorBase):
    """Batch processor which handles IHTMLDocument objects.

    Registered under the name given in `util_name`.
    """
    iface = IHTMLDocument
    factory_name = 'waeup.HTMLDocument'
    name = _('Public HTML Document Processor')

    util_name = 'htmldocumentprocessor'
    grok.name(util_name)
179
180
class RESTDocumentProcessor(DocumentProcessorBase):
    """Batch processor which handles IRESTDocument objects.

    Registered under the name given in `util_name`.
    """
    iface = IRESTDocument
    factory_name = 'waeup.RESTDocument'
    name = _('Public REST Document Processor')

    util_name = 'restdocumentprocessor'
    grok.name(util_name)
Note: See TracBrowser for help on using the repository browser.