source: main/waeup.ikoba/trunk/src/waeup/ikoba/documents/batching.py @ 12517

Last change on this file since 12517 was 12410, checked in by Henrik Bettermann, 10 years ago

Add exporter and batch processor for REST documents.

Fix DocumentProcessorBase.

  • Property svn:keywords set to Id
File size: 5.3 KB
Line 
1## $Id: batching.py 12410 2015-01-07 08:49:38Z henrik $
2##
3## Copyright (C) 2014 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""Batch processing components for document objects.
19
20Batch processors eat CSV files to add, update or remove large numbers
21of certain kinds of objects at once.
22"""
23import grok
24import unicodecsv as csv  # XXX: csv ops should move to dedicated module.
25from time import time
26from datetime import datetime
27from zope.i18n import translate
28from zope.interface import Interface
29from zope.schema import getFields
30from zope.component import queryUtility, getUtility, createObject
31from zope.event import notify
32from zope.catalog.interfaces import ICatalog
33from hurry.workflow.interfaces import IWorkflowState, IWorkflowInfo
34from waeup.ikoba.interfaces import (
35    IBatchProcessor, FatalCSVError, IObjectConverter, IUserAccount,
36    IObjectHistory, IGNORE_MARKER)
37from waeup.ikoba.interfaces import IIkobaUtils
38from waeup.ikoba.interfaces import MessageFactory as _
39from waeup.ikoba.documents.interfaces import (
40    IPDFDocument, IHTMLDocument, IRESTDocument)
41from waeup.ikoba.utils.batching import BatchProcessor
42
43
class DocumentProcessorBase(BatchProcessor):
    """A base for batch processors for IDocument objects.

    Concrete subclasses are expected to set `util_name`, `name`,
    `iface` and `factory_name`. Documents are looked up in and stored
    into the ``site['documents']`` container and are located by their
    ``document_id``.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    grok.baseclass()

    # To be overridden by concrete processors.
    util_name = None
    name = None
    iface = None

    # Column identifying the object a row refers to.
    location_fields = ['document_id',]
    # Columns accepted in addition to the schema fields of `iface`.
    additional_fields = ['class_name',]

    # Dotted factory name; the part after 'waeup.' must equal the
    # `class_name` column when importing in create mode.
    factory_name = None

    mode = None

    @property
    def available_fields(self):
        """Sorted names of all columns this processor accepts.

        This is the union of `additional_fields` and the field names
        of the schema `iface`.
        """
        # list(...) over the field mapping yields its keys and works
        # no matter whether keys() returns a list or a view.
        return sorted(set(
            self.additional_fields + list(getFields(self.iface))))

    def parentsExist(self, row, site):
        """Tell whether `site` contains a 'documents' container.
        """
        return 'documents' in site.keys()

    def entryExists(self, row, site):
        """Tell whether a document with the row's `document_id` is
        found in the 'documents_catalog'.

        Rows without a `document_id` never match.
        """
        document_id = row.get('document_id', None)
        if document_id is None:
            # NOTE(review): a (None, None) range is presumably an
            # unbounded query on a field index and would match any
            # document -- confirm against the catalog setup. Without
            # an id there is nothing we could look up anyway.
            return False
        cat = queryUtility(ICatalog, name='documents_catalog')
        results = cat.searchResults(document_id=(document_id, document_id))
        return bool(list(results))

    def getParent(self, row, site):
        """Return the documents container of `site`.
        """
        return site['documents']

    def getEntry(self, row, site):
        """Return the document denoted by `row` or ``None`` if it
        does not exist.
        """
        if not self.entryExists(row, site):
            return None
        parent = self.getParent(row, site)
        return parent.get(row['document_id'])

    def addEntry(self, obj, row, site):
        """Add `obj` to the documents container of `site`.
        """
        parent = self.getParent(row, site)
        parent.addDocument(obj)
        return

    def delEntry(self, row, site):
        """Remove the document denoted by `row`, if it exists, and
        log the removal.
        """
        document = self.getEntry(row, site)
        parent = self.getParent(row, site)
        if document is not None:
            grok.getSite().logger.info(
                '%s - Document removed' % document.document_id)
            del parent[document.document_id]
        return

    def updateEntry(self, obj, row, site, filename):
        """Update obj to the values given in row.

        The update itself is done by the base class; the changes it
        reports are written to the site logger. Returns ``None``.
        """
        items_changed = super(DocumentProcessorBase, self).updateEntry(
            obj, row, site, filename)
        # Log actions...
        location_field = self.location_fields[0]
        grok.getSite().logger.info(
            '%s - %s - %s - updated: %s'
            % (self.name, filename, row[location_field], items_changed))
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` delivered by
        the base class. In create mode a ``('class_name', 'wrong
        processor')`` error is appended if the row's `class_name` does
        not match the factory of this processor.
        """
        errs, inv_errs, conv_dict = super(
            DocumentProcessorBase, self).checkConversion(row, mode=mode)
        # We need to check if the class_name corresponds with the
        # processor chosen. This is to avoid accidentally wrong imports.
        if mode == 'create':
            class_name = row.get('class_name', None)
            # Remove the literal 'waeup.' prefix only. str.strip() would
            # treat the argument as a set of characters and could eat
            # leading or trailing letters of the class name as well.
            prefix = 'waeup.'
            expected = self.factory_name
            if expected.startswith(prefix):
                expected = expected[len(prefix):]
            if class_name != expected:
                errs.append(('class_name','wrong processor'))
        return errs, inv_errs, conv_dict
127
128
class PDFDocumentProcessor(DocumentProcessorBase):
    """Batch processor for objects providing IPDFDocument.

    Only sets the attributes the base class leaves open; `util_name`
    is also passed to the grok name directive.
    """
    iface = IPDFDocument
    factory_name = 'waeup.PDFDocument'
    name = _('Public PDF Document Processor')

    util_name = 'pdfdocumentprocessor'
    grok.name(util_name)
139
140
class HTMLDocumentProcessor(DocumentProcessorBase):
    """Batch processor for objects providing IHTMLDocument.

    Only sets the attributes the base class leaves open; `util_name`
    is also passed to the grok name directive.
    """
    iface = IHTMLDocument
    factory_name = 'waeup.HTMLDocument'
    name = _('Public HTML Document Processor')

    util_name = 'htmldocumentprocessor'
    grok.name(util_name)
151
152
class RESTDocumentProcessor(DocumentProcessorBase):
    """Batch processor for objects providing IRESTDocument.

    Only sets the attributes the base class leaves open; `util_name`
    is also passed to the grok name directive.
    """
    iface = IRESTDocument
    factory_name = 'waeup.RESTDocument'
    name = _('Public REST Document Processor')

    util_name = 'restdocumentprocessor'
    grok.name(util_name)
Note: See TracBrowser for help on using the repository browser.