source: main/waeup.ikoba/trunk/src/waeup/ikoba/documents/batching.py @ 12408

Last change on this file since 12408 was 12267, checked in by Henrik Bettermann, 10 years ago

Use a baseclass for document batch processors.

  • Property svn:keywords set to Id
File size: 4.9 KB
Line 
1## $Id: batching.py 12267 2014-12-20 15:59:04Z henrik $
2##
3## Copyright (C) 2014 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""Batch processing components for document objects.
19
20Batch processors eat CSV files to add, update or remove large numbers
21of certain kinds of objects at once.
22"""
23import grok
24import unicodecsv as csv  # XXX: csv ops should move to dedicated module.
25from time import time
26from datetime import datetime
27from zope.i18n import translate
28from zope.interface import Interface
29from zope.schema import getFields
30from zope.component import queryUtility, getUtility, createObject
31from zope.event import notify
32from zope.catalog.interfaces import ICatalog
33from hurry.workflow.interfaces import IWorkflowState, IWorkflowInfo
34from waeup.ikoba.interfaces import (
35    IBatchProcessor, FatalCSVError, IObjectConverter, IUserAccount,
36    IObjectHistory, IGNORE_MARKER)
37from waeup.ikoba.interfaces import IIkobaUtils
38from waeup.ikoba.interfaces import MessageFactory as _
39from waeup.ikoba.documents.interfaces import IPDFDocument, IHTMLDocument
40from waeup.ikoba.utils.batching import BatchProcessor
41
42
class DocumentProcessorBase(BatchProcessor):
    """A base for batch processors for IDocument objects.

    Concrete processors are expected to set `util_name`, `name`,
    `iface` and `factory_name`. The methods defined here locate
    documents in ``site['documents']`` by their ``document_id``.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    grok.baseclass()

    util_name = None
    name = None
    iface = None

    # Row column(s) used to identify a document.
    location_fields = ['document_id',]
    factory_name = None

    # `class_name` is carried in the CSV in addition to the schema
    # fields; it is compared with `factory_name` in `checkConversion`.
    additional_fields = ['class_name']
    additional_headers = ['class_name']

    mode = None

    def parentsExist(self, row, site):
        """Return True if `site` has a ``documents`` container.
        """
        return 'documents' in site.keys()

    def entryExists(self, row, site):
        """Return True if the documents catalog knows the row's document.

        The ``documents_catalog`` utility is searched for entries whose
        ``document_id`` equals the value given in `row`.
        """
        document_id = row.get('document_id', None)
        cat = queryUtility(ICatalog, name='documents_catalog')
        # Lower and upper bound are identical: exact match only.
        results = list(
            cat.searchResults(document_id=(document_id, document_id)))
        return bool(results)

    def getParent(self, row, site):
        """Return the ``documents`` container of `site`.
        """
        return site['documents']

    def getEntry(self, row, site):
        """Return the document addressed by `row` or None.

        None is returned if `entryExists` reports no match.
        """
        if not self.entryExists(row, site):
            return None
        parent = self.getParent(row, site)
        return parent.get(row['document_id'])

    def addEntry(self, obj, row, site):
        """Add `obj` to the documents container via ``addDocument``.
        """
        parent = self.getParent(row, site)
        parent.addDocument(obj)
        return

    def delEntry(self, row, site):
        """Remove the document addressed by `row`, if there is one.

        The removal is logged to the site logger before the document
        is deleted from its container.
        """
        document = self.getEntry(row, site)
        parent = self.getParent(row, site)
        if document is not None:
            grok.getSite().logger.info(
                '%s - Document removed' % document.document_id)
            del parent[document.document_id]
        return

    def updateEntry(self, obj, row, site, filename):
        """Update obj to the values given in row.

        The update itself is delegated to the parent class; this
        override only adds a log entry naming the processor, the
        import file, the document and the changes reported by the
        parent implementation. Returns None.
        """
        items_changed = super(DocumentProcessorBase, self).updateEntry(
            obj, row, site, filename)
        # Log actions...
        location_field = self.location_fields[0]
        grok.getSite().logger.info(
            '%s - %s - %s - updated: %s'
            % (self.name, filename, row[location_field], items_changed))
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        In addition to the checks of the parent class, in ``create``
        mode the row's ``class_name`` must equal `factory_name` without
        its ``waeup.`` prefix. Otherwise a ``('class_name', 'wrong
        processor')`` error is appended.

        Returns the tuple ``(errs, inv_errs, conv_dict)``.
        """
        errs, inv_errs, conv_dict = super(
            DocumentProcessorBase, self).checkConversion(row, mode=mode)
        # We need to check if the class_name corresponds with the
        # processor chosen. This is to avoid accidentally wrong imports.
        if mode == 'create':
            prefix = 'waeup.'
            expected = self.factory_name
            # Remove the prefix explicitly. str.strip(prefix) would
            # treat `prefix` as a set of characters and also eat
            # matching characters at either end of the class name.
            if expected.startswith(prefix):
                expected = expected[len(prefix):]
            class_name = row.get('class_name', None)
            if class_name != expected:
                errs.append(('class_name', 'wrong processor'))
        return errs, inv_errs, conv_dict
122
class PDFDocumentProcessor(DocumentProcessorBase):
    """Batch processor configured for IPDFDocument objects.

    Only configuration is defined here; every method is inherited
    from DocumentProcessorBase.
    """
    # Interface and factory of the objects this processor handles.
    iface = IPDFDocument
    factory_name = 'waeup.PDFDocument'

    # Translatable title of the processor.
    name = _('Public PDF Document Processor')

    # Name handed to grok.name() for this processor.
    util_name = 'pdfdocumentprocessor'
    grok.name(util_name)
133
class HTMLDocumentProcessor(PDFDocumentProcessor):
    """Batch processor configured for IHTMLDocument objects.

    Only configuration is defined here; no method is overridden.
    """
    # NOTE(review): this class derives from PDFDocumentProcessor but
    # overrides every attribute that class sets -- confirm whether
    # DocumentProcessorBase was the intended base class.

    # Interface and factory of the objects this processor handles.
    iface = IHTMLDocument
    factory_name = 'waeup.HTMLDocument'

    # Translatable title of the processor.
    name = _('Public HTML Document Processor')

    # Name handed to grok.name() for this processor.
    util_name = 'htmldocumentprocessor'
    grok.name(util_name)
Note: See TracBrowser for help on using the repository browser.