## $Id: batching.py 12094 2014-11-30 08:12:27Z henrik $
##
## Copyright (C) 2014 Uli Fouquet & Henrik Bettermann
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
##
"""Batch processing components for customer objects.

Batch processors eat CSV files to add, update or remove large numbers
of certain kinds of objects at once.

Here we define the processors for customer-specific objects like
customers and their subobjects (documents and applications).
"""
import grok
import unicodecsv as csv  # XXX: csv ops should move to dedicated module.
from time import time
from datetime import datetime
from zope.i18n import translate
from zope.interface import Interface
from zope.schema import getFields
from zope.component import queryUtility, getUtility, createObject
from zope.event import notify
from zope.catalog.interfaces import ICatalog
from hurry.workflow.interfaces import IWorkflowState, IWorkflowInfo
from waeup.ikoba.interfaces import (
    IBatchProcessor, FatalCSVError, IObjectConverter, IUserAccount,
    IObjectHistory, IGNORE_MARKER)
from waeup.ikoba.interfaces import IIkobaUtils
from waeup.ikoba.interfaces import MessageFactory as _
from waeup.ikoba.documents.utils import generate_document_id
from waeup.ikoba.customers.interfaces import (
    ICustomer, ICustomerUpdateByRegNo,
    ICustomerDocument, IApplication)
from waeup.ikoba.customers.workflow import (
    IMPORTABLE_REGISTRATION_STATES, IMPORTABLE_REGISTRATION_TRANSITIONS,
    IMPORTABLE_APPLICATION_STATES, IMPORTABLE_APPLICATION_TRANSITIONS)
from waeup.ikoba.utils.batching import BatchProcessor


def _build_header_mapping(path, headerfields, mode, removal_fields):
    """Map the raw header of the CSV file at `path` to `headerfields`.

    The first row of the file is read as raw header; the file is closed
    again before returning. `headerfields` is matched to the raw header
    by position.

    In mode ``'remove'`` only fields listed in `removal_fields` are
    mapped. Columns whose field is ``--IGNORE--`` are always skipped.

    Returns a dict ``{raw_header_name: fieldname}``.
    """
    with open(path, 'rb') as csv_file:
        raw_header = next(csv.reader(csv_file))
    result = dict()
    for num, field in enumerate(headerfields):
        if mode == 'remove' and field not in removal_fields:
            continue
        if field == u'--IGNORE--':
            # Skip ignored columns in failed and finished data files.
            continue
        result[raw_header[num]] = field
    return result


class CustomerProcessor(BatchProcessor):
    """A batch processor for ICustomer objects.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'customerprocessor'
    grok.name(util_name)

    name = _('Customer Processor')
    iface = ICustomer
    iface_byregnumber = ICustomerUpdateByRegNo

    location_fields = []
    factory_name = 'waeup.Customer'

    @property
    def available_fields(self):
        """Sorted list of all field names this processor accepts.

        These are the fields of `iface` plus the locator and workflow
        columns handled by this processor itself.
        """
        fields = getFields(self.iface)
        return sorted(set(
            ['customer_id', 'reg_number', 'password', 'state', 'transition']
            ).union(fields.keys()))

    def checkHeaders(self, headerfields, mode='create'):
        """Check the CSV header fields for general usability.

        Raises `FatalCSVError` if

        - both ``state`` and ``transition`` are given,
        - neither ``customer_id`` nor ``reg_number`` is given,
        - in ``create`` mode, one of `required_fields` is missing,
        - a column name (not starting with ``--``) appears twice.

        Returns ``True`` otherwise.
        """
        if 'state' in headerfields and 'transition' in headerfields:
            raise FatalCSVError(
                "State and transition can't be imported at the same time!")
        if 'reg_number' not in headerfields and \
                'customer_id' not in headerfields:
            raise FatalCSVError(
                "Need at least columns customer_id or reg_number " +
                "for import!")
        if mode == 'create':
            for field in self.required_fields:
                if field not in headerfields:
                    raise FatalCSVError(
                        "Need at least columns %s for import!" %
                        ', '.join(["'%s'" % x for x in self.required_fields]))
        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Tell whether `site` contains a ``customers`` entry.
        """
        return 'customers' in site.keys()

    def getLocator(self, row):
        """Return the name of the column that identifies the customer.

        ``customer_id`` takes precedence over ``reg_number``. Returns
        ``None`` if neither column holds a usable value.
        """
        if row.get('customer_id', None) not in (None, IGNORE_MARKER):
            return 'customer_id'
        elif row.get('reg_number', None) not in (None, IGNORE_MARKER):
            return 'reg_number'
        else:
            return None

    # The entry never exists in create mode.
    def entryExists(self, row, site):
        """Tell whether a customer matching `row` can be found.
        """
        return self.getEntry(row, site) is not None

    def getParent(self, row, site):
        """Return the ``customers`` container of `site`.
        """
        return site['customers']

    def getEntry(self, row, site):
        """Return the customer addressed by `row` or ``None``.

        Depending on the locator the customer is looked up by id in the
        ``customers`` container or by ``reg_number`` in the
        ``customers_catalog``.
        """
        if 'customers' not in site.keys():
            return None
        locator = self.getLocator(row)
        if locator == 'customer_id':
            if row['customer_id'] in site['customers']:
                return site['customers'][row['customer_id']]
        elif locator == 'reg_number':
            reg_number = row['reg_number']
            cat = queryUtility(ICatalog, name='customers_catalog')
            results = list(
                cat.searchResults(reg_number=(reg_number, reg_number)))
            if results:
                return results[0]
        return None

    def addEntry(self, obj, row, site):
        """Add customer `obj` to the ``customers`` container.
        """
        parent = self.getParent(row, site)
        parent.addCustomer(obj)
        # Reset _curr_cust_id if customer_id has been imported
        if self.getLocator(row) == 'customer_id':
            parent._curr_cust_id -= 1
        # We have to log this if state is provided. If not,
        # logging is done by the event handler handle_customer_added
        if 'state' in row:
            parent.logger.info('%s - Customer record created' % obj.customer_id)
        history = IObjectHistory(obj)
        history.addMessage(_('Customer record created'))
        return

    def delEntry(self, row, site):
        """Remove the customer addressed by `row`, if it exists.

        The removal is logged via the logger of the parent container.
        """
        customer = self.getEntry(row, site)
        if customer is not None:
            parent = self.getParent(row, site)
            parent.logger.info('%s - Customer removed' % customer.customer_id)
            del parent[customer.customer_id]

    def checkUpdateRequirements(self, obj, row, site):
        """Checks requirements the object must fulfill when being updated.

        This method is not used in case of deleting or adding objects.

        Returns error messages as strings in case of requirement
        problems.
        """
        transition = row.get('transition', IGNORE_MARKER)
        if transition not in (IGNORE_MARKER, ''):
            allowed_transitions = IWorkflowInfo(obj).getManualTransitionIds()
            if transition not in allowed_transitions:
                return 'Transition not allowed.'
        return None

    def updateEntry(self, obj, row, site, filename):
        """Update obj to the values given in row.

        ``password``, ``state`` and ``transition`` are handled here and
        removed from `row`; an empty ``customer_id`` is removed as well.
        All remaining values are applied by the base class.

        Returns a string describing the changed items. The same string
        is written to the logger of the parent container.
        """
        items_changed = ''

        # Remove customer_id from row if empty
        if 'customer_id' in row and row['customer_id'] in (
                None, IGNORE_MARKER):
            row.pop('customer_id')

        # Update password
        # XXX: Take DELETION_MARKER into consideration
        if 'password' in row:
            passwd = row.get('password', IGNORE_MARKER)
            if passwd not in ('', IGNORE_MARKER):
                if passwd.startswith('{SSHA}'):
                    # already encrypted password
                    obj.password = passwd
                else:
                    # not yet encrypted password
                    IUserAccount(obj).setPassword(passwd)
                # NOTE(review): the password value (possibly cleartext)
                # ends up in items_changed and therefore in the log.
                items_changed += ('%s=%s, ' % ('password', passwd))
            row.pop('password')

        # Update registration state
        if 'state' in row:
            state = row.get('state', IGNORE_MARKER)
            if state not in (IGNORE_MARKER, ''):
                value = row['state']
                IWorkflowState(obj).setState(value)
                msg = _("State '${a}' set", mapping={'a': value})
                history = IObjectHistory(obj)
                history.addMessage(msg)
                items_changed += ('%s=%s, ' % ('state', state))
            row.pop('state')

        if 'transition' in row:
            transition = row.get('transition', IGNORE_MARKER)
            if transition not in (IGNORE_MARKER, ''):
                value = row['transition']
                IWorkflowInfo(obj).fireTransition(value)
                items_changed += ('%s=%s, ' % ('transition', transition))
            row.pop('transition')

        # apply other values...
        items_changed += super(CustomerProcessor, self).updateEntry(
            obj, row, site, filename)

        # Log actions...
        parent = self.getParent(row, site)
        if hasattr(obj, 'customer_id'):
            # Update mode: the customer exists and we can get the customer_id.
            # Create mode: the record contains the customer_id
            parent.logger.info(
                '%s - %s - %s - updated: %s'
                % (self.name, filename, obj.customer_id, items_changed))
        else:
            # Create mode: the customer does not yet exist
            # XXX: It seems that this never happens because customer_id
            # is always set.
            # NOTE(review): if this branch were ever reached, reading
            # obj.customer_id below would raise AttributeError.
            parent.logger.info(
                '%s - %s - %s - imported: %s'
                % (self.name, filename, obj.customer_id, items_changed))
        return items_changed

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used
        fieldnames.

        In ``remove`` mode only the locator columns ``customer_id`` and
        ``reg_number`` are mapped.
        """
        return _build_header_mapping(
            path, headerfields, mode, ['customer_id', 'reg_number'])

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` delivered by
        the object converter, extended by errors for ``transition`` and
        ``state`` values which are not importable.
        """
        iface = self.iface
        if mode in ['update', 'remove']:
            if self.getLocator(row) == 'reg_number':
                iface = self.iface_byregnumber
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name, mode=mode)
        if 'transition' in row:
            if row['transition'] not in IMPORTABLE_REGISTRATION_TRANSITIONS:
                if row['transition'] not in (IGNORE_MARKER, ''):
                    errs.append(('transition', 'not allowed'))
        if 'state' in row:
            if row['state'] not in IMPORTABLE_REGISTRATION_STATES:
                if row['state'] not in (IGNORE_MARKER, ''):
                    errs.append(('state', 'not allowed'))
                else:
                    # State is an attribute of Customer and must not
                    # be changed if empty.
                    conv_dict['state'] = IGNORE_MARKER
        try:
            # Correct cust_id counter. As the IConverter for customers
            # creates customer objects that are not used afterwards, we
            # have to fix the site-wide customer_id counter.
            site = grok.getSite()
            customers = site['customers']
            customers._curr_cust_id -= 1
        except (KeyError, TypeError, AttributeError):
            # Deliberately best-effort: without a site or a customers
            # container there is no counter to correct.
            pass
        return errs, inv_errs, conv_dict


class CustomerProcessorBase(BatchProcessor):
    """A base for customer subitem processors.

    Helps reducing redundancy.
    """
    grok.baseclass()

    # additional available fields
    # beside 'customer_id' and 'reg_number'
    additional_fields = []

    #: header fields additionally required
    additional_headers = []

    @property
    def available_fields(self):
        """Sorted list of all field names this processor accepts.

        These are the fields of `iface` plus the locator columns and
        `additional_fields`.
        """
        fields = ['customer_id', 'reg_number'] + self.additional_fields
        return sorted(set(fields).union(getFields(self.iface).keys()))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header fields for general usability.

        Raises `FatalCSVError` if neither ``customer_id`` nor
        ``reg_number`` is given, if one of `additional_headers` is
        missing or if a column name (not starting with ``--``) appears
        twice. `mode` is not evaluated here.

        Returns ``True`` otherwise.
        """
        if 'reg_number' not in headerfields and \
                'customer_id' not in headerfields:
            raise FatalCSVError(
                "Need at least columns customer_id " +
                "or reg_number for import!")
        for name in self.additional_headers:
            if name not in headerfields:
                raise FatalCSVError(
                    "Need %s for import!" % name)

        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def _getCustomer(self, row, site):
        """Return the customer addressed by `row` or ``None``.

        A non-empty ``customer_id`` takes precedence over
        ``reg_number``; the latter is looked up in the
        ``customers_catalog``.
        """
        NON_VALUES = ['', IGNORE_MARKER]
        if 'customers' not in site.keys():
            return None
        if row.get('customer_id', '') not in NON_VALUES:
            if row['customer_id'] in site['customers']:
                customer = site['customers'][row['customer_id']]
                return customer
        elif row.get('reg_number', '') not in NON_VALUES:
            reg_number = row['reg_number']
            cat = queryUtility(ICatalog, name='customers_catalog')
            results = list(
                cat.searchResults(reg_number=(reg_number, reg_number)))
            if results:
                return results[0]
        return None

    def _getClassName(self):
        """Return `factory_name` without its leading ``waeup.`` prefix.

        This is the value expected in the ``class_name`` column.
        """
        prefix = 'waeup.'
        factory_name = self.factory_name
        if factory_name.startswith(prefix):
            return factory_name[len(prefix):]
        return factory_name

    def parentsExist(self, row, site):
        """Tell whether the parent container of the entry can be found.
        """
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether the entry addressed by `row` can be found.
        """
        return self.getEntry(row, site) is not None

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` delivered by
        the object converter for `iface`.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name, mode=mode)
        return errs, inv_errs, conv_dict

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used
        fieldnames.

        In ``remove`` mode only a fixed set of locator columns is
        mapped.
        """
        # NOTE(review): 'code' and 'level' are not referenced anywhere
        # else in this module; kept to preserve behaviour.
        return _build_header_mapping(
            path, headerfields, mode,
            ['customer_id', 'reg_number', 'document_id', 'code', 'level'])


class CustomerDocumentProcessor(CustomerProcessorBase):
    """A batch processor for ICustomerDocument objects.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'customerdocumentprocessor'
    grok.name(util_name)

    name = _('CustomerDocument Processor')
    iface = ICustomerDocument
    factory_name = 'waeup.CustomerSampleDocument'

    location_fields = []
    additional_fields = ['document_id', 'class_name']
    additional_headers = ['class_name']

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header fields for general usability.

        In addition to the checks of the base class, ``document_id`` is
        required in ``update`` and ``remove`` mode.
        """
        super(CustomerDocumentProcessor, self).checkHeaders(
            headerfields, mode=mode)
        if mode in ('update', 'remove') and 'document_id' not in headerfields:
            raise FatalCSVError(
                "Need document_id for import in update and remove modes!")
        return True

    def getParent(self, row, site):
        """Return the ``documents`` container of the customer addressed
        by `row`, or ``None`` if the customer can't be found.
        """
        customer = self._getCustomer(row, site)
        if customer is None:
            return None
        return customer['documents']

    def getEntry(self, row, site):
        """Return the document addressed by `row` or ``None``.
        """
        documents = self.getParent(row, site)
        if documents is None:
            return None
        document_id = row.get('document_id', None)
        if document_id is None:
            return None
        entry = documents.get(document_id)
        return entry

    def updateEntry(self, obj, row, site, filename):
        """Update obj to the values given in row.

        The changes are logged via the logger of the customers
        container. Returns ``None``.
        """
        items_changed = super(CustomerDocumentProcessor, self).updateEntry(
            obj, row, site, filename)
        customer = self.getParent(row, site).__parent__
        customer.__parent__.logger.info(
            '%s - %s - %s - updated: %s'
            % (self.name, filename, customer.customer_id, items_changed))
        return

    def addEntry(self, obj, row, site):
        """Store `obj` in the documents container under its
        ``document_id`` (with leading and trailing ``#`` removed).
        """
        parent = self.getParent(row, site)
        document_id = row['document_id'].strip('#')
        parent[document_id] = obj
        return

    def delEntry(self, row, site):
        """Remove the document addressed by `row`, if it exists.

        The removal is logged via the logger of the customers
        container.
        """
        document = self.getEntry(row, site)
        parent = self.getParent(row, site)
        if document is not None:
            customer = self._getCustomer(row, site)
            customer.__parent__.logger.info('%s - Document removed: %s'
                % (customer.customer_id, document.document_id))
            del parent[document.document_id]
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        On top of the base class conversion this checks that

        - ``class_name`` matches the factory of this processor (not in
          ``remove`` mode),
        - in ``create`` mode a given ``document_id`` is not yet
          catalogued; a missing one is generated,
        - in all other modes ``document_id`` starts with ``d``.

        Returns the tuple ``(errs, inv_errs, conv_dict)``.
        """
        errs, inv_errs, conv_dict = super(
            CustomerDocumentProcessor, self).checkConversion(row, mode=mode)
        # We need to check if the class_name corresponds with the
        # processor chosen. This is to avoid accidentally wrong imports.
        if mode != 'remove':
            class_name = row.get('class_name', None)
            # Remove the 'waeup.' prefix explicitly; str.strip() would
            # treat its argument as a set of characters.
            if class_name != self._getClassName():
                errs.append(('class_name', 'wrong processor'))
        # We have to check document_id.
        document_id = row.get('document_id', None)
        if mode == 'create':
            if not document_id:
                document_id = generate_document_id()
                conv_dict['document_id'] = document_id
                return errs, inv_errs, conv_dict
            cat = queryUtility(ICatalog, name='documents_catalog')
            results = list(
                cat.searchResults(document_id=(document_id, document_id)))
            if results:
                # document_id must not exist.
                errs.append(('document_id', 'id exists'))
        else:
            # A missing id is reported like a malformed one instead of
            # failing with an AttributeError.
            if not document_id or not document_id.startswith('d'):
                errs.append(('document_id', 'invalid format'))
        return errs, inv_errs, conv_dict


class ApplicationProcessor(CustomerProcessorBase):
    """A batch processor for IApplication objects.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'applicationprocessor'
    grok.name(util_name)

    name = _('Application Processor')
    iface = IApplication
    factory_name = 'waeup.SampleApplication'

    location_fields = []
    additional_fields = ['application_id', 'class_name']
    additional_headers = ['class_name']

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header fields for general usability.

        In addition to the checks of the base class, ``application_id``
        is required in ``update`` and ``remove`` mode.
        """
        super(ApplicationProcessor, self).checkHeaders(
            headerfields, mode=mode)
        if mode in ('update', 'remove') and \
                'application_id' not in headerfields:
            raise FatalCSVError(
                "Need application_id for import in update and remove modes!")
        return True

    def getParent(self, row, site):
        """Return the ``applications`` container of the customer
        addressed by `row`, or ``None`` if the customer can't be found.
        """
        customer = self._getCustomer(row, site)
        if customer is None:
            return None
        return customer['applications']

    def getEntry(self, row, site):
        """Return the application addressed by `row` or ``None``.
        """
        applications = self.getParent(row, site)
        if applications is None:
            return None
        application_id = row.get('application_id', None)
        if application_id is None:
            return None
        entry = applications.get(application_id)
        return entry

    def updateEntry(self, obj, row, site, filename):
        """Update obj to the values given in row.

        The changes are logged via the logger of the customers
        container. Returns ``None``.
        """
        items_changed = super(ApplicationProcessor, self).updateEntry(
            obj, row, site, filename)
        customer = self.getParent(row, site).__parent__
        customer.__parent__.logger.info(
            '%s - %s - %s - updated: %s'
            % (self.name, filename, customer.customer_id, items_changed))
        return

    def addEntry(self, obj, row, site):
        """Store `obj` in the applications container under its
        ``application_id`` (with leading and trailing ``#`` removed).
        """
        parent = self.getParent(row, site)
        application_id = row['application_id'].strip('#')
        parent[application_id] = obj
        return

    def delEntry(self, row, site):
        """Remove the application addressed by `row`, if it exists.

        The removal is logged via the logger of the customers
        container.
        """
        application = self.getEntry(row, site)
        parent = self.getParent(row, site)
        if application is not None:
            customer = self._getCustomer(row, site)
            customer.__parent__.logger.info('%s - Application removed: %s'
                % (customer.customer_id, application.application_id))
            del parent[application.application_id]
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        On top of the base class conversion this checks that

        - ``class_name`` matches the factory of this processor (not in
          ``remove`` mode),
        - in ``create`` mode a given ``application_id`` is not yet
          catalogued; a missing one is generated,
        - in all other modes ``application_id`` starts with ``a``.

        Returns the tuple ``(errs, inv_errs, conv_dict)``.
        """
        errs, inv_errs, conv_dict = super(
            ApplicationProcessor, self).checkConversion(row, mode=mode)
        # We need to check if the class_name corresponds with the
        # processor chosen. This is to avoid accidentally wrong imports.
        if mode != 'remove':
            class_name = row.get('class_name', None)
            # Remove the 'waeup.' prefix explicitly; str.strip() would
            # treat its argument as a set of characters.
            if class_name != self._getClassName():
                errs.append(('class_name', 'wrong processor'))
        # We have to check application_id.
        application_id = row.get('application_id', None)
        if mode == 'create':
            if not application_id:
                # NOTE(review): generate_application_id is neither
                # imported nor defined in the visible part of this
                # module; unless it is provided elsewhere this call
                # raises NameError. Import it from its defining module.
                application_id = generate_application_id()
                conv_dict['application_id'] = application_id
                return errs, inv_errs, conv_dict
            cat = queryUtility(ICatalog, name='applications_catalog')
            results = list(
                cat.searchResults(
                    application_id=(application_id, application_id)))
            if results:
                # application_id must not exist.
                errs.append(('application_id', 'id exists'))
        else:
            # A missing id is reported like a malformed one instead of
            # failing with an AttributeError.
            if not application_id or not application_id.startswith('a'):
                errs.append(('application_id', 'invalid format'))
        return errs, inv_errs, conv_dict