## $Id: batching.py 7811 2012-03-08 19:00:51Z uli $
##
## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
##
"""Batch processing components for student objects.

Batch processors eat CSV files to add, update or remove large numbers
of certain kinds of objects at once.

Here we define the processors for student-specific objects like
students, studycourses, payment tickets and accommodation tickets.
"""
import grok
import csv
from zope.interface import Interface
from zope.schema import getFields
from zope.component import queryUtility, getUtility
from zope.event import notify
from zope.catalog.interfaces import ICatalog
from hurry.workflow.interfaces import IWorkflowState
from waeup.kofa.interfaces import (
    IBatchProcessor, FatalCSVError, IObjectConverter, IUserAccount,
    IObjectHistory)
from waeup.kofa.students.interfaces import (
    IStudent, IStudentStudyCourse,
    IStudentUpdateByRegNo, IStudentUpdateByMatricNo,
    IStudentStudyLevel, ICourseTicket,
    IStudentOnlinePayment)
from waeup.kofa.students.workflow import IMPORTABLE_STATES
from waeup.kofa.utils.batching import BatchProcessor

# Columns that can identify a student, in lookup priority order.
_LOCATOR_FIELDS = ('student_id', 'reg_number', 'matric_number')


def _row_locator(row):
    """Return the name of the first locator column that has a non-empty
    value in `row`, or None if no such column exists.
    """
    for name in _LOCATOR_FIELDS:
        if row.get(name, None):
            return name
    return None


def _find_student(row, site, locator):
    """Look up the student addressed by `row`.

    `locator` names the column to use ('student_id', 'reg_number' or
    'matric_number'). Students are fetched directly from
    ``site['students']`` when located by id, and via the utility
    registered as 'students_catalog' otherwise.

    Returns the student, or None if `site` has no 'students' container,
    the locator is not one of the three known columns, or nothing
    matches.
    """
    if 'students' not in site.keys():
        return None
    if locator == 'student_id':
        students = site['students']
        student_id = row['student_id']
        if student_id in students:
            return students[student_id]
        return None
    if locator == 'reg_number':
        value = row['reg_number']
        cat = queryUtility(ICatalog, name='students_catalog')
        # The same value is passed as both ends of the query tuple.
        results = list(cat.searchResults(reg_number=(value, value)))
    elif locator == 'matric_number':
        value = row['matric_number']
        cat = queryUtility(ICatalog, name='students_catalog')
        results = list(cat.searchResults(matric_number=(value, value)))
    else:
        return None
    if results:
        return results[0]
    return None


def _check_locator_headers(headerfields):
    """Raise FatalCSVError unless at least one locator column is present
    in `headerfields`.
    """
    if not any(name in headerfields for name in _LOCATOR_FIELDS):
        raise FatalCSVError(
            "Need at least columns student_id or reg_number "
            "or matric_number for import!")


def _check_unique_headers(headerfields):
    """Raise FatalCSVError if a column name appears more than once.

    Header names starting with '--' are not taken into account.
    """
    not_ignored_fields = [
        x for x in headerfields if not x.startswith('--')]
    if len(set(not_ignored_fields)) < len(not_ignored_fields):
        raise FatalCSVError(
            "Double headers: each column name may only appear once.")


def _payment_key(raw_p_id):
    """Return the container key for a payment id given in an import file.

    Trailing/leading hash symbols are stripped (they can be used in
    import files to avoid annoying automatic number transformation by
    Excel or Calc). Ids not starting with 'p' are rewritten to 'p' plus
    everything after their first six characters (data migration from
    old SRP).
    """
    p_id = raw_p_id.strip('#')
    if p_id.startswith('p'):
        return p_id
    return 'p' + p_id[6:]


class StudentProcessor(BatchProcessor):
    """A batch processor for IStudent objects.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studentimporter'
    grok.name(util_name)

    name = u'Student Importer'
    iface = IStudent

    location_fields = []
    factory_name = 'waeup.Student'

    mode = None

    @property
    def available_fields(self):
        """Sorted list of all column names this processor knows about:
        the fields of `iface` plus locator, password and state columns.
        """
        extra = ['student_id', 'reg_number', 'matric_number',
                 'password', 'reg_state']
        return sorted(set(extra).union(getFields(self.iface)))

    def checkHeaders(self, headerfields, mode='create'):
        """Check the CSV header fields.

        Raises FatalCSVError if no locator column is present, if (in
        'create' mode) one of `self.required_fields` is missing, or if
        a non-ignored column name appears twice. Returns True otherwise.
        """
        _check_locator_headers(headerfields)
        if mode == 'create':
            for field in self.required_fields:
                if field not in headerfields:
                    raise FatalCSVError(
                        "Need at least columns %s for import!" %
                        ', '.join(["'%s'" % x for x in self.required_fields]))
        # Check for fields to be ignored...
        _check_unique_headers(headerfields)
        return True

    def parentsExist(self, row, site):
        """Tell whether `site` contains a 'students' container."""
        return 'students' in site.keys()

    def getLocator(self, row):
        """Return the name of the column used to locate the student
        ('student_id', 'reg_number' or 'matric_number'), or None if all
        three are missing or empty in `row`.
        """
        return _row_locator(row)

    # The entry never exists in create mode.
    def entryExists(self, row, site):
        """Tell whether the student addressed by `row` can be found."""
        return self.getEntry(row, site) is not None

    def getParent(self, row, site):
        """Return the students container of `site`."""
        return site['students']

    def getEntry(self, row, site):
        """Return the student addressed by `row` or None if not found."""
        return _find_student(row, site, self.getLocator(row))

    def addEntry(self, obj, row, site):
        """Add student `obj` to the students container and note the
        import in the object's history.
        """
        parent = self.getParent(row, site)
        parent.addStudent(obj)
        # We don't need to log this because it is already done by the
        # workflow transition event handler.
        history = IObjectHistory(obj)
        history.addMessage('Student record imported')

    def delEntry(self, row, site):
        """Remove the student addressed by `row`, if there is one, and
        log the removal.
        """
        student = self.getEntry(row, site)
        if student is None:
            return
        parent = self.getParent(row, site)
        parent.logger.info('%s - Student removed' % student.student_id)
        del parent[student.student_id]

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        A non-empty 'password' is set via IUserAccount, 'reg_state' is
        set via IWorkflowState (and recorded in the object history),
        any other key is set as attribute if `obj` already has an
        attribute of that name. The processed items are written to the
        logger of the students container.
        """
        # Remove student_id from row if empty
        if 'student_id' in row and row['student_id'] is None:
            row.pop('student_id')
        changes = []
        for key, value in row.items():
            # Set student password and all fields declared in interface.
            if key == 'password' and value != '':
                IUserAccount(obj).setPassword(value)
            elif key == 'reg_state':
                IWorkflowState(obj).setState(value)
                msg = "State '%s' set" % value
                history = IObjectHistory(obj)
                history.addMessage(msg)
            elif hasattr(obj, key):
                setattr(obj, key, value)
            if key == 'password':
                # Never write plaintext passwords into the log.
                continue
            changes.append('%s=%s, ' % (key, value))
        items_changed = ''.join(changes)
        parent = self.getParent(row, site)
        if hasattr(obj, 'student_id'):
            # Update mode: the student exists and we can get the student_id
            parent.logger.info(
                '%s - Student record updated: %s'
                % (obj.student_id, items_changed))
        else:
            # Create mode: the student does not yet exist
            parent.logger.info('Student data imported: %s' % items_changed)

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used
        fieldnames.

        The first row of the CSV file at `path` supplies the keys,
        `headerfields` (matched by position) the values. In 'remove'
        mode only the locator columns are mapped.
        """
        # Make sure the file is closed again once the header is read.
        with open(path, 'rb') as csv_file:
            raw_header = next(csv.reader(csv_file))
        result = dict()
        for num, field in enumerate(headerfields):
            if mode == 'remove' and field not in _LOCATOR_FIELDS:
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        In 'update' and 'remove' mode the interface used for conversion
        depends on the locator column. A 'reg_state' value that is not
        in IMPORTABLE_STATES is reported as error.

        Returns the tuple ``(errs, inv_errs, conv_dict)``.
        """
        iface = self.iface
        if mode in ['update', 'remove']:
            locator = self.getLocator(row)
            if locator == 'reg_number':
                iface = IStudentUpdateByRegNo
            elif locator == 'matric_number':
                iface = IStudentUpdateByMatricNo
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        if 'reg_state' in row and \
            row['reg_state'] not in IMPORTABLE_STATES:
            if row['reg_state'] != '':
                errs.append(('reg_state', 'not allowed'))
            else:
                errs.append(('reg_state', 'no value provided'))
        return errs, inv_errs, conv_dict


class StudentStudyCourseProcessor(BatchProcessor):
    """A batch processor for IStudentStudyCourse objects.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studycourseupdater'
    grok.name(util_name)

    name = u'StudentStudyCourse Importer (update only)'
    iface = IStudentStudyCourse
    factory_name = 'waeup.StudentStudyCourse'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted list of the fields of `iface` plus locator columns."""
        return sorted(set(_LOCATOR_FIELDS).union(getFields(self.iface)))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header fields.

        Raises FatalCSVError if no locator column is present or if a
        non-ignored column name appears twice. Returns True otherwise.
        """
        _check_locator_headers(headerfields)
        # Check for fields to be ignored...
        _check_unique_headers(headerfields)
        return True

    def getParent(self, row, site):
        """Return the student addressed by `row` or None if not found."""
        return _find_student(row, site, _row_locator(row))

    def parentsExist(self, row, site):
        """Tell whether the student addressed by `row` can be found."""
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether the addressed student has a 'studycourse'."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the 'studycourse' item of the addressed student, or
        None if the student (or the item) does not exist.
        """
        student = self.getParent(row, site)
        if student is None:
            return None
        return student.get('studycourse')

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        Only keys that are already attributes of `obj` are set. The
        change is logged and an ObjectModifiedEvent is sent for the
        parent of `obj`.
        """
        changes = []
        for key, value in row.items():
            # Skip fields not declared in interface.
            if hasattr(obj, key):
                setattr(obj, key, value)
            if key == 'certificate':
                # Log the certificate code instead of the object; fall
                # back to the raw value if it carries no code (e.g. None).
                value = getattr(value, 'code', value)
            changes.append('%s=%s, ' % (key, value))
        items_changed = ''.join(changes)
        parent = self.getParent(row, site)
        parent.__parent__.logger.info(
            '%s - Study course updated: %s'
            % (parent.student_id, items_changed))
        # Update the students_catalog
        notify(grok.ObjectModifiedEvent(obj.__parent__))

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)``.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # We have to check if current_level is in range of certificate.
        # This is not done by the converter. This kind of conversion
        # checking does only work if a combination of certificate and
        # current_level is provided.
        if 'certificate' in conv_dict and 'current_level' in conv_dict:
            certificate = conv_dict['certificate']
            current_level = conv_dict['current_level']
            # Levels up to 120 above the certificate's end level are
            # accepted.
            if current_level < certificate.start_level or \
                current_level > certificate.end_level + 120:
                errs.append(('current_level', 'not in range'))
        return errs, inv_errs, conv_dict


class StudentStudyLevelProcessor(BatchProcessor):
    """A batch processor for IStudentStudyLevel objects.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studylevelimporter'
    grok.name(util_name)

    name = u'StudentStudyLevel Importer'
    iface = IStudentStudyLevel
    factory_name = 'waeup.StudentStudyLevel'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted list of the fields of `iface` plus locator columns
        and 'level'.
        """
        extra = list(_LOCATOR_FIELDS) + ['level']
        return sorted(set(extra).union(getFields(self.iface)))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header fields.

        Raises FatalCSVError if no locator column is present, if the
        'level' column is missing or if a non-ignored column name
        appears twice. Returns True otherwise.
        """
        _check_locator_headers(headerfields)
        if 'level' not in headerfields:
            raise FatalCSVError(
                "Need level for import!")
        # Check for fields to be ignored...
        _check_unique_headers(headerfields)
        return True

    def getParent(self, row, site):
        """Return the 'studycourse' item of the student addressed by
        `row`, or None if the student cannot be found.
        """
        student = _find_student(row, site, _row_locator(row))
        if student is None:
            return None
        return student['studycourse']

    def parentsExist(self, row, site):
        """Tell whether the study course addressed by `row` exists."""
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether the study level addressed by `row` exists."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the item stored under ``row['level']`` in the study
        course, or None if study course or item do not exist.
        """
        studycourse = self.getParent(row, site)
        if studycourse is None:
            return None
        return studycourse.get(row['level'])

    def addEntry(self, obj, row, site):
        """Store `obj` in the study course under ``row['level']`` after
        setting its `level` attribute to the integer value.
        """
        parent = self.getParent(row, site)
        obj.level = int(row['level'])
        parent[row['level']] = obj

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)``.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # We have to check if level is a valid integer.
        # This is not done by the converter.
        try:
            level = int(row['level'])
        except (ValueError, TypeError):
            errs.append(('level', 'no integer'))
        else:
            # Valid levels are the multiples of 10 from 0 up to 690.
            if level not in range(0, 700, 10):
                errs.append(('level', 'no valid integer'))
        return errs, inv_errs, conv_dict


class CourseTicketProcessor(BatchProcessor):
    """A batch processor for ICourseTicket objects.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'courseticketimporter'
    grok.name(util_name)

    name = u'CourseTicket Importer'
    iface = ICourseTicket
    factory_name = 'waeup.CourseTicket'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted list of the fields of `iface` plus locator columns,
        'level' and 'code'.
        """
        extra = list(_LOCATOR_FIELDS) + ['level', 'code']
        return sorted(set(extra).union(getFields(self.iface)))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header fields.

        Raises FatalCSVError if no locator column is present, if the
        'level' or 'code' column is missing or if a non-ignored column
        name appears twice. Returns True otherwise.
        """
        _check_locator_headers(headerfields)
        if 'level' not in headerfields:
            raise FatalCSVError(
                "Need level for import!")
        if 'code' not in headerfields:
            raise FatalCSVError(
                "Need code for import!")
        # Check for fields to be ignored...
        _check_unique_headers(headerfields)
        return True

    def getParent(self, row, site):
        """Return the item stored under ``row['level']`` in the study
        course of the addressed student, or None if the student or that
        item cannot be found.
        """
        student = _find_student(row, site, _row_locator(row))
        if student is None:
            return None
        return student['studycourse'].get(row['level'])

    def parentsExist(self, row, site):
        """Tell whether the study level addressed by `row` exists."""
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether the course ticket addressed by `row` exists."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the item stored under ``row['code']`` in the study
        level, or None if study level or item do not exist.
        """
        level = self.getParent(row, site)
        if level is None:
            return None
        return level.get(row['code'])

    def addEntry(self, obj, row, site):
        """Fill `obj` with data of the first 'courses_catalog' result
        for ``row['code']`` and store it in the study level under that
        code.
        """
        parent = self.getParent(row, site)
        catalog = getUtility(ICatalog, name='courses_catalog')
        entries = list(
            catalog.searchResults(code=(row['code'], row['code'])))
        course = entries[0]
        # fcode/dcode are the codes of the third and second ancestor
        # of the catalog result, respectively.
        obj.fcode = course.__parent__.__parent__.__parent__.code
        obj.dcode = course.__parent__.__parent__.code
        obj.title = course.title
        obj.credits = course.credits
        obj.passmark = course.passmark
        obj.semester = course.semester
        parent[row['code']] = obj

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)``.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # We have to check if course really exists.
        # This is not done by the converter.
        catalog = getUtility(ICatalog, name='courses_catalog')
        entries = catalog.searchResults(code=(row['code'], row['code']))
        if len(entries) == 0:
            errs.append(('code', 'non-existent'))
        return errs, inv_errs, conv_dict


class StudentOnlinePaymentProcessor(BatchProcessor):
    """A batch processor for IStudentOnlinePayment objects.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'paymentimporter'
    grok.name(util_name)

    name = u'Payment Importer'
    iface = IStudentOnlinePayment
    factory_name = 'waeup.StudentOnlinePayment'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted list of the fields of `iface` plus locator columns
        and 'p_id'.
        """
        extra = list(_LOCATOR_FIELDS) + ['p_id']
        return sorted(set(extra).union(getFields(self.iface)))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header fields.

        Raises FatalCSVError if no locator column is present, if the
        'p_id' column is missing or if a non-ignored column name
        appears twice. Returns True otherwise.
        """
        _check_locator_headers(headerfields)
        if 'p_id' not in headerfields:
            raise FatalCSVError(
                "Need p_id for import!")
        # Check for fields to be ignored...
        _check_unique_headers(headerfields)
        return True

    def getParent(self, row, site):
        """Return the 'payments' item of the student addressed by
        `row`, or None if the student cannot be found.
        """
        student = _find_student(row, site, _row_locator(row))
        if student is None:
            return None
        return student['payments']

    def parentsExist(self, row, site):
        """Tell whether the payments container addressed by `row`
        exists.
        """
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether the payment addressed by `row` exists."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the payment addressed by ``row['p_id']`` or None if
        the payments container or the payment does not exist.
        """
        payments = self.getParent(row, site)
        if payments is None:
            return None
        # We can use the hash symbol at the end of p_id in import files
        # to avoid annoying automatic number transformation
        # by Excel or Calc
        return payments.get(_payment_key(row['p_id']))

    def addEntry(self, obj, row, site):
        """Store payment `obj` in the payments container.

        Ids not starting with 'p' are rewritten (see `_payment_key`)
        and the rewritten id is also set as ``obj.p_id``.
        """
        parent = self.getParent(row, site)
        p_id = row['p_id'].strip('#')
        if not p_id.startswith('p'):
            # For data migration from old SRP
            obj.p_id = _payment_key(p_id)
            parent[obj.p_id] = obj
        else:
            parent[p_id] = obj

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)``.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # We have to check p_id: ids starting with 'p' must have 14
        # characters, all others 19 (hash symbols not counted).
        p_id = row['p_id'].strip('#')
        if p_id.startswith('p'):
            expected_length = 14
        else:
            expected_length = 19
        if len(p_id) != expected_length:
            errs.append(('p_id', 'invalid length'))
        return errs, inv_errs, conv_dict