"""Batch processing components for academics objects. Batch processors eat CSV files to add, update or remove large numbers of certain kinds of objects at once. Here we define the processors for academics specific objects like faculties, departments and the like. """ import grok import csv import copy from zope.interface import Interface from zope.schema import getFields from zope.component import queryUtility from zope.catalog.interfaces import ICatalog from waeup.sirp.interfaces import ( IBatchProcessor, FatalCSVError, IObjectConverter) from waeup.sirp.students.interfaces import ( IStudent, IStudentStudyCourse, IStudentStudyCourseImport, IStudentUpdateByRegNo, IStudentUpdateByMatricNo) from waeup.sirp.utils.batching import BatchProcessor class StudentProcessor(BatchProcessor): """A batch processor for IStudent objects. """ grok.implements(IBatchProcessor) grok.provides(IBatchProcessor) grok.context(Interface) util_name = 'studentimporter' grok.name(util_name) name = u'Student Importer' iface = IStudent location_fields = [] factory_name = 'waeup.Student' mode = None @property def available_fields(self): result = [] return sorted(list(set( ['student_id','reg_number','matric_number'] + getFields( self.iface).keys()))) def checkHeaders(self, headerfields, mode='create'): if not 'reg_number' in headerfields and not 'student_id' \ in headerfields and not 'matric_number' in headerfields: raise FatalCSVError( "Need at least columns student_id or reg_number " + "or matric_number for import!") if mode == 'create': for field in self.required_fields: if not field in headerfields: raise FatalCSVError( "Need at least columns %s for import!" % ', '.join(["'%s'" % x for x in self.required_fields])) # Check for fields to be ignored... not_ignored_fields = [x for x in headerfields if not x.startswith('--')] if len(set(not_ignored_fields)) < len(not_ignored_fields): raise FatalCSVError( "Double headers: each column name may only appear once.") return True def parentsExist(self, row, site): return 'students' in site.keys() def getLocator(self, row): if 'student_id' in row.keys() and row['student_id']: return 'student_id' elif 'reg_number' in row.keys() and row['reg_number']: return 'reg_number' elif 'matric_number' in row.keys() and row['matric_number']: return 'matric_number' else: return None # The entry never exists in create mode. def entryExists(self, row, site): if not 'students' in site.keys(): return None if self.getLocator(row) == 'student_id': if row['student_id'] in site['students']: student = site['students'][row['student_id']] return student elif self.getLocator(row) == 'reg_number': reg_number = row['reg_number'] cat = queryUtility(ICatalog, name='students_catalog') results = list( cat.searchResults(reg_number=(reg_number, reg_number))) if results: return results[0] elif self.getLocator(row) == 'matric_number': matric_number = row['matric_number'] cat = queryUtility(ICatalog, name='students_catalog') results = list( cat.searchResults(matric_number=(matric_number, matric_number))) if results: return results[0] return None def getParent(self, row, site): return site['students'] def getEntry(self, row, site): return self.entryExists(row, site) def addEntry(self, obj, row, site): parent = self.getParent(row, site) parent.addStudent(obj) return def delEntry(self, row, site): student = self.entryExists(row, site) if student: parent = self.getParent(row, site) del parent[student.student_id] pass def getMapping(self, path, headerfields, mode): """Get a mapping from CSV file headerfields to actually used fieldnames. """ result = dict() reader = csv.reader(open(path, 'rb')) raw_header = reader.next() for num, field in enumerate(headerfields): if field not in [ 'student_id', 'reg_number', 'matric_number'] and mode == 'remove': continue if field == u'--IGNORE--': # Skip ignored columns in failed and finished data files. continue result[raw_header[num]] = field return result def checkConversion(self, row, mode='create'): """Validates all values in row. """ if mode in ['update', 'remove']: if self.getLocator(row) == 'reg_number': iface = IStudentUpdateByRegNo elif self.getLocator(row) == 'matric_number': iface = IStudentUpdateByMatricNo else: iface = self.iface converter = IObjectConverter(iface) errs, inv_errs, conv_dict = converter.fromStringDict( row, self.factory_name) return errs, inv_errs, conv_dict class StudentStudyCourseProcessor(BatchProcessor): """A batch processor for IStudentStudyCourse objects. """ grok.implements(IBatchProcessor) grok.provides(IBatchProcessor) grok.context(Interface) util_name = 'studycourseupdater' grok.name(util_name) name = u'StudentStudyCourse Importer (update only)' iface = IStudentStudyCourseImport factory_name = 'waeup.StudentStudyCourse' location_fields = [] mode = None @property def available_fields(self): result = [] return sorted(list(set( ['student_id','reg_number','matric_number'] + getFields( self.iface).keys()))) def checkHeaders(self, headerfields, mode='ignore'): if not 'reg_number' in headerfields and not 'student_id' \ in headerfields and not 'matric_number' in headerfields: raise FatalCSVError( "Need at least columns student_id " + "or reg_number or matric_number for import!") # Check for fields to be ignored... not_ignored_fields = [x for x in headerfields if not x.startswith('--')] if len(set(not_ignored_fields)) < len(not_ignored_fields): raise FatalCSVError( "Double headers: each column name may only appear once.") return True def parentsExist(self, row, site): if not 'students' in site.keys(): return None if 'student_id' in row.keys() and row['student_id']: if row['student_id'] in site['students']: student = site['students'][row['student_id']] return student elif 'reg_number' in row.keys() and row['reg_number']: reg_number = row['reg_number'] #import pdb; pdb.set_trace() cat = queryUtility(ICatalog, name='students_catalog') results = list( cat.searchResults(reg_number=(reg_number, reg_number))) if results: return results[0] elif 'matric_number' in row.keys() and row['matric_number']: matric_number = row['matric_number'] cat = queryUtility(ICatalog, name='students_catalog') results = list( cat.searchResults(matric_number=(matric_number, matric_number))) if results: return results[0] return None def entryExists(self, row, site): student = self.parentsExist(row, site) if not student: return None if 'studycourse' in student: return student return None def getEntry(self, row, site): student = self.entryExists(row, site) if not student: return None return student.get('studycourse')