source: main/waeup.kofa/trunk/src/waeup/kofa/students/batching.py @ 8268

Last change on this file since 8268 was 8232, checked in by uli, 13 years ago

Reduce redundancy and try to check more correctly for set values in
StudentsProcessor.

  • Property svn:keywords set to Id
File size: 21.2 KB
RevLine 
[7191]1## $Id: batching.py 8232 2012-04-20 10:01:49Z uli $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
[7433]18"""Batch processing components for student objects.
[6821]19
20Batch processors eat CSV files to add, update or remove large numbers
21of certain kinds of objects at once.
22
[7261]23Here we define the processors for students specific objects like
24students, studycourses, payment tickets and accommodation tickets.
[6821]25"""
26import grok
[6849]27import csv
[6821]28from zope.interface import Interface
[6825]29from zope.schema import getFields
[7548]30from zope.component import queryUtility, getUtility
[7429]31from zope.event import notify
[6825]32from zope.catalog.interfaces import ICatalog
[7951]33from hurry.workflow.interfaces import IWorkflowState, IWorkflowInfo
[7811]34from waeup.kofa.interfaces import (
[7522]35    IBatchProcessor, FatalCSVError, IObjectConverter, IUserAccount,
[8214]36    IObjectHistory, VALIDATED, DELETION_MARKER, IGNORE_MARKER)
[7959]37from waeup.kofa.interfaces import MessageFactory as _
[7811]38from waeup.kofa.students.interfaces import (
[7532]39    IStudent, IStudentStudyCourse,
[7536]40    IStudentUpdateByRegNo, IStudentUpdateByMatricNo,
[7623]41    IStudentStudyLevel, ICourseTicket,
[8174]42    IStudentOnlinePayment, IStudentVerdictUpdate)
[7811]43from waeup.kofa.students.workflow import  IMPORTABLE_STATES
44from waeup.kofa.utils.batching import BatchProcessor
[6821]45
class StudentProcessor(BatchProcessor):
    """A batch processor for IStudent objects.

    A student is located by the first of the columns ``student_id``,
    ``reg_number`` and ``matric_number`` that carries a usable value
    (see `getLocator`).
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studentprocessor'
    grok.name(util_name)

    name = u'Student Processor'
    iface = IStudent

    location_fields = []
    factory_name = 'waeup.Student'

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of all column names this
        processor knows: the locator columns, ``password``,
        ``reg_state`` and every field of `iface`.
        """
        return sorted(list(set(
            ['student_id','reg_number','matric_number',
            'password', 'reg_state'] + getFields(
                self.iface).keys())))

    def checkHeaders(self, headerfields, mode='create'):
        """Check the header row of an import file.

        Raises `FatalCSVError` if none of the locator columns is
        present, if (in ``create`` mode) one of `required_fields` is
        missing, or if a column name that does not start with ``--``
        appears more than once. Returns ``True`` otherwise.
        """
        if not ('reg_number' in headerfields
                or 'student_id' in headerfields
                or 'matric_number' in headerfields):
            raise FatalCSVError(
                "Need at least columns student_id or reg_number " +
                "or matric_number for import!")
        if mode == 'create':
            for field in self.required_fields:
                if not field in headerfields:
                    raise FatalCSVError(
                        "Need at least columns %s for import!" %
                        ', '.join(["'%s'" % x for x in self.required_fields]))
        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Tell whether `site` contains a ``students`` container."""
        return 'students' in site.keys()

    def getLocator(self, row):
        """Return the name of the column used to locate the student.

        The columns are tried in the order ``student_id``,
        ``reg_number``, ``matric_number``; a column counts only if
        its value is neither ``None`` nor `IGNORE_MARKER`. Returns
        ``None`` if no column qualifies.
        """
        for locator in ('student_id', 'reg_number', 'matric_number'):
            if row.get(locator, None) not in (None, IGNORE_MARKER):
                return locator
        return None

    # The entry never exists in create mode.
    def entryExists(self, row, site):
        """Tell whether the student addressed by `row` can be found."""
        return self.getEntry(row, site) is not None

    def getParent(self, row, site):
        """Return the ``students`` container of `site`."""
        return site['students']

    def getEntry(self, row, site):
        """Return the student addressed by `row` or ``None``.

        Only the column picked by `getLocator` is consulted; if that
        lookup fails, the remaining locator columns are not tried.
        """
        if not 'students' in site.keys():
            return None
        # Determine the locator once instead of once per branch.
        locator = self.getLocator(row)
        if locator == 'student_id':
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
        elif locator in ('reg_number', 'matric_number'):
            value = row[locator]
            cat = queryUtility(ICatalog, name='students_catalog')
            # Query the index named like the locator column with a
            # (min, max) pair of identical values.
            results = list(cat.searchResults(**{locator: (value, value)}))
            if results:
                return results[0]
        return None

    def addEntry(self, obj, row, site):
        """Add student `obj` to the students container and record
        the creation in the object history.
        """
        parent = self.getParent(row, site)
        parent.addStudent(obj)
        # We have to log this if reg_state is provided. If not,
        # logging is done by the event handler handle_student_added
        if 'reg_state' in row:
            parent.logger.info('%s - Student record created' % obj.student_id)
        history = IObjectHistory(obj)
        history.addMessage(_('Student record created'))

    def delEntry(self, row, site):
        """Remove the student addressed by `row`, if there is one,
        and log the removal.
        """
        student = self.getEntry(row, site)
        if student is not None:
            parent = self.getParent(row, site)
            parent.logger.info('%s - Student removed' % student.student_id)
            del parent[student.student_id]

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        ``password`` and ``reg_state`` are handled here and removed
        from `row`; everything left is passed to the base class
        implementation. Note that `row` is modified in place.

        Returns whatever the base class implementation returns as
        summary of changed items.
        """
        # Remove student_id from row if empty
        if 'student_id' in row and row['student_id'] in (
            None, IGNORE_MARKER):
            row.pop('student_id')

        # Update password
        passwd = row.get('password', IGNORE_MARKER)
        if passwd not in ('', IGNORE_MARKER):
            IUserAccount(obj).setPassword(passwd)
            row.pop('password')

        # Update registration state. The value is applied as given;
        # checkConversion() reports values outside IMPORTABLE_STATES.
        if 'reg_state' in row.keys():
            value = row['reg_state']
            IWorkflowState(obj).setState(value)
            msg = _("State '${a}' set", mapping = {'a':value})
            history = IObjectHistory(obj)
            history.addMessage(msg)
            row.pop('reg_state')

        # apply other values...
        items_changed = super(StudentProcessor, self).updateEntry(
            obj, row, site)

        # Log actions...
        parent = self.getParent(row, site)
        if hasattr(obj,'student_id'):
            # Update mode: the student exists and we can get the student_id
            parent.logger.info(
                '%s - Student record updated: %s'
                % (obj.student_id, items_changed))
        else:
            # Create mode: the student does not yet exist
            parent.logger.info('Student data imported: %s' % items_changed)
        return items_changed

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used fieldnames.

        The first row of the file at `path` supplies the raw column
        titles; `headerfields` supplies, position by position, the
        field names they are mapped to. In ``remove`` mode only the
        locator columns are kept.
        """
        result = dict()
        # Only the header row is needed; close the file right away
        # instead of leaving the handle to the garbage collector.
        with open(path, 'rb') as csv_file:
            raw_header = next(csv.reader(csv_file))
        for num, field in enumerate(headerfields):
            if field not in ['student_id', 'reg_number', 'matric_number'
                             ] and mode == 'remove':
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        In ``update`` and ``remove`` mode the interface used for
        conversion depends on the locator column of `row`.
        Additionally a given ``reg_state`` must be one of
        `IMPORTABLE_STATES`.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` delivered by
        the converter, with ``reg_state`` errors appended to ``errs``.
        """
        iface = self.iface
        if mode in ['update', 'remove']:
            locator = self.getLocator(row)
            if locator == 'reg_number':
                iface = IStudentUpdateByRegNo
            elif locator == 'matric_number':
                iface = IStudentUpdateByMatricNo
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict =  converter.fromStringDict(
            row, self.factory_name, mode=mode)
        if 'reg_state' in row and \
            not row['reg_state'] in IMPORTABLE_STATES:
            if row['reg_state'] != '':
                errs.append(('reg_state','not allowed'))
            else:
                errs.append(('reg_state','no value provided'))
        return errs, inv_errs, conv_dict
227
[8232]228
class StudentProcessorBase(BatchProcessor):
    """A base for student subitem processor.

    Helps reducing redundancy. Subclasses are expected to provide
    `getParent` and `getEntry`, which are used by `parentsExist`
    and `entryExists`.
    """
    grok.baseclass()

    #: required fields beside 'student_id', 'reg_number' and 'matric_number'
    additional_fields = []

    #: header fields additional required
    additional_headers = []

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of the locator columns, the
        `additional_fields` and all fields of `iface`.
        """
        fields = ['student_id','reg_number','matric_number'
                  ] + self.additional_fields
        return sorted(list(set(fields + getFields(
                self.iface).keys())))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the header row of an import file.

        Raises `FatalCSVError` if none of the locator columns is
        present, if one of `additional_headers` is missing, or if a
        column name that does not start with ``--`` appears more than
        once. Returns ``True`` otherwise. `mode` is not evaluated.
        """
        if not ('reg_number' in headerfields
                or 'student_id' in headerfields
                or 'matric_number' in headerfields):
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        for name in self.additional_headers:
            if not name in headerfields:
                raise FatalCSVError(
                    "Need %s for import!" % name)

        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def _getStudent(self, row, site):
        """Return the student addressed by `row` or ``None``.

        The first of the columns ``student_id``, ``reg_number`` and
        ``matric_number`` whose value is neither empty nor
        `IGNORE_MARKER` decides how the student is looked up. If that
        lookup fails the remaining columns are not tried.
        """
        NON_VALUES = ['', IGNORE_MARKER]
        if not 'students' in site.keys():
            return None
        if row.get('student_id', '') not in NON_VALUES:
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
            return None
        for key in ('reg_number', 'matric_number'):
            value = row.get(key, '')
            if value in NON_VALUES:
                continue
            cat = queryUtility(ICatalog, name='students_catalog')
            # Query the index named like the column with a (min, max)
            # pair of identical values.
            results = list(cat.searchResults(**{key: (value, value)}))
            if results:
                return results[0]
            # This column was the deciding locator; do not fall back.
            return None
        return None

    def parentsExist(self, row, site):
        """Tell whether the parent object for `row` can be found."""
        # A single lookup is enough; getParent may involve a catalog
        # search.
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether the object addressed by `row` can be found."""
        return self.getEntry(row, site) is not None

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` delivered by
        the `IObjectConverter` of `iface`.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict =  converter.fromStringDict(
            row, self.factory_name, mode=mode)
        return errs, inv_errs, conv_dict
306
307
class StudentStudyCourseProcessor(StudentProcessorBase):
    """A batch processor for IStudentStudyCourse objects.

    The parent of a study course is the student; the study course
    itself is looked up as ``studycourse`` inside the student.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studycourseupdater'
    grok.name(util_name)

    name = u'StudentStudyCourse Processor (update only)'
    iface = IStudentStudyCourse
    factory_name = 'waeup.StudentStudyCourse'

    location_fields = []
    additional_fields = []

    def getParent(self, row, site):
        """Return the student addressed by `row` or ``None``."""
        return self._getStudent(row, site)

    def getEntry(self, row, site):
        """Return the study course of the student addressed by `row`
        or ``None`` if the student cannot be found.
        """
        student = self.getParent(row, site)
        if student is None:
            return None
        return student.get('studycourse')

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        Logs the change on the logger of the student's container and
        notifies an `ObjectModifiedEvent` for the student.

        Returns the summary of changed items delivered by the base
        class, so that derived processors calling this method via
        ``super`` can use it.
        """
        items_changed = super(StudentStudyCourseProcessor, self).updateEntry(
            obj, row, site)
        parent = self.getParent(row, site)
        parent.__parent__.logger.info(
            '%s - Study course updated: %s'
            % (parent.student_id, items_changed))
        # Update the students_catalog
        notify(grok.ObjectModifiedEvent(obj.__parent__))
        return items_changed

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        In addition to the base class checks, a converted
        ``current_level`` must lie between ``start_level`` and
        ``end_level + 120`` of a converted ``certificate``.
        """
        errs, inv_errs, conv_dict = super(
            StudentStudyCourseProcessor, self).checkConversion(row, mode=mode)
        # We have to check if current_level is in range of certificate.
        # The check is only possible if both values were converted;
        # a row may well provide only one of them.
        if 'certificate' in conv_dict and 'current_level' in conv_dict:
            cert = conv_dict['certificate']
            level = conv_dict['current_level']
            if level < cert.start_level or level > cert.end_level+120:
                errs.append(('current_level','not in range'))
        return errs, inv_errs, conv_dict
358
class StudentStudyLevelProcessor(StudentProcessorBase):
    """Batch processor handling IStudentStudyLevel objects.

    Study levels live inside the ``studycourse`` of a student and are
    stored under the value of the ``level`` column.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studylevelprocessor'
    grok.name(util_name)

    name = u'StudentStudyLevel Processor'
    iface = IStudentStudyLevel
    factory_name = 'waeup.StudentStudyLevel'

    location_fields = []
    additional_fields = ['level']
    additional_headers = ['level']

    def getParent(self, row, site):
        """Return the study course of the addressed student, or
        ``None`` if no such student is found.
        """
        owner = self._getStudent(row, site)
        if owner is not None:
            return owner['studycourse']
        return None

    def getEntry(self, row, site):
        """Return the study level stored under ``row['level']``, or
        ``None`` if the study course or the level is missing.
        """
        container = self.getParent(row, site)
        if container is not None:
            return container.get(row['level'])
        return None

    def addEntry(self, obj, row, site):
        """Store `obj` in the study course under ``row['level']`` and
        set its ``level`` attribute to the integer value.
        """
        container = self.getParent(row, site)
        level_key = row['level']
        obj.level = int(level_key)
        container[level_key] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Validate all values in `row`.

        On top of the base class checks the ``level`` value must be
        an integer and a multiple of 10 from 0 up to 690.
        """
        errs, inv_errs, conv_dict = super(
            StudentStudyLevelProcessor, self).checkConversion(row, mode=mode)

        # The converter does not verify the level, so do it here.
        try:
            number = int(row['level'])
        except ValueError:
            errs.append(('level','no integer'))
        else:
            in_steps_of_ten = (0 <= number < 700) and (number % 10 == 0)
            if not in_steps_of_ten:
                errs.append(('level','no valid integer'))
        return errs, inv_errs, conv_dict
[7548]409
class CourseTicketProcessor(StudentProcessorBase):
    """Batch processor handling ICourseTicket objects.

    Course tickets live inside a study level of a student and are
    stored under the value of the ``code`` column.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'courseticketprocessor'
    grok.name(util_name)

    name = u'CourseTicket Processor'
    iface = ICourseTicket
    factory_name = 'waeup.CourseTicket'

    location_fields = []
    additional_fields = ['level', 'code']
    additional_headers = ['level', 'code']

    def getParent(self, row, site):
        """Return the study level ``row['level']`` of the addressed
        student, or ``None`` if student or level is not found.
        """
        owner = self._getStudent(row, site)
        if owner is not None:
            return owner['studycourse'].get(row['level'])
        return None

    def getEntry(self, row, site):
        """Return the ticket stored under ``row['code']``, or ``None``
        if the study level or the ticket is missing.
        """
        studylevel = self.getParent(row, site)
        if studylevel is not None:
            return studylevel.get(row['code'])
        return None

    def addEntry(self, obj, row, site):
        """Copy the course data found in the ``courses_catalog`` to
        `obj` and store it in the study level under ``row['code']``.
        """
        studylevel = self.getParent(row, site)
        code = row['code']
        catalog = getUtility(ICatalog, name='courses_catalog')
        # The first catalog hit supplies all values copied to the ticket.
        course = list(catalog.searchResults(code=(code, code)))[0]
        obj.fcode = course.__parent__.__parent__.__parent__.code
        obj.dcode = course.__parent__.__parent__.code
        obj.title = course.title
        obj.credits = course.credits
        obj.passmark = course.passmark
        obj.semester = course.semester
        studylevel[code] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Validate all values in `row`.

        On top of the base class checks the ``code`` value must be
        found in the ``courses_catalog``.
        """
        errs, inv_errs, conv_dict = super(
            CourseTicketProcessor, self).checkConversion(row, mode=mode)

        # The converter does not verify that the course exists, so
        # ask the catalog.
        code = row['code']
        catalog = getUtility(ICatalog, name='courses_catalog')
        hits = catalog.searchResults(code=(code, code))
        if len(hits) == 0:
            errs.append(('code','non-existent'))
        return errs, inv_errs, conv_dict
466
class StudentOnlinePaymentProcessor(StudentProcessorBase):
    """A batch processor for IStudentOnlinePayment objects.

    Payments live inside the ``payments`` container of a student.
    The ``p_id`` column may carry hash symbols at its ends; ids not
    starting with ``p`` are treated as ids from the old SRP.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'paymentprocessor'
    grok.name(util_name)

    name = u'Payment Processor'
    iface = IStudentOnlinePayment
    factory_name = 'waeup.StudentOnlinePayment'

    location_fields = []
    additional_fields = ['p_id']
    additional_headers = ['p_id']

    def _strippedPaymentId(self, row):
        """Return ``row['p_id']`` without leading/trailing hash symbols."""
        # We can use the hash symbol at the end of p_id in import files
        # to avoid annoying automatic number transformation
        # by Excel or Calc
        return row['p_id'].strip('#')

    def _migratedPaymentId(self, p_id):
        """Turn a payment id of the old SRP into the key used here."""
        # For data migration from old SRP: the first six characters
        # are dropped and replaced by 'p'.
        return 'p' + p_id[6:]

    def parentsExist(self, row, site):
        """Tell whether the payments container for `row` can be found."""
        # A single lookup is enough; getParent may involve a catalog
        # search.
        return self.getParent(row, site) is not None

    def getParent(self, row, site):
        """Return the payments container of the addressed student, or
        ``None`` if no such student is found.
        """
        student = self._getStudent(row, site)
        if student is None:
            return None
        return student['payments']

    def getEntry(self, row, site):
        """Return the payment addressed by ``row['p_id']``, or ``None``
        if the payments container or the payment is missing.
        """
        payments = self.getParent(row, site)
        if payments is None:
            return None
        p_id = self._strippedPaymentId(row)
        if not p_id.startswith('p'):
            p_id = self._migratedPaymentId(p_id)
        return payments.get(p_id)

    def addEntry(self, obj, row, site):
        """Store `obj` in the payments container.

        For ids of the old SRP the migrated id is also written to
        ``obj.p_id``; for other ids ``obj.p_id`` is left untouched.
        """
        parent = self.getParent(row, site)
        p_id = self._strippedPaymentId(row)
        if not p_id.startswith('p'):
            obj.p_id = self._migratedPaymentId(p_id)
            parent[obj.p_id] = obj
        else:
            parent[p_id] = obj

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        In addition to the base class checks the stripped ``p_id``
        must have 14 characters if it starts with ``p`` and 19
        characters otherwise.
        """
        errs, inv_errs, conv_dict = super(
            StudentOnlinePaymentProcessor, self).checkConversion(row, mode=mode)

        # We have to check p_id.
        p_id = self._strippedPaymentId(row)
        if p_id.startswith('p'):
            expected_length = 14
        else:
            expected_length = 19
        if len(p_id) != expected_length:
            errs.append(('p_id','invalid length'))
        return errs, inv_errs, conv_dict
[7951]537
class StudentVerdictProcessor(StudentStudyCourseProcessor):
    """A batch processor for verdicts.

    Import verdicts and perform workflow transitions.
    """

    util_name = 'verdictupdater'
    grok.name(util_name)

    name = u'Verdict Processor (update only)'
    iface = IStudentVerdictUpdate
    factory_name = 'waeup.StudentStudyCourse'

    additional_fields = ['current_session', 'current_level']

    def checkUpdateRequirements(self, obj, row, site):
        """Checks requirements the studycourse and the student must fulfill
        before being updated.

        Returns a message describing the first unmet requirement or
        ``None`` if all requirements are met. This method is not
        called from within this class.
        """
        # Check if current_levels correspond
        if obj.current_level != row['current_level']:
            return 'Current level does not correspond.'
        # Check if current_sessions correspond
        if obj.current_session != row['current_session']:
            return 'Current session does not correspond.'
        # Check if student is in state VALIDATED
        if obj.getStudent().state != VALIDATED:
            return 'Student in wrong state.'
        return None

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        ``current_session`` and ``current_level`` are only used for
        identification and are not written to `obj`. After the update
        the ``return`` transition is fired for the student and an
        `ObjectModifiedEvent` is notified.

        Returns the value the parent class implementation delivers as
        summary of changed items.
        """
        # Don't set current_session, current_level
        vals_to_set = dict((key, val) for key, val in row.items()
                           if key not in ('current_session','current_level'))
        items_changed = super(StudentVerdictProcessor, self).updateEntry(
            obj, vals_to_set, site)
        parent = self.getParent(row, site)
        parent.__parent__.logger.info(
            '%s - Verdict updated: %s'
            % (parent.student_id, items_changed))
        # Fire transition
        IWorkflowInfo(obj.__parent__).fireTransition('return')
        # Update the students_catalog
        notify(grok.ObjectModifiedEvent(obj.__parent__))
        return items_changed
Note: See TracBrowser for help on using the repository browser.