source: main/waeup.sirp/trunk/src/waeup/sirp/students/batching.py @ 8051

Last change on this file since 8051 was 7656, checked in by Henrik Bettermann, 13 years ago

Be more verbose in log messages when creating or updating students or study courses.

  • Property svn:keywords set to Id
File size: 24.2 KB
RevLine 
[7191]1## $Id: batching.py 7656 2012-02-16 08:35:53Z henrik $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
"""Batch processing components for student objects.

Batch processors eat CSV files to add, update or remove large numbers
of certain kinds of objects at once.

Here we define the processors for student-specific objects like
students, studycourses, payment tickets and accommodation tickets.
"""
26import grok
[6849]27import csv
[6821]28from zope.interface import Interface
[6825]29from zope.schema import getFields
[7548]30from zope.component import queryUtility, getUtility
[7429]31from zope.event import notify
[6825]32from zope.catalog.interfaces import ICatalog
[7513]33from hurry.workflow.interfaces import IWorkflowState
[6849]34from waeup.sirp.interfaces import (
[7522]35    IBatchProcessor, FatalCSVError, IObjectConverter, IUserAccount,
36    IObjectHistory)
[6825]37from waeup.sirp.students.interfaces import (
[7532]38    IStudent, IStudentStudyCourse,
[7536]39    IStudentUpdateByRegNo, IStudentUpdateByMatricNo,
[7623]40    IStudentStudyLevel, ICourseTicket,
41    IStudentOnlinePayment)
[7513]42from waeup.sirp.students.workflow import  IMPORTABLE_STATES
[6821]43from waeup.sirp.utils.batching import BatchProcessor
44
class StudentProcessor(BatchProcessor):
    """A batch processor for IStudent objects.

    A row addresses a student through the first non-empty column among
    ``student_id``, ``reg_number`` and ``matric_number`` (checked in
    that order, see `getLocator`).
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studentimporter'
    grok.name(util_name)

    name = u'Student Importer'
    iface = IStudent

    location_fields = []
    factory_name = 'waeup.Student'

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of all accepted column names.

        Combines the locator columns, ``password`` and ``reg_state``
        with the field names declared by ``self.iface``.
        """
        return sorted(set(
            ['student_id', 'reg_number', 'matric_number',
             'password', 'reg_state'] + list(getFields(self.iface).keys())))

    def checkHeaders(self, headerfields, mode='create'):
        """Check the CSV header line and return True if it is usable.

        Raises `FatalCSVError` if no locator column is present, if (in
        ``create`` mode) one of ``self.required_fields`` is missing, or
        if a column name that does not start with ``--`` appears more
        than once.
        """
        locators = ('reg_number', 'student_id', 'matric_number')
        if not any(name in headerfields for name in locators):
            raise FatalCSVError(
                "Need at least columns student_id or reg_number " +
                "or matric_number for import!")
        if mode == 'create':
            for field in self.required_fields:
                if field not in headerfields:
                    raise FatalCSVError(
                        "Need at least columns %s for import!" %
                        ', '.join(["'%s'" % x for x in self.required_fields]))
        # Columns starting with '--' are ignored and may therefore repeat.
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Tell whether `site` has a ``students`` container."""
        return 'students' in site.keys()

    def getLocator(self, row):
        """Return the name of the first locator column with a value.

        Returns ``'student_id'``, ``'reg_number'``, ``'matric_number'``
        or None if none of them holds a true value in `row`.
        """
        for name in ('student_id', 'reg_number', 'matric_number'):
            if row.get(name, None):
                return name
        return None

    # The entry never exists in create mode.
    def entryExists(self, row, site):
        """Tell whether `getEntry` finds a student for `row`."""
        return self.getEntry(row, site) is not None

    def getParent(self, row, site):
        """Return the ``students`` container of `site`."""
        return site['students']

    def getEntry(self, row, site):
        """Return the student addressed by `row` or None.

        A ``student_id`` is looked up directly in the ``students``
        container; ``reg_number`` and ``matric_number`` are searched in
        the ``students_catalog`` and the first hit is returned.
        """
        if 'students' not in site.keys():
            return None
        # Determine the locator once instead of once per branch.
        locator = self.getLocator(row)
        if locator == 'student_id':
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
        elif locator in ('reg_number', 'matric_number'):
            value = row[locator]
            cat = queryUtility(ICatalog, name='students_catalog')
            # Catalog queries take a (min, max) range; use an exact match.
            results = list(cat.searchResults(**{locator: (value, value)}))
            if results:
                return results[0]
        return None

    def addEntry(self, obj, row, site):
        """Add student `obj` to the students container and note the
        import in the object's history.
        """
        parent = self.getParent(row, site)
        parent.addStudent(obj)
        # We don't need to log this because it is already done by the
        # workflow transition event handler.
        history = IObjectHistory(obj)
        history.addMessage('Student record imported')
        return

    def delEntry(self, row, site):
        """Remove the student addressed by `row`, if there is one.

        The removal is logged via the students container's logger.
        """
        student = self.getEntry(row, site)
        if student is not None:
            parent = self.getParent(row, site)
            parent.logger.info('%s - Student removed' % student.student_id)
            del parent[student.student_id]

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        ``password`` (if not empty) is set through `IUserAccount`,
        ``reg_state`` is set through `IWorkflowState` and recorded in
        the object history, every other key is set as attribute if
        `obj` already has an attribute of that name.  All keys of `row`
        are written to the log; a password that was set is masked.

        Note that an empty ``student_id`` (None) is removed from `row`
        in place.
        """
        # Remove student_id from row if empty
        if 'student_id' in row and row['student_id'] is None:
            row.pop('student_id')
        changes = []
        for key, value in row.items():
            logged_value = value
            # Set student password and all fields declared in interface.
            if key == 'password' and value != '':
                IUserAccount(obj).setPassword(value)
                # Never write credentials to the log file.
                logged_value = '********'
            elif key == 'reg_state':
                IWorkflowState(obj).setState(value)
                msg = "State '%s' set" % value
                history = IObjectHistory(obj)
                history.addMessage(msg)
            elif hasattr(obj, key):
                setattr(obj, key, value)
            changes.append('%s=%s, ' % (key, logged_value))
        items_changed = ''.join(changes)
        parent = self.getParent(row, site)
        if hasattr(obj, 'student_id'):
            # Update mode: the student exists and we can get the student_id
            parent.logger.info(
                '%s - Student record updated: %s'
                % (obj.student_id, items_changed))
        else:
            # Create mode: the student does not yet exist
            parent.logger.info('Student data imported: %s' % items_changed)
        return

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used fieldnames.

        The keys are the raw column titles read from the first line of
        the file at `path`, the values the corresponding entries of
        `headerfields`.  In ``remove`` mode only the locator columns
        are mapped; ``--IGNORE--`` columns are always left out.
        """
        result = dict()
        csv_file = open(path, 'rb')
        try:
            # Only the header line is needed; close the file right after.
            raw_header = csv.reader(csv_file).next()
        finally:
            csv_file.close()
        for num, field in enumerate(headerfields):
            if field not in [
                'student_id', 'reg_number', 'matric_number'] and mode == 'remove':
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` delivered by
        the object converter.  In ``update`` and ``remove`` mode the
        interface used for conversion depends on the locator of the
        row.  A ``reg_state`` value outside `IMPORTABLE_STATES` adds an
        entry to ``errs``.
        """
        iface = self.iface
        if mode in ['update', 'remove']:
            locator = self.getLocator(row)
            if locator == 'reg_number':
                iface = IStudentUpdateByRegNo
            elif locator == 'matric_number':
                iface = IStudentUpdateByMatricNo
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        if 'reg_state' in row and \
            row['reg_state'] not in IMPORTABLE_STATES:
            if row['reg_state'] != '':
                errs.append(('reg_state', 'not allowed'))
            else:
                errs.append(('reg_state', 'no value provided'))
        return errs, inv_errs, conv_dict
215
class StudentStudyCourseProcessor(BatchProcessor):
    """A batch processor for IStudentStudyCourse objects.

    The parent of a study course is the student addressed by the row;
    the entry itself is the student's ``studycourse`` item.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studycourseupdater'
    grok.name(util_name)

    name = u'StudentStudyCourse Importer (update only)'
    iface = IStudentStudyCourse
    factory_name = 'waeup.StudentStudyCourse'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of all accepted column names."""
        return sorted(set(
            ['student_id', 'reg_number', 'matric_number'] +
            list(getFields(self.iface).keys())))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header line and return True if it is usable.

        Raises `FatalCSVError` if no locator column is present or if a
        column name that does not start with ``--`` appears more than
        once.
        """
        locators = ('reg_number', 'student_id', 'matric_number')
        if not any(name in headerfields for name in locators):
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        # Columns starting with '--' are ignored and may therefore repeat.
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the student addressed by `row` or None.

        Only the first non-empty locator among ``student_id``,
        ``reg_number`` and ``matric_number`` is used; if it does not
        lead to a student, the remaining locators are not tried.
        """
        if 'students' not in site.keys():
            return None
        if row.get('student_id'):
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
            return None
        for field in ('reg_number', 'matric_number'):
            value = row.get(field)
            if not value:
                continue
            cat = queryUtility(ICatalog, name='students_catalog')
            # Catalog queries take a (min, max) range; use an exact match.
            results = list(cat.searchResults(**{field: (value, value)}))
            if results:
                return results[0]
            return None
        return None

    def parentsExist(self, row, site):
        """Tell whether the student addressed by `row` exists."""
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether the addressed student has a study course."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the ``studycourse`` item of the student or None."""
        student = self.getParent(row, site)
        if student is None:
            return None
        return student.get('studycourse')

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        Every key of `row` that `obj` already has as attribute is set.
        All keys of `row` are written to the log of the students
        container; for ``certificate`` the certificate's ``code`` is
        logged instead of the object.
        """
        items_changed = ''
        for key, value in row.items():
            # Only touch attributes the object already provides.
            if hasattr(obj, key):
                setattr(obj, key, value)
                if key == 'certificate':
                    # Log the certificate code rather than the object.
                    value = value.code
            items_changed += '%s=%s, ' % (key, value)
        parent = self.getParent(row, site)
        parent.__parent__.logger.info(
            '%s - Study course updated: %s'
            % (parent.student_id, items_changed))
        # Update the students_catalog
        notify(grok.ObjectModifiedEvent(obj.__parent__))
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` delivered by
        the object converter.  If both ``certificate`` and
        ``current_level`` were converted, the level must lie between
        the certificate's ``start_level`` and ``end_level + 120``;
        otherwise an error is added for ``current_level``.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # We have to check if current_level is in range of certificate.
        # This is not done by the converter. This kind of conversion
        # checking does only work if a combination of certificate and
        # current_level is provided, so skip it (instead of failing
        # with a KeyError) when either value is missing.
        if 'certificate' in conv_dict and 'current_level' in conv_dict:
            certificate = conv_dict['certificate']
            current_level = conv_dict['current_level']
            if current_level < certificate.start_level or \
                current_level > certificate.end_level + 120:
                errs.append(('current_level', 'not in range'))
        return errs, inv_errs, conv_dict
325
class StudentStudyLevelProcessor(BatchProcessor):
    """Batch processor handling IStudentStudyLevel objects.

    The parent of a study level is the ``studycourse`` item of the
    student addressed by the row; levels are stored there under the
    row's ``level`` value.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studylevelimporter'
    grok.name(util_name)

    name = u'StudentStudyLevel Importer'
    iface = IStudentStudyLevel
    factory_name = 'waeup.StudentStudyLevel'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """All accepted column names, sorted and without duplicates."""
        names = set(['student_id', 'reg_number', 'matric_number', 'level'])
        names.update(getFields(self.iface).keys())
        return sorted(names)

    def checkHeaders(self, headerfields, mode='ignore'):
        """Validate the CSV header line; return True or raise
        `FatalCSVError`.

        A locator column and the ``level`` column are mandatory, and
        column names not starting with ``--`` must be unique.
        """
        has_locator = ('reg_number' in headerfields
                       or 'student_id' in headerfields
                       or 'matric_number' in headerfields)
        if not has_locator:
            raise FatalCSVError(
                "Need at least columns student_id "
                "or reg_number or matric_number for import!")
        if 'level' not in headerfields:
            raise FatalCSVError("Need level for import!")
        # Only columns which are not marked as ignored must be unique.
        kept = [header for header in headerfields
                if not header.startswith('--')]
        if len(kept) != len(set(kept)):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def _getStudent(self, row, site):
        """Find the student addressed by `row`; None if there is none.

        Only the first non-empty locator column is evaluated.
        """
        if 'students' not in site.keys():
            return None
        student_id = row.get('student_id')
        if student_id:
            if student_id in site['students']:
                return site['students'][student_id]
            return None
        for field in ('reg_number', 'matric_number'):
            number = row.get(field)
            if not number:
                continue
            cat = queryUtility(ICatalog, name='students_catalog')
            found = list(cat.searchResults(**{field: (number, number)}))
            if found:
                return found[0]
            return None
        return None

    def getParent(self, row, site):
        """Return the ``studycourse`` item of the addressed student,
        or None if no student was found.
        """
        student = self._getStudent(row, site)
        if student is None:
            return None
        return student['studycourse']

    def parentsExist(self, row, site):
        """True if a study course container was found for `row`."""
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """True if the study level named in `row` already exists."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the study level stored under ``row['level']`` or None."""
        studycourse = self.getParent(row, site)
        if studycourse is not None:
            return studycourse.get(row['level'])
        return None

    def addEntry(self, obj, row, site):
        """Store `obj` in the study course under ``row['level']`` after
        setting its ``level`` attribute to the integer value.
        """
        studycourse = self.getParent(row, site)
        level_key = row['level']
        obj.level = int(level_key)
        studycourse[level_key] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Convert `row` and additionally validate ``level``.

        Returns the converter's ``(errs, inv_errs, conv_dict)`` tuple.
        ``level`` must be an integer which is a multiple of 10 in the
        range 0..690; this is not checked by the converter.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        try:
            level = int(row['level'])
        except ValueError:
            errs.append(('level', 'no integer'))
        else:
            if level not in range(0, 700, 10):
                errs.append(('level', 'no valid integer'))
        return errs, inv_errs, conv_dict
[7548]422
class CourseTicketProcessor(BatchProcessor):
    """Batch processor handling ICourseTicket objects.

    The parent of a course ticket is the study level (looked up by the
    row's ``level``) of the addressed student; tickets are stored
    there under the row's ``code`` value.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'courseticketimporter'
    grok.name(util_name)

    name = u'CourseTicket Importer'
    iface = ICourseTicket
    factory_name = 'waeup.CourseTicket'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """All accepted column names, sorted and without duplicates."""
        names = set(
            ['student_id', 'reg_number', 'matric_number', 'level', 'code'])
        names.update(getFields(self.iface).keys())
        return sorted(names)

    def checkHeaders(self, headerfields, mode='ignore'):
        """Validate the CSV header line; return True or raise
        `FatalCSVError`.

        A locator column and the columns ``level`` and ``code`` are
        mandatory, and column names not starting with ``--`` must be
        unique.
        """
        has_locator = ('reg_number' in headerfields
                       or 'student_id' in headerfields
                       or 'matric_number' in headerfields)
        if not has_locator:
            raise FatalCSVError(
                "Need at least columns student_id "
                "or reg_number or matric_number for import!")
        if 'level' not in headerfields:
            raise FatalCSVError("Need level for import!")
        if 'code' not in headerfields:
            raise FatalCSVError("Need code for import!")
        # Only columns which are not marked as ignored must be unique.
        kept = [header for header in headerfields
                if not header.startswith('--')]
        if len(kept) != len(set(kept)):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def _getStudent(self, row, site):
        """Find the student addressed by `row`; None if there is none.

        Only the first non-empty locator column is evaluated.
        """
        if 'students' not in site.keys():
            return None
        student_id = row.get('student_id')
        if student_id:
            if student_id in site['students']:
                return site['students'][student_id]
            return None
        for field in ('reg_number', 'matric_number'):
            number = row.get(field)
            if not number:
                continue
            cat = queryUtility(ICatalog, name='students_catalog')
            found = list(cat.searchResults(**{field: (number, number)}))
            if found:
                return found[0]
            return None
        return None

    def getParent(self, row, site):
        """Return the study level ``row['level']`` of the addressed
        student's study course, or None if student or level is missing.
        """
        student = self._getStudent(row, site)
        if student is None:
            return None
        return student['studycourse'].get(row['level'])

    def parentsExist(self, row, site):
        """True if the study level for `row` was found."""
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """True if the course ticket named in `row` already exists."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the ticket stored under ``row['code']`` or None."""
        studylevel = self.getParent(row, site)
        if studylevel is not None:
            return studylevel.get(row['code'])
        return None

    def addEntry(self, obj, row, site):
        """Fill `obj` with data of the catalogued course and store it
        in the study level under ``row['code']``.

        The first ``courses_catalog`` hit for the code supplies title,
        credits, passmark and semester; ``dcode`` and ``fcode`` are
        the ``code`` values of its second and third ancestor.
        """
        studylevel = self.getParent(row, site)
        code = row['code']
        catalog = getUtility(ICatalog, name='courses_catalog')
        course = list(catalog.searchResults(code=(code, code)))[0]
        # presumably the department; its own parent supplies fcode
        department = course.__parent__.__parent__
        obj.fcode = department.__parent__.code
        obj.dcode = department.code
        obj.title = course.title
        obj.credits = course.credits
        obj.passmark = course.passmark
        obj.semester = course.semester
        studylevel[code] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Convert `row` and additionally check that the course exists.

        Returns the converter's ``(errs, inv_errs, conv_dict)`` tuple.
        A ``code`` without hit in the ``courses_catalog`` adds an
        error; this is not checked by the converter.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        code = row['code']
        catalog = getUtility(ICatalog, name='courses_catalog')
        hits = catalog.searchResults(code=(code, code))
        if len(hits) < 1:
            errs.append(('code', 'non-existent'))
        return errs, inv_errs, conv_dict
528
class StudentOnlinePaymentProcessor(BatchProcessor):
    """Batch processor handling IStudentOnlinePayment objects.

    The parent of a payment is the ``payments`` item of the addressed
    student.  ``p_id`` values may carry ``#`` characters at their ends
    (to stop Excel or Calc from transforming them into numbers); these
    are stripped before use.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'paymentimporter'
    grok.name(util_name)

    name = u'Payment Importer'
    iface = IStudentOnlinePayment
    factory_name = 'waeup.StudentOnlinePayment'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """All accepted column names, sorted and without duplicates."""
        names = set(['student_id', 'reg_number', 'matric_number', 'p_id'])
        names.update(getFields(self.iface).keys())
        return sorted(names)

    def checkHeaders(self, headerfields, mode='ignore'):
        """Validate the CSV header line; return True or raise
        `FatalCSVError`.

        A locator column and the ``p_id`` column are mandatory, and
        column names not starting with ``--`` must be unique.
        """
        has_locator = ('reg_number' in headerfields
                       or 'student_id' in headerfields
                       or 'matric_number' in headerfields)
        if not has_locator:
            raise FatalCSVError(
                "Need at least columns student_id "
                "or reg_number or matric_number for import!")
        if 'p_id' not in headerfields:
            raise FatalCSVError("Need p_id for import!")
        # Only columns which are not marked as ignored must be unique.
        kept = [header for header in headerfields
                if not header.startswith('--')]
        if len(kept) != len(set(kept)):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def _getStudent(self, row, site):
        """Find the student addressed by `row`; None if there is none.

        Only the first non-empty locator column is evaluated.
        """
        if 'students' not in site.keys():
            return None
        student_id = row.get('student_id')
        if student_id:
            if student_id in site['students']:
                return site['students'][student_id]
            return None
        for field in ('reg_number', 'matric_number'):
            number = row.get(field)
            if not number:
                continue
            cat = queryUtility(ICatalog, name='students_catalog')
            found = list(cat.searchResults(**{field: (number, number)}))
            if found:
                return found[0]
            return None
        return None

    def getParent(self, row, site):
        """Return the ``payments`` item of the addressed student, or
        None if no student was found.
        """
        student = self._getStudent(row, site)
        if student is None:
            return None
        return student['payments']

    def parentsExist(self, row, site):
        """True if a payments container was found for `row`."""
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """True if the payment named in `row` already exists."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the payment addressed by ``row['p_id']`` or None.

        Ids not starting with ``p`` (data migrated from the old SRP)
        are looked up as ``'p'`` plus the id without its first six
        characters.
        """
        payments = self.getParent(row, site)
        if payments is None:
            return None
        key = row['p_id'].strip('#')
        if not key.startswith('p'):
            # For data migration from old SRP
            key = 'p' + key[6:]
        return payments.get(key)

    def addEntry(self, obj, row, site):
        """Store payment `obj` in the student's payments container.

        For ids not starting with ``p`` (data migrated from the old
        SRP) the id is rewritten as in `getEntry` and also assigned to
        ``obj.p_id``.
        """
        payments = self.getParent(row, site)
        p_id = row['p_id'].strip('#')
        if p_id.startswith('p'):
            payments[p_id] = obj
            return
        # For data migration from old SRP
        obj.p_id = 'p' + p_id[6:]
        payments[obj.p_id] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Convert `row` and additionally check the length of ``p_id``.

        Returns the converter's ``(errs, inv_errs, conv_dict)`` tuple.
        After stripping ``#``, ids starting with ``p`` must have 14
        characters, all other ids 19.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        p_id = row['p_id'].strip('#')
        if p_id.startswith('p'):
            expected_length = 14
        else:
            expected_length = 19
        if len(p_id) != expected_length:
            errs.append(('p_id', 'invalid length'))
        return errs, inv_errs, conv_dict
Note: See TracBrowser for help on using the repository browser.