source: main/waeup.kofa/trunk/src/waeup/kofa/students/batching.py @ 7944

Last change on this file since 7944 was 7933, checked in by Henrik Bettermann, 13 years ago

Rename importers to processors.

  • Property svn:keywords set to Id
File size: 24.2 KB
RevLine 
[7191]1## $Id: batching.py 7933 2012-03-21 13:42:00Z henrik $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
"""Batch processing components for student objects.

Batch processors eat CSV files to add, update or remove large numbers
of certain kinds of objects at once.

Here we define the processors for student-specific objects like
students, study courses, payment tickets and accommodation tickets.
"""
26import grok
[6849]27import csv
[6821]28from zope.interface import Interface
[6825]29from zope.schema import getFields
[7548]30from zope.component import queryUtility, getUtility
[7429]31from zope.event import notify
[6825]32from zope.catalog.interfaces import ICatalog
[7513]33from hurry.workflow.interfaces import IWorkflowState
[7811]34from waeup.kofa.interfaces import (
[7522]35    IBatchProcessor, FatalCSVError, IObjectConverter, IUserAccount,
36    IObjectHistory)
[7811]37from waeup.kofa.students.interfaces import (
[7532]38    IStudent, IStudentStudyCourse,
[7536]39    IStudentUpdateByRegNo, IStudentUpdateByMatricNo,
[7623]40    IStudentStudyLevel, ICourseTicket,
41    IStudentOnlinePayment)
[7811]42from waeup.kofa.students.workflow import  IMPORTABLE_STATES
43from waeup.kofa.utils.batching import BatchProcessor
[6821]44
class StudentProcessor(BatchProcessor):
    """A batch processor for IStudent objects.

    A row addresses a student through the first non-empty column among
    ``student_id``, ``reg_number`` and ``matric_number`` (see
    ``getLocator``).
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studentprocessor'
    grok.name(util_name)

    name = u'Student Processor'
    iface = IStudent

    location_fields = []
    factory_name = 'waeup.Student'

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of the locator columns,
        ``password``, ``reg_state`` and all fields of ``iface``.
        """
        return sorted(set(
            ['student_id', 'reg_number', 'matric_number',
             'password', 'reg_state'] + list(getFields(self.iface).keys())))

    def checkHeaders(self, headerfields, mode='create'):
        """Validate the CSV header.

        Raises `FatalCSVError` if no locator column is present, if (in
        create mode) one of ``self.required_fields`` is missing, or if
        a non-ignored column name appears twice. Returns True otherwise.
        """
        if not 'reg_number' in headerfields and not 'student_id' \
            in headerfields and not 'matric_number' in headerfields:
            raise FatalCSVError(
                "Need at least columns student_id or reg_number " +
                "or matric_number for import!")
        if mode == 'create':
            for field in self.required_fields:
                if not field in headerfields:
                    raise FatalCSVError(
                        "Need at least columns %s for import!" %
                        ', '.join(["'%s'" % x for x in self.required_fields]))
        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Tell whether `site` has a ``students`` container."""
        return 'students' in site.keys()

    def getLocator(self, row):
        """Return the name of the first non-empty locator column of
        `row` (``student_id``, ``reg_number``, ``matric_number``) or
        None if all of them are empty or missing.
        """
        if row.get('student_id', None):
            return 'student_id'
        elif row.get('reg_number', None):
            return 'reg_number'
        elif row.get('matric_number', None):
            return 'matric_number'
        else:
            return None

    # The entry never exists in create mode.
    def entryExists(self, row, site):
        """Tell whether `row` addresses an existing student."""
        return self.getEntry(row, site) is not None

    def getParent(self, row, site):
        """Return the ``students`` container of `site`."""
        return site['students']

    def getEntry(self, row, site):
        """Return the student addressed by `row` or None.

        ``student_id`` is looked up in the students container directly;
        ``reg_number`` and ``matric_number`` are searched in the
        ``students_catalog`` and the first hit is returned.
        """
        if not 'students' in site.keys():
            return None
        # Determine the locator once instead of once per branch.
        locator = self.getLocator(row)
        if locator == 'student_id':
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
        elif locator in ('reg_number', 'matric_number'):
            value = row[locator]
            cat = queryUtility(ICatalog, name='students_catalog')
            # (value, value) asks the catalog index for an exact match.
            results = list(cat.searchResults(**{locator: (value, value)}))
            if results:
                return results[0]
        return None

    def addEntry(self, obj, row, site):
        """Add student `obj` to the students container and record the
        import in the object history.
        """
        parent = self.getParent(row, site)
        parent.addStudent(obj)
        # No logger call here; per the original note logging is already
        # done by the workflow transition event handler.
        history = IObjectHistory(obj)
        history.addMessage('Student record imported')
        return

    def delEntry(self, row, site):
        """Remove the student addressed by `row`, if there is one, and
        log the removal.
        """
        student = self.getEntry(row, site)
        if student is not None:
            parent = self.getParent(row, site)
            parent.logger.info('%s - Student removed' % student.student_id)
            del parent[student.student_id]

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        ``password`` is set through `IUserAccount`, ``reg_state``
        through `IWorkflowState` (with a history message); any other
        key is set with `setattr` if `obj` already has such an
        attribute. All keys of `row` are written to the log message.
        """
        # Remove student_id from row if empty
        if 'student_id' in row and row['student_id'] is None:
            row.pop('student_id')
        changes = []
        for key, value in row.items():
            # Set student password and all fields declared in interface.
            if key == 'password':
                # An empty password means "leave unchanged". It must not
                # fall through to the generic setattr branch below, which
                # would overwrite the stored password with ''.
                if value != '':
                    IUserAccount(obj).setPassword(value)
            elif key == 'reg_state':
                IWorkflowState(obj).setState(value)
                msg = "State '%s' set" % value
                history = IObjectHistory(obj)
                history.addMessage(msg)
            elif hasattr(obj, key):
                setattr(obj, key, value)
            # NOTE(review): this also writes the password value to the
            # log in clear text - consider masking it.
            changes.append('%s=%s, ' % (key, value))
        items_changed = ''.join(changes)
        parent = self.getParent(row, site)
        if hasattr(obj, 'student_id'):
            # Update mode: the student exists and we can get the student_id
            parent.logger.info(
                '%s - Student record updated: %s'
                % (obj.student_id, items_changed))
        else:
            # Create mode: the student does not yet exist
            parent.logger.info('Student data imported: %s' % items_changed)
        return

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used fieldnames.

        In remove mode only the locator columns are mapped; columns
        marked ``--IGNORE--`` are never mapped.
        """
        result = dict()
        # Only the first line is needed; close the file right away
        # instead of leaking the handle.
        with open(path, 'rb') as csv_file:
            raw_header = next(csv.reader(csv_file))
        for num, field in enumerate(headerfields):
            if field not in [
                'student_id', 'reg_number', 'matric_number'] and mode == 'remove':
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        In update and remove mode the interface used for conversion
        depends on the locator of the row. Returns the tuple
        ``(errs, inv_errs, conv_dict)`` of the converter, with an extra
        ``reg_state`` error if that column holds a state which is not
        in `IMPORTABLE_STATES`.
        """
        iface = self.iface
        if mode in ['update', 'remove']:
            locator = self.getLocator(row)
            if locator == 'reg_number':
                iface = IStudentUpdateByRegNo
            elif locator == 'matric_number':
                iface = IStudentUpdateByMatricNo
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        if 'reg_state' in row and \
            not row['reg_state'] in IMPORTABLE_STATES:
            if row['reg_state'] != '':
                errs.append(('reg_state', 'not allowed'))
            else:
                errs.append(('reg_state', 'no value provided'))
        return errs, inv_errs, conv_dict
215
class StudentStudyCourseProcessor(BatchProcessor):
    """A batch processor for IStudentStudyCourse objects.

    The parent of a study course is the student, which is located by
    the first non-empty column among ``student_id``, ``reg_number``
    and ``matric_number``.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studycourseupdater'
    grok.name(util_name)

    name = u'StudentStudyCourse Processor (update only)'
    iface = IStudentStudyCourse
    factory_name = 'waeup.StudentStudyCourse'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of the locator columns and all
        fields of ``iface``.
        """
        return sorted(set(
            ['student_id', 'reg_number', 'matric_number'] +
            list(getFields(self.iface).keys())))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Validate the CSV header.

        Raises `FatalCSVError` if no locator column is present or if a
        non-ignored column name appears twice. Returns True otherwise.
        """
        if not 'reg_number' in headerfields and not 'student_id' \
            in headerfields and not 'matric_number' in headerfields:
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the student addressed by `row` or None.

        Only the first non-empty locator column is consulted; if it
        does not lead to a student the remaining ones are not tried.
        """
        if not 'students' in site.keys():
            return None
        if row.get('student_id'):
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
            return None
        for field in ('reg_number', 'matric_number'):
            value = row.get(field)
            if not value:
                continue
            cat = queryUtility(ICatalog, name='students_catalog')
            # (value, value) asks the catalog index for an exact match.
            results = list(cat.searchResults(**{field: (value, value)}))
            if results:
                return results[0]
            return None
        return None

    def parentsExist(self, row, site):
        """Tell whether `row` addresses an existing student."""
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether the addressed student has a study course."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the ``studycourse`` of the addressed student or None."""
        student = self.getParent(row, site)
        if student is None:
            return None
        return student.get('studycourse')

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        Keys for which `obj` has no attribute are not set, but they
        are still part of the log message. Finally an
        `ObjectModifiedEvent` is sent for the parent of `obj`.
        """
        changes = []
        for key, value in row.items():
            # Skip fields not declared in interface.
            if hasattr(obj, key):
                setattr(obj, key, value)
                if key == 'certificate':
                    # Log the certificate code instead of the object.
                    # Fall back to the raw value if there is no ``code``
                    # (e.g. a None certificate) instead of crashing
                    # after the attribute has already been set.
                    value = getattr(value, 'code', value)
            changes.append('%s=%s, ' % (key, value))
        items_changed = ''.join(changes)
        parent = self.getParent(row, site)
        parent.__parent__.logger.info(
            '%s - Study course updated: %s'
            % (parent.student_id, items_changed))
        # Update the students_catalog
        notify(grok.ObjectModifiedEvent(obj.__parent__))
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` of the
        converter, with an extra ``current_level`` error if the level
        is outside the range derived from the certificate.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # We have to check if current_level is in range of certificate.
        # This is not done by the converter. This kind of conversion
        # checking does only work if a combination of certificate and
        # current_level is provided, so require both keys: a row with a
        # certificate but without a converted current_level must not
        # raise a KeyError here.
        if 'certificate' in conv_dict and 'current_level' in conv_dict:
            certificate = conv_dict['certificate']
            current_level = conv_dict['current_level']
            # NOTE(review): levels up to end_level + 120 are accepted;
            # presumably this covers repeated/extended levels - confirm.
            if current_level < certificate.start_level or \
                current_level > certificate.end_level + 120:
                errs.append(('current_level', 'not in range'))
        return errs, inv_errs, conv_dict
325
class StudentStudyLevelProcessor(BatchProcessor):
    """A batch processor for IStudentStudyLevel objects.

    The parent of a study level is the ``studycourse`` of the student
    addressed by the row; the level itself is stored under the value
    of the ``level`` column.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studylevelprocessor'
    grok.name(util_name)

    name = u'StudentStudyLevel Processor'
    iface = IStudentStudyLevel
    factory_name = 'waeup.StudentStudyLevel'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of the locator columns,
        ``level`` and all fields of ``iface``.
        """
        return sorted(set(
            ['student_id', 'reg_number', 'matric_number', 'level'] +
            list(getFields(self.iface).keys())))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Validate the CSV header.

        Raises `FatalCSVError` if no locator column is present, if the
        ``level`` column is missing or if a non-ignored column name
        appears twice. Returns True otherwise.
        """
        if not 'reg_number' in headerfields and not 'student_id' \
            in headerfields and not 'matric_number' in headerfields:
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        if not 'level' in headerfields:
            raise FatalCSVError(
                "Need level for import!")
        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the ``studycourse`` of the student addressed by
        `row` or None.

        Only the first non-empty locator column is consulted; if it
        does not lead to a student the remaining ones are not tried.
        """
        if not 'students' in site.keys():
            return None
        if row.get('student_id'):
            if row['student_id'] in site['students']:
                student = site['students'][row['student_id']]
                return student['studycourse']
            return None
        for field in ('reg_number', 'matric_number'):
            value = row.get(field)
            if not value:
                continue
            cat = queryUtility(ICatalog, name='students_catalog')
            # (value, value) asks the catalog index for an exact match.
            results = list(cat.searchResults(**{field: (value, value)}))
            if results:
                return results[0]['studycourse']
            return None
        return None

    def parentsExist(self, row, site):
        """Tell whether `row` addresses an existing study course."""
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether the addressed study level already exists."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the study level stored under ``row['level']`` in the
        addressed study course or None.
        """
        studycourse = self.getParent(row, site)
        if studycourse is None:
            return None
        return studycourse.get(row['level'])

    def addEntry(self, obj, row, site):
        """Store `obj` in the addressed study course under
        ``row['level']`` and set its ``level`` attribute.
        """
        parent = self.getParent(row, site)
        obj.level = int(row['level'])
        parent[row['level']] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` of the
        converter, with an extra ``level`` error if the level is not
        an integer or not a multiple of 10 in the range 0..690.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # We have to check if level is a valid integer.
        # This is not done by the converter.
        try:
            level = int(row['level'])
        except (ValueError, TypeError):
            # TypeError: csv.DictReader fills cells missing from a short
            # row with None; report that as a row error instead of
            # letting int(None) abort the import.
            errs.append(('level', 'no integer'))
        else:
            if level not in range(0, 700, 10):
                errs.append(('level', 'no valid integer'))
        return errs, inv_errs, conv_dict
[7548]422
class CourseTicketProcessor(BatchProcessor):
    """A batch processor for ICourseTicket objects.

    A course ticket lives in a study level, which in turn is found in
    the ``studycourse`` of the student addressed by the row. The ticket
    is stored under the value of the ``code`` column.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'courseticketprocessor'
    grok.name(util_name)

    name = u'CourseTicket Processor'
    iface = ICourseTicket
    factory_name = 'waeup.CourseTicket'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """All usable column names: locators, ``level``, ``code`` and
        the fields of ``iface``, sorted and without duplicates.
        """
        extra_columns = [
            'student_id', 'reg_number', 'matric_number', 'level', 'code']
        unique_names = set(extra_columns + getFields(self.iface).keys())
        return sorted(list(unique_names))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header and return True if it is usable.

        A `FatalCSVError` is raised when all locator columns are
        missing, when ``level`` or ``code`` is missing, or when a
        non-ignored column name occurs more than once.
        """
        has_locator = ('reg_number' in headerfields
                       or 'student_id' in headerfields
                       or 'matric_number' in headerfields)
        if not has_locator:
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        if 'level' not in headerfields:
            raise FatalCSVError(
                "Need level for import!")
        if 'code' not in headerfields:
            raise FatalCSVError(
                "Need code for import!")
        # Columns starting with '--' are ignored and may repeat.
        relevant = [column for column in headerfields
                    if not column.startswith('--')]
        if len(relevant) > len(set(relevant)):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the study level the ticket of `row` belongs to.

        None is returned if `site` has no ``students`` container, if
        the first non-empty locator column does not lead to a student
        or if the student's study course has no such level.
        """
        if 'students' not in site.keys():
            return None
        student = None
        if row.get('student_id'):
            if row['student_id'] in site['students']:
                student = site['students'][row['student_id']]
        else:
            for index in ('reg_number', 'matric_number'):
                number = row.get(index)
                if not number:
                    continue
                catalog = queryUtility(ICatalog, name='students_catalog')
                hits = list(
                    catalog.searchResults(**{index: (number, number)}))
                if hits:
                    student = hits[0]
                # Only the first non-empty locator is ever consulted.
                break
        if student is None:
            return None
        return student['studycourse'].get(row['level'])

    def parentsExist(self, row, site):
        """True if the study level addressed by `row` exists."""
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """True if the course ticket addressed by `row` exists."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the ticket stored under ``row['code']`` in the
        addressed study level, or None.
        """
        studylevel = self.getParent(row, site)
        if studylevel is not None:
            return studylevel.get(row['code'])
        return None

    def addEntry(self, obj, row, site):
        """Copy the data of the catalogued course onto `obj` and store
        `obj` in the addressed study level under ``row['code']``.
        """
        parent = self.getParent(row, site)
        code = row['code']
        catalog = getUtility(ICatalog, name='courses_catalog')
        course = list(catalog.searchResults(code=(code, code)))[0]
        # fcode/dcode come from the third and second ancestor of the
        # course (presumably faculty and department - confirm).
        obj.fcode = course.__parent__.__parent__.__parent__.code
        obj.dcode = course.__parent__.__parent__.code
        obj.title = course.title
        obj.credits = course.credits
        obj.passmark = course.passmark
        obj.semester = course.semester
        parent[code] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        On top of the converter result a ``code`` error is reported
        when the ``courses_catalog`` has no course with that code.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # The converter does not check whether the course really
        # exists, so ask the catalog.
        code = row['code']
        catalog = getUtility(ICatalog, name='courses_catalog')
        found = catalog.searchResults(code=(code, code))
        if len(found) == 0:
            errs.append(('code', 'non-existent'))
        return errs, inv_errs, conv_dict
528
class StudentOnlinePaymentProcessor(BatchProcessor):
    """A batch processor for IStudentOnlinePayment objects.

    The parent of a payment is the ``payments`` container of the
    student addressed by the row. Payment ids may carry ``#``
    characters, which are stripped before use.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'paymentprocessor'
    grok.name(util_name)

    name = u'Payment Processor'
    iface = IStudentOnlinePayment
    factory_name = 'waeup.StudentOnlinePayment'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of the locator columns,
        ``p_id`` and all fields of ``iface``.
        """
        return sorted(set(
            ['student_id', 'reg_number', 'matric_number', 'p_id'] +
            list(getFields(self.iface).keys())))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Validate the CSV header.

        Raises `FatalCSVError` if no locator column is present, if the
        ``p_id`` column is missing or if a non-ignored column name
        appears twice. Returns True otherwise.
        """
        if not 'reg_number' in headerfields and not 'student_id' \
            in headerfields and not 'matric_number' in headerfields:
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        if not 'p_id' in headerfields:
            raise FatalCSVError(
                "Need p_id for import!")
        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the ``payments`` container of the student addressed
        by `row` or None.

        Only the first non-empty locator column is consulted; if it
        does not lead to a student the remaining ones are not tried.
        """
        if not 'students' in site.keys():
            return None
        if row.get('student_id'):
            if row['student_id'] in site['students']:
                student = site['students'][row['student_id']]
                return student['payments']
            return None
        for field in ('reg_number', 'matric_number'):
            value = row.get(field)
            if not value:
                continue
            cat = queryUtility(ICatalog, name='students_catalog')
            # (value, value) asks the catalog index for an exact match.
            results = list(cat.searchResults(**{field: (value, value)}))
            if results:
                return results[0]['payments']
            return None
        return None

    def parentsExist(self, row, site):
        """Tell whether `row` addresses an existing payments container."""
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether the addressed payment already exists."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the payment addressed by ``row['p_id']`` or None."""
        payments = self.getParent(row, site)
        if payments is None:
            return None
        # We can use the hash symbol at the end of p_id in import files
        # to avoid annoying automatic number transformation
        # by Excel or Calc
        p_id = row['p_id'].strip('#')
        if not p_id.startswith('p'):
            # For data migration from old SRP: the key is 'p' plus the
            # id without its first six characters.
            p_id = 'p' + p_id[6:]
        return payments.get(p_id)

    def addEntry(self, obj, row, site):
        """Store `obj` in the addressed payments container.

        Ids not starting with ``p`` are rewritten as in `getEntry`
        and the rewritten id is also assigned to ``obj.p_id``.
        """
        parent = self.getParent(row, site)
        p_id = row['p_id'].strip('#')
        if not p_id.startswith('p'):
            # For data migration from old SRP
            p_id = 'p' + p_id[6:]
            obj.p_id = p_id
        parent[p_id] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` of the
        converter, with an extra ``p_id`` error if the stripped id
        does not have 14 characters (ids starting with ``p``) or 19
        characters (all other ids).
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # We have to check p_id. A missing or None cell (short CSV row)
        # is treated as an empty id so that it is reported as a row
        # error instead of raising on .strip().
        p_id = (row.get('p_id') or '').strip('#')
        expected_length = 14 if p_id.startswith('p') else 19
        if len(p_id) != expected_length:
            errs.append(('p_id', 'invalid length'))
        return errs, inv_errs, conv_dict
Note: See TracBrowser for help on using the repository browser.