source: main/waeup.sirp/trunk/src/waeup/sirp/students/batching.py @ 7640

Last change on this file since 7640 was 7626, checked in by Henrik Bettermann, 13 years ago

We can use the hash symbol at the end of p_id in import files
to avoid annoying automatic number transformation
by Excel or Calc.

  • Property svn:keywords set to Id
File size: 23.5 KB
RevLine 
[7191]1## $Id: batching.py 7626 2012-02-10 20:26:34Z henrik $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
[7433]18"""Batch processing components for student objects.
[6821]19
20Batch processors eat CSV files to add, update or remove large numbers
21of certain kinds of objects at once.
22
[7261]23Here we define the processors for students specific objects like
24students, studycourses, payment tickets and accommodation tickets.
[6821]25"""
26import grok
[6849]27import csv
[6821]28from zope.interface import Interface
[6825]29from zope.schema import getFields
[7548]30from zope.component import queryUtility, getUtility
[7429]31from zope.event import notify
[6825]32from zope.catalog.interfaces import ICatalog
[7513]33from hurry.workflow.interfaces import IWorkflowState
[6849]34from waeup.sirp.interfaces import (
[7522]35    IBatchProcessor, FatalCSVError, IObjectConverter, IUserAccount,
36    IObjectHistory)
[6825]37from waeup.sirp.students.interfaces import (
[7532]38    IStudent, IStudentStudyCourse,
[7536]39    IStudentUpdateByRegNo, IStudentUpdateByMatricNo,
[7623]40    IStudentStudyLevel, ICourseTicket,
41    IStudentOnlinePayment)
[7513]42from waeup.sirp.students.workflow import  IMPORTABLE_STATES
[6821]43from waeup.sirp.utils.batching import BatchProcessor
[7522]44from waeup.sirp.utils.helpers import get_current_principal
[6821]45
class StudentProcessor(BatchProcessor):
    """A batch processor for IStudent objects.

    Each row addresses a student through the first non-empty column
    among ``student_id``, ``reg_number`` and ``matric_number`` (see
    `getLocator`).
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studentimporter'
    grok.name(util_name)

    name = u'Student Importer'
    iface = IStudent

    location_fields = []
    factory_name = 'waeup.Student'

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of known column names.

        These are the three locator columns, ``password`` and
        ``reg_state`` plus every field declared by ``self.iface``.
        """
        return sorted(list(set(
            ['student_id', 'reg_number', 'matric_number',
             'password', 'reg_state'] + getFields(
                self.iface).keys())))

    def checkHeaders(self, headerfields, mode='create'):
        """Check the header line of an import file.

        Raises `FatalCSVError` if no locator column is present, if (in
        ``create`` mode) one of ``self.required_fields`` is missing, or
        if a column name not starting with ``--`` appears more than
        once. Returns ``True`` otherwise.
        """
        if not 'reg_number' in headerfields and not 'student_id' \
            in headerfields and not 'matric_number' in headerfields:
            raise FatalCSVError(
                "Need at least columns student_id or reg_number " +
                "or matric_number for import!")
        if mode == 'create':
            for field in self.required_fields:
                if not field in headerfields:
                    raise FatalCSVError(
                        "Need at least columns %s for import!" %
                        ', '.join(["'%s'" % x for x in self.required_fields]))
        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Tell whether `site` contains a ``students`` item."""
        return 'students' in site.keys()

    def getLocator(self, row):
        """Return the name of the column used to locate the student.

        The first column with a non-empty value wins, in the order
        ``student_id``, ``reg_number``, ``matric_number``. Returns
        ``None`` if none of them carries a value.
        """
        if row.get('student_id',None):
            return 'student_id'
        elif row.get('reg_number',None):
            return 'reg_number'
        elif row.get('matric_number',None):
            return 'matric_number'
        else:
            return None

    # The entry never exists in create mode.
    def entryExists(self, row, site):
        """Tell whether `getEntry` finds a student for `row`."""
        return self.getEntry(row, site) is not None

    def getParent(self, row, site):
        """Return the ``students`` item of `site`."""
        return site['students']

    def _searchCatalog(self, field, value):
        """Return the first ``students_catalog`` hit for an exact
        match of `value` on index `field`, or ``None`` without a hit.
        """
        cat = queryUtility(ICatalog, name='students_catalog')
        results = list(cat.searchResults(**{field: (value, value)}))
        if results:
            return results[0]
        return None

    def getEntry(self, row, site):
        """Return the student addressed by `row` or ``None``.

        ``student_id`` is looked up directly in ``site['students']``;
        ``reg_number`` and ``matric_number`` are looked up in the
        ``students_catalog``.
        """
        if not 'students' in site.keys():
            return None
        # Determine the locator once instead of once per branch.
        locator = self.getLocator(row)
        if locator == 'student_id':
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
        elif locator in ('reg_number', 'matric_number'):
            return self._searchCatalog(locator, row[locator])
        return None

    def addEntry(self, obj, row, site):
        """Add the student `obj` to the parent container.

        Afterwards a log entry and a history message are written on a
        best-effort basis.
        """
        parent = self.getParent(row, site)
        parent.addStudent(obj)
        # In some tests we don't have a students container or a user
        try:
            user = get_current_principal()
            parent.logger.info('%s - %s - Student record imported' % (
                user.id,obj.student_id))
            history = IObjectHistory(obj)
            history.addMessage('Student record imported')
        except (TypeError, AttributeError):
            pass
        return

    def delEntry(self, row, site):
        """Remove the student addressed by `row`, if there is one."""
        student = self.getEntry(row, site)
        if student is not None:
            parent = self.getParent(row, site)
            del parent[student.student_id]

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        ``password`` goes through `IUserAccount`, ``reg_state`` through
        `IWorkflowState` (with a history message); any other key is set
        as attribute if `obj` already has an attribute of that name.
        """
        for key, value in row.items():
            # Set student password and all fields declared in interface.
            # NOTE(review): an empty password is not passed to
            # setPassword; it falls through to the generic branch below
            # -- confirm that this is intended.
            if key == 'password' and value != '':
                IUserAccount(obj).setPassword(value)
            elif key == 'reg_state':
                IWorkflowState(obj).setState(value)
                msg = "State '%s' set" % value
                history = IObjectHistory(obj)
                history.addMessage(msg)
            elif hasattr(obj, key):
                setattr(obj, key, value)
        return

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used fieldnames.

        The keys are the raw header names read from the first line of
        the file at `path`, the values the corresponding entries of
        `headerfields`. ``--IGNORE--`` columns are left out; in
        ``remove`` mode only the locator columns are kept.
        """
        result = dict()
        csv_file = open(path, 'rb')
        try:
            raw_header = csv.reader(csv_file).next()
        finally:
            # Only the header line is needed; do not leak the handle.
            csv_file.close()
        for num, field in enumerate(headerfields):
            if field not in [
                'student_id', 'reg_number', 'matric_number'] and mode == 'remove':
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        Returns the triple delivered by the converter's
        ``fromStringDict``; a ``('reg_state', ...)`` entry is appended
        to the first element if the row has a ``reg_state`` that is
        not in `IMPORTABLE_STATES`.
        """
        # Default to the processor's interface so that `iface` is always
        # bound, also for update/remove rows located by student_id.
        iface = self.iface
        if mode in ['update', 'remove']:
            locator = self.getLocator(row)
            if locator == 'reg_number':
                iface = IStudentUpdateByRegNo
            elif locator == 'matric_number':
                iface = IStudentUpdateByMatricNo
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        if 'reg_state' in row and \
            not row['reg_state'] in IMPORTABLE_STATES:
            if row['reg_state'] != '':
                errs.append(('reg_state','not allowed'))
            else:
                errs.append(('reg_state','no value provided'))
        return errs, inv_errs, conv_dict
207
class StudentStudyCourseProcessor(BatchProcessor):
    """A batch processor for IStudentStudyCourse objects.

    The processed entry is the ``studycourse`` item of the student
    addressed by a row.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studycourseupdater'
    grok.name(util_name)

    name = u'StudentStudyCourse Importer (update only)'
    iface = IStudentStudyCourse
    factory_name = 'waeup.StudentStudyCourse'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of known column names.

        These are the three locator columns plus every field declared
        by ``self.iface``.
        """
        return sorted(list(set(
            ['student_id','reg_number','matric_number'] + getFields(
                self.iface).keys())))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the header line of an import file.

        Raises `FatalCSVError` if no locator column is present or if a
        column name not starting with ``--`` appears more than once.
        Returns ``True`` otherwise. `mode` is not evaluated here.
        """
        if not 'reg_number' in headerfields and not 'student_id' \
            in headerfields and not 'matric_number' in headerfields:
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the student addressed by `row` or ``None``.

        The first non-empty column among ``student_id``,
        ``reg_number`` and ``matric_number`` decides how the student
        is looked up; later columns are not tried if that lookup
        fails.
        """
        if not 'students' in site.keys():
            return None
        if 'student_id' in row.keys() and row['student_id']:
            if row['student_id'] in site['students']:
                student = site['students'][row['student_id']]
                return student
        elif 'reg_number' in row.keys() and row['reg_number']:
            reg_number = row['reg_number']
            cat = queryUtility(ICatalog, name='students_catalog')
            results = list(
                cat.searchResults(reg_number=(reg_number, reg_number)))
            if results:
                return results[0]
        elif 'matric_number' in row.keys() and row['matric_number']:
            matric_number = row['matric_number']
            cat = queryUtility(ICatalog, name='students_catalog')
            results = list(
                cat.searchResults(matric_number=(matric_number, matric_number)))
            if results:
                return results[0]
        return None

    def parentsExist(self, row, site):
        """Tell whether `getParent` finds a student for `row`."""
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether `getEntry` finds a study course for `row`."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the ``studycourse`` item of the addressed student.

        Returns ``None`` if the student cannot be found.
        """
        student = self.getParent(row, site)
        if student is None:
            return None
        return student.get('studycourse')

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        Only keys for which `obj` already has an attribute are set.
        """
        for key, value in row.items():
            # Skip fields not declared in interface.
            if hasattr(obj, key):
                setattr(obj, key, value)
        # Update the students_catalog
        notify(grok.ObjectModifiedEvent(obj.__parent__))
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the triple delivered by the converter's
        ``fromStringDict``. A ``('current_level', 'not in range')``
        entry is appended to the first element if the converted
        current_level lies below the certificate's ``start_level`` or
        above its ``end_level`` plus 120.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # We have to check if current_level is in range of certificate.
        # This is not done by the converter. This kind of conversion
        # checking does only work if a combination of certificate and
        # current_level is provided, so skip it (instead of failing
        # with a KeyError) when one of the two is missing.
        if 'certificate' in conv_dict and 'current_level' in conv_dict:
            certificate = conv_dict['certificate']
            current_level = conv_dict['current_level']
            start_level = certificate.start_level
            end_level = certificate.end_level
            if current_level < start_level or \
                current_level > end_level+120:
                errs.append(('current_level','not in range'))
        return errs, inv_errs, conv_dict
309
class StudentStudyLevelProcessor(BatchProcessor):
    """Batch processor handling IStudentStudyLevel objects.

    A row names its student via ``student_id``, ``reg_number`` or
    ``matric_number`` and the study level via ``level``.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studylevelimporter'
    grok.name(util_name)

    name = u'StudentStudyLevel Importer'
    iface = IStudentStudyLevel
    factory_name = 'waeup.StudentStudyLevel'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Known column names, sorted and without duplicates."""
        extra_columns = ['student_id','reg_number','matric_number','level']
        column_names = set(extra_columns + getFields(self.iface).keys())
        return sorted(list(column_names))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Validate the header line; raise `FatalCSVError` on problems.

        A locator column and a ``level`` column are mandatory, and
        column names not starting with ``--`` must be unique. Returns
        ``True`` if everything is fine.
        """
        locator_columns = [
            col for col in ('student_id', 'reg_number', 'matric_number')
            if col in headerfields]
        if not locator_columns:
            raise FatalCSVError(
                "Need at least columns student_id "
                "or reg_number or matric_number for import!")
        if 'level' not in headerfields:
            raise FatalCSVError("Need level for import!")
        # Columns starting with '--' are exempt from the uniqueness check.
        significant = [col for col in headerfields
                       if not col.startswith('--')]
        if len(significant) > len(set(significant)):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the ``studycourse`` item of the addressed student.

        Only the first non-empty locator column is used. ``None`` is
        returned if there is no ``students`` item in `site` or the
        student cannot be found.
        """
        if 'students' not in site.keys():
            return None
        student = None
        if row.get('student_id'):
            if row['student_id'] in site['students']:
                student = site['students'][row['student_id']]
        else:
            for field in ('reg_number', 'matric_number'):
                if not row.get(field):
                    continue
                catalog = queryUtility(ICatalog, name='students_catalog')
                hits = list(catalog.searchResults(
                    **{field: (row[field], row[field])}))
                if hits:
                    student = hits[0]
                # Never fall back to a later locator column.
                break
        if student is None:
            return None
        return student['studycourse']

    def parentsExist(self, row, site):
        """True if a study course can be found for `row`."""
        studycourse = self.getParent(row, site)
        return studycourse is not None

    def entryExists(self, row, site):
        """True if the study level named in `row` can be found."""
        studylevel = self.getEntry(row, site)
        return studylevel is not None

    def getEntry(self, row, site):
        """Return the item stored under ``row['level']`` in the
        student's study course, or ``None``.
        """
        container = self.getParent(row, site)
        if container is not None:
            return container.get(row['level'])
        return None

    def addEntry(self, obj, row, site):
        """Store `obj` in the study course under ``row['level']``.

        ``obj.level`` is set to the integer value of that column.
        """
        container = self.getParent(row, site)
        level_key = row['level']
        obj.level = int(level_key)
        container[level_key] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Run the converter on `row` and check ``level`` in addition.

        ``level`` must be an integer taken from ``range(0,700,10)``;
        otherwise an error entry is appended.
        """
        converter = IObjectConverter(self.iface)
        conversion = converter.fromStringDict(row, self.factory_name)
        errs, inv_errs, conv_dict = conversion
        # The converter does not check whether level is a valid integer.
        try:
            level = int(row['level'])
        except ValueError:
            errs.append(('level','no integer'))
        else:
            if level not in range(0,700,10):
                errs.append(('level','no valid integer'))
        return errs, inv_errs, conv_dict
[7548]406
class CourseTicketProcessor(BatchProcessor):
    """Batch processor handling ICourseTicket objects.

    A row names its student via ``student_id``, ``reg_number`` or
    ``matric_number``, the study level via ``level`` and the ticket
    via ``code``.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'courseticketimporter'
    grok.name(util_name)

    name = u'CourseTicket Importer'
    iface = ICourseTicket
    factory_name = 'waeup.CourseTicket'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Known column names, sorted and without duplicates."""
        extra_columns = [
            'student_id','reg_number','matric_number','level','code']
        column_names = set(extra_columns + getFields(self.iface).keys())
        return sorted(list(column_names))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Validate the header line; raise `FatalCSVError` on problems.

        A locator column, ``level`` and ``code`` are mandatory, and
        column names not starting with ``--`` must be unique. Returns
        ``True`` if everything is fine.
        """
        locator_columns = [
            col for col in ('student_id', 'reg_number', 'matric_number')
            if col in headerfields]
        if not locator_columns:
            raise FatalCSVError(
                "Need at least columns student_id "
                "or reg_number or matric_number for import!")
        if 'level' not in headerfields:
            raise FatalCSVError("Need level for import!")
        if 'code' not in headerfields:
            raise FatalCSVError("Need code for import!")
        # Columns starting with '--' are exempt from the uniqueness check.
        significant = [col for col in headerfields
                       if not col.startswith('--')]
        if len(significant) > len(set(significant)):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the item stored under ``row['level']`` in the
        addressed student's ``studycourse``.

        Only the first non-empty locator column is used. ``None`` is
        returned if there is no ``students`` item in `site`, the
        student cannot be found or the study course has no such item.
        """
        if 'students' not in site.keys():
            return None
        student = None
        if row.get('student_id'):
            if row['student_id'] in site['students']:
                student = site['students'][row['student_id']]
        else:
            for field in ('reg_number', 'matric_number'):
                if not row.get(field):
                    continue
                catalog = queryUtility(ICatalog, name='students_catalog')
                hits = list(catalog.searchResults(
                    **{field: (row[field], row[field])}))
                if hits:
                    student = hits[0]
                # Never fall back to a later locator column.
                break
        if student is None:
            return None
        return student['studycourse'].get(row['level'])

    def parentsExist(self, row, site):
        """True if a study level can be found for `row`."""
        studylevel = self.getParent(row, site)
        return studylevel is not None

    def entryExists(self, row, site):
        """True if the course ticket named in `row` can be found."""
        ticket = self.getEntry(row, site)
        return ticket is not None

    def getEntry(self, row, site):
        """Return the item stored under ``row['code']`` in the study
        level, or ``None``.
        """
        container = self.getParent(row, site)
        if container is not None:
            return container.get(row['code'])
        return None

    def addEntry(self, obj, row, site):
        """Fill `obj` from the catalogued course and store it.

        The first ``courses_catalog`` hit for ``row['code']`` supplies
        title, credits, passmark and semester; ``dcode`` and ``fcode``
        are the ``code`` attributes found two and three ``__parent__``
        steps above that hit.
        """
        container = self.getParent(row, site)
        code = row['code']
        catalog = getUtility(ICatalog, name='courses_catalog')
        course = list(catalog.searchResults(code=(code, code)))[0]
        two_up = course.__parent__.__parent__
        obj.fcode = two_up.__parent__.code
        obj.dcode = two_up.code
        for attr in ('title', 'credits', 'passmark', 'semester'):
            setattr(obj, attr, getattr(course, attr))
        container[code] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Run the converter on `row` and check ``code`` in addition.

        An error entry is appended if the ``courses_catalog`` has no
        hit for ``row['code']``.
        """
        converter = IObjectConverter(self.iface)
        conversion = converter.fromStringDict(row, self.factory_name)
        errs, inv_errs, conv_dict = conversion
        # The converter does not check whether the course really exists.
        code = row['code']
        catalog = getUtility(ICatalog, name='courses_catalog')
        hits = catalog.searchResults(code=(code, code))
        if not len(hits):
            errs.append(('code','non-existent'))
        return errs, inv_errs, conv_dict
513
class StudentOnlinePaymentProcessor(BatchProcessor):
    """Batch processor handling IStudentOnlinePayment objects.

    A row names its student via ``student_id``, ``reg_number`` or
    ``matric_number`` and the payment via ``p_id``.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'paymentimporter'
    grok.name(util_name)

    name = u'Payment Importer'
    iface = IStudentOnlinePayment
    factory_name = 'waeup.StudentOnlinePayment'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Known column names, sorted and without duplicates."""
        extra_columns = ['student_id','reg_number','matric_number','p_id']
        column_names = set(extra_columns + getFields(self.iface).keys())
        return sorted(list(column_names))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Validate the header line; raise `FatalCSVError` on problems.

        A locator column and ``p_id`` are mandatory, and column names
        not starting with ``--`` must be unique. Returns ``True`` if
        everything is fine.
        """
        locator_columns = [
            col for col in ('student_id', 'reg_number', 'matric_number')
            if col in headerfields]
        if not locator_columns:
            raise FatalCSVError(
                "Need at least columns student_id "
                "or reg_number or matric_number for import!")
        if 'p_id' not in headerfields:
            raise FatalCSVError("Need p_id for import!")
        # Columns starting with '--' are exempt from the uniqueness check.
        significant = [col for col in headerfields
                       if not col.startswith('--')]
        if len(significant) > len(set(significant)):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the ``payments`` item of the addressed student.

        Only the first non-empty locator column is used. ``None`` is
        returned if there is no ``students`` item in `site` or the
        student cannot be found.
        """
        if 'students' not in site.keys():
            return None
        student = None
        if row.get('student_id'):
            if row['student_id'] in site['students']:
                student = site['students'][row['student_id']]
        else:
            for field in ('reg_number', 'matric_number'):
                if not row.get(field):
                    continue
                catalog = queryUtility(ICatalog, name='students_catalog')
                hits = list(catalog.searchResults(
                    **{field: (row[field], row[field])}))
                if hits:
                    student = hits[0]
                # Never fall back to a later locator column.
                break
        if student is None:
            return None
        return student['payments']

    def parentsExist(self, row, site):
        """True if a payments container can be found for `row`."""
        payments = self.getParent(row, site)
        return payments is not None

    def entryExists(self, row, site):
        """True if the payment named in `row` can be found."""
        payment = self.getEntry(row, site)
        return payment is not None

    def getEntry(self, row, site):
        """Return the payment stored for ``row['p_id']`` or ``None``.

        ``#`` characters around the id are stripped. Ids not starting
        with ``p`` are mapped to ``'p'`` plus the id without its first
        six characters.
        """
        container = self.getParent(row, site)
        if container is None:
            return None
        # We can use the hash symbol at the end of p_id in import files
        # to avoid annoying automatic number transformation
        # by Excel or Calc
        key = row['p_id'].strip('#')
        if not key.startswith('p'):
            # For data migration from old SRP
            key = 'p' + key[6:]
        return container.get(key)

    def addEntry(self, obj, row, site):
        """Store `obj` in the student's payments container.

        Ids starting with ``p`` are used as key unchanged (after
        stripping ``#``). For other ids ``obj.p_id`` is rewritten to
        ``'p'`` plus the id without its first six characters and that
        value is used as key.
        """
        container = self.getParent(row, site)
        p_id = row['p_id'].strip('#')
        if p_id.startswith('p'):
            container[p_id] = obj
            return
        # For data migration from old SRP
        obj.p_id = 'p' + p_id[6:]
        container[obj.p_id] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Run the converter on `row` and check ``p_id`` in addition.

        After stripping ``#``, an id starting with ``p`` must be 14
        characters long, any other id 19 characters; otherwise an
        error entry is appended.
        """
        converter = IObjectConverter(self.iface)
        conversion = converter.fromStringDict(row, self.factory_name)
        errs, inv_errs, conv_dict = conversion
        # We have to check p_id.
        p_id = row['p_id'].strip('#')
        if p_id.startswith('p'):
            expected_length = 14
        else:
            expected_length = 19
        if len(p_id) != expected_length:
            errs.append(('p_id','invalid length'))
        return errs, inv_errs, conv_dict
Note: See TracBrowser for help on using the repository browser.