source: main/waeup.sirp/trunk/src/waeup/sirp/students/batching.py @ 7645

Last change on this file since 7645 was 7643, checked in by Henrik Bettermann, 13 years ago

Enable mixed imports of rows with or without student_id.

  • Property svn:keywords set to Id
File size: 23.6 KB
RevLine 
[7191]1## $Id: batching.py 7643 2012-02-14 09:32:28Z henrik $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
[7433]18"""Batch processing components for student objects.
[6821]19
20Batch processors eat CSV files to add, update or remove large numbers
21of certain kinds of objects at once.
22
[7261]23Here we define the processors for students specific objects like
24students, studycourses, payment tickets and accommodation tickets.
[6821]25"""
26import grok
[6849]27import csv
[6821]28from zope.interface import Interface
[6825]29from zope.schema import getFields
[7548]30from zope.component import queryUtility, getUtility
[7429]31from zope.event import notify
[6825]32from zope.catalog.interfaces import ICatalog
[7513]33from hurry.workflow.interfaces import IWorkflowState
[6849]34from waeup.sirp.interfaces import (
[7522]35    IBatchProcessor, FatalCSVError, IObjectConverter, IUserAccount,
36    IObjectHistory)
[6825]37from waeup.sirp.students.interfaces import (
[7532]38    IStudent, IStudentStudyCourse,
[7536]39    IStudentUpdateByRegNo, IStudentUpdateByMatricNo,
[7623]40    IStudentStudyLevel, ICourseTicket,
41    IStudentOnlinePayment)
[7513]42from waeup.sirp.students.workflow import  IMPORTABLE_STATES
[6821]43from waeup.sirp.utils.batching import BatchProcessor
[7522]44from waeup.sirp.utils.helpers import get_current_principal
[6821]45
class StudentProcessor(BatchProcessor):
    """A batch processor for IStudent objects.

    A row identifies its student by the first non-empty value among
    the columns ``student_id``, ``reg_number`` and ``matric_number``
    (checked in that order, see `getLocator`).
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studentimporter'
    grok.name(util_name)

    name = u'Student Importer'
    iface = IStudent

    location_fields = []
    factory_name = 'waeup.Student'

    mode = None

    @property
    def available_fields(self):
        """Sorted list of all column names this processor knows.

        These are the three locator columns, ``password`` and
        ``reg_state`` plus every field declared in `iface`.
        """
        return sorted(set(
            ['student_id', 'reg_number', 'matric_number',
             'password', 'reg_state'] +
            list(getFields(self.iface).keys())))

    def checkHeaders(self, headerfields, mode='create'):
        """Check the CSV header and return True if it is usable.

        Raises `FatalCSVError` if none of the locator columns is
        present, if (in ``create`` mode) one of `required_fields` is
        missing, or if a column name which does not start with ``--``
        appears more than once.
        """
        locators = ('student_id', 'reg_number', 'matric_number')
        if not any(x in headerfields for x in locators):
            raise FatalCSVError(
                "Need at least columns student_id or reg_number " +
                "or matric_number for import!")
        if mode == 'create':
            for field in self.required_fields:
                if field not in headerfields:
                    raise FatalCSVError(
                        "Need at least columns %s for import!" %
                        ', '.join(["'%s'" % x for x in self.required_fields]))
        # Columns starting with '--' are to be ignored and may repeat.
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Tell whether `site` contains a ``students`` entry.
        """
        return 'students' in site.keys()

    def getLocator(self, row):
        """Return the name of the column used to locate the student.

        The first column with a non-empty value wins; the order is
        ``student_id``, ``reg_number``, ``matric_number``. Returns
        None if none of them carries a value.
        """
        for locator in ('student_id', 'reg_number', 'matric_number'):
            if row.get(locator, None):
                return locator
        return None

    def entryExists(self, row, site):
        """Tell whether a student matching `row` can be found.
        """
        return self.getEntry(row, site) is not None

    def getParent(self, row, site):
        """Return the ``students`` entry of `site`.
        """
        return site['students']

    def getEntry(self, row, site):
        """Return the student `row` refers to or None.

        A ``student_id`` is looked up directly in ``site['students']``;
        ``reg_number`` and ``matric_number`` are looked up in the
        ``students_catalog`` and the first hit is returned.
        """
        if 'students' not in site.keys():
            return None
        # Determine the locator once instead of once per branch.
        locator = self.getLocator(row)
        if locator == 'student_id':
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
        elif locator in ('reg_number', 'matric_number'):
            value = row[locator]
            cat = queryUtility(ICatalog, name='students_catalog')
            # The catalog is queried with a (min, max) pair; using the
            # same value twice asks for exact matches.
            results = list(cat.searchResults(**{locator: (value, value)}))
            if results:
                return results[0]
        return None

    def addEntry(self, obj, row, site):
        """Add student `obj` to the students container and log it.
        """
        parent = self.getParent(row, site)
        parent.addStudent(obj)
        # In some tests we don't have a students container or a user,
        # so logging and history are deliberately best-effort.
        try:
            user = get_current_principal()
            parent.logger.info('%s - %s - Student record imported' % (
                user.id,obj.student_id))
            history = IObjectHistory(obj)
            history.addMessage('Student record imported')
        except (TypeError, AttributeError):
            pass
        return

    def delEntry(self, row, site):
        """Remove the student `row` refers to, if there is one.
        """
        student = self.getEntry(row, site)
        if student is not None:
            parent = self.getParent(row, site)
            del parent[student.student_id]

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        Note that an empty (None) ``student_id`` is removed from `row`
        itself before the values are applied.
        """
        # Remove student_id from row if empty
        if 'student_id' in row and row['student_id'] is None:
            row.pop('student_id')
        for key, value in row.items():
            # Set student password and all fields declared in interface.
            if key == 'password' and value != '':
                IUserAccount(obj).setPassword(value)
            elif key == 'reg_state':
                IWorkflowState(obj).setState(value)
                msg = "State '%s' set" % value
                history = IObjectHistory(obj)
                history.addMessage(msg)
            elif hasattr(obj, key):
                setattr(obj, key, value)
        return

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used fieldnames.

        `path` is the CSV file whose first row provides the raw column
        titles; `headerfields` are the field names assigned to these
        columns, in the same order. In ``remove`` mode only the locator
        columns are mapped.
        """
        result = dict()
        # Only the header row is needed; close the file right away
        # instead of leaving the handle open. Binary mode is what the
        # Python 2 csv module expects.
        with open(path, 'rb') as csv_file:
            raw_header = next(csv.reader(csv_file))
        locators = ('student_id', 'reg_number', 'matric_number')
        for num, field in enumerate(headerfields):
            if mode == 'remove' and field not in locators:
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` delivered by
        the object converter, with an additional ``reg_state`` error if
        that column is given but its value is not importable.
        """
        iface = self.iface
        if mode in ['update', 'remove']:
            # Pick the interface which matches the locator in use.
            locator = self.getLocator(row)
            if locator == 'reg_number':
                iface = IStudentUpdateByRegNo
            elif locator == 'matric_number':
                iface = IStudentUpdateByMatricNo
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        if 'reg_state' in row and \
            row['reg_state'] not in IMPORTABLE_STATES:
            if row['reg_state'] != '':
                errs.append(('reg_state','not allowed'))
            else:
                errs.append(('reg_state','no value provided'))
        return errs, inv_errs, conv_dict
209
class StudentStudyCourseProcessor(BatchProcessor):
    """A batch processor for IStudentStudyCourse objects.

    The parent of a study course is the student, who is located by
    ``student_id``, ``reg_number`` or ``matric_number``.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studycourseupdater'
    grok.name(util_name)

    name = u'StudentStudyCourse Importer (update only)'
    iface = IStudentStudyCourse
    factory_name = 'waeup.StudentStudyCourse'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted list of the locator columns plus all `iface` fields.
        """
        return sorted(set(
            ['student_id', 'reg_number', 'matric_number'] +
            list(getFields(self.iface).keys())))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header and return True if it is usable.

        Raises `FatalCSVError` if none of the locator columns is
        present or if a column name which does not start with ``--``
        appears more than once. `mode` is not evaluated.
        """
        locators = ('student_id', 'reg_number', 'matric_number')
        if not any(x in headerfields for x in locators):
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        # Columns starting with '--' are to be ignored and may repeat.
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the student `row` refers to or None.

        Only the first locator column carrying a value is used:
        ``student_id`` is looked up in ``site['students']``, while
        ``reg_number`` and ``matric_number`` are searched in the
        ``students_catalog``.
        """
        if 'students' not in site.keys():
            return None
        if 'student_id' in row.keys() and row['student_id']:
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
            return None
        for locator in ('reg_number', 'matric_number'):
            if locator in row.keys() and row[locator]:
                value = row[locator]
                cat = queryUtility(ICatalog, name='students_catalog')
                # Same value as lower and upper bound: exact match.
                results = list(
                    cat.searchResults(**{locator: (value, value)}))
                if results:
                    return results[0]
                # Do not fall back to the next locator column.
                return None
        return None

    def parentsExist(self, row, site):
        """Tell whether the student `row` refers to can be found.
        """
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether the student has a ``studycourse`` entry.
        """
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the student's ``studycourse`` entry or None.
        """
        student = self.getParent(row, site)
        if student is None:
            return None
        return student.get('studycourse')

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.
        """
        for key, value in row.items():
            # Skip fields not declared in interface.
            if hasattr(obj, key):
                setattr(obj, key, value)
        # Update the students_catalog
        notify(grok.ObjectModifiedEvent(obj.__parent__))
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` delivered by
        the object converter. If both a certificate and a current
        level were converted, a ``current_level`` error is added when
        the level lies outside the certificate's range.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # We have to check if current_level is in range of certificate.
        # This is not done by the converter. This kind of conversion
        # checking does only work if a combination of certificate and
        # current_level is provided, so skip it (instead of failing
        # with a KeyError) when either of them is missing.
        if 'certificate' in conv_dict and 'current_level' in conv_dict:
            certificate = conv_dict['certificate']
            current_level = conv_dict['current_level']
            # NOTE(review): levels up to 120 above end_level are
            # accepted -- the reason is not visible here, confirm.
            if current_level < certificate.start_level or \
                current_level > certificate.end_level + 120:
                errs.append(('current_level','not in range'))
        return errs, inv_errs, conv_dict
311
class StudentStudyLevelProcessor(BatchProcessor):
    """A batch processor for IStudentStudyLevel objects.

    The parent of a study level is the ``studycourse`` entry of the
    student located by ``student_id``, ``reg_number`` or
    ``matric_number``; the level itself is stored under the value of
    the ``level`` column.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studylevelimporter'
    grok.name(util_name)

    name = u'StudentStudyLevel Importer'
    iface = IStudentStudyLevel
    factory_name = 'waeup.StudentStudyLevel'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted list of locator columns, ``level`` and `iface` fields.
        """
        return sorted(set(
            ['student_id', 'reg_number', 'matric_number', 'level'] +
            list(getFields(self.iface).keys())))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header and return True if it is usable.

        Raises `FatalCSVError` if none of the locator columns is
        present, if ``level`` is missing, or if a column name which
        does not start with ``--`` appears more than once. `mode` is
        not evaluated.
        """
        locators = ('student_id', 'reg_number', 'matric_number')
        if not any(x in headerfields for x in locators):
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        if 'level' not in headerfields:
            raise FatalCSVError(
                "Need level for import!")
        # Columns starting with '--' are to be ignored and may repeat.
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the ``studycourse`` of the student in `row` or None.

        Only the first locator column carrying a value is used:
        ``student_id`` is looked up in ``site['students']``, while
        ``reg_number`` and ``matric_number`` are searched in the
        ``students_catalog``.
        """
        if 'students' not in site.keys():
            return None
        if 'student_id' in row.keys() and row['student_id']:
            if row['student_id'] in site['students']:
                student = site['students'][row['student_id']]
                return student['studycourse']
            return None
        for locator in ('reg_number', 'matric_number'):
            if locator in row.keys() and row[locator]:
                value = row[locator]
                cat = queryUtility(ICatalog, name='students_catalog')
                # Same value as lower and upper bound: exact match.
                results = list(
                    cat.searchResults(**{locator: (value, value)}))
                if results:
                    return results[0]['studycourse']
                # Do not fall back to the next locator column.
                return None
        return None

    def parentsExist(self, row, site):
        """Tell whether the student's study course can be found.
        """
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether the study course already holds this level.
        """
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the study level stored under ``row['level']`` or None.
        """
        studycourse = self.getParent(row, site)
        if studycourse is None:
            return None
        return studycourse.get(row['level'])

    def addEntry(self, obj, row, site):
        """Store `obj` in the study course under ``row['level']``.

        The ``level`` attribute of `obj` is set to the integer value
        of that column.
        """
        parent = self.getParent(row, site)
        obj.level = int(row['level'])
        parent[row['level']] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` delivered by
        the object converter, with an additional ``level`` error if
        the level is not an integer or not a multiple of 10 in the
        range 0..690.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # We have to check if level is a valid integer.
        # This is not done by the converter.
        try:
            level = int(row['level'])
        except (TypeError, ValueError):
            # ValueError: not a number; TypeError: no string at all
            # (e.g. None). Both are reported as a row error.
            errs.append(('level','no integer'))
        else:
            if level not in range(0,700,10):
                errs.append(('level','no valid integer'))
        return errs, inv_errs, conv_dict
[7548]408
class CourseTicketProcessor(BatchProcessor):
    """A batch processor for ICourseTicket objects.

    A course ticket lives inside a study level. That level is found
    via the student (``student_id``, ``reg_number`` or
    ``matric_number``) and the ``level`` column; the ticket itself is
    stored under the value of the ``code`` column.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'courseticketimporter'
    grok.name(util_name)

    name = u'CourseTicket Importer'
    iface = ICourseTicket
    factory_name = 'waeup.CourseTicket'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted list of locator columns, ``level``, ``code`` and
        all `iface` fields.
        """
        extra = ['student_id','reg_number','matric_number','level','code']
        names = set(extra + getFields(self.iface).keys())
        return sorted(list(names))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header and return True if it is usable.

        Raises `FatalCSVError` if no locator column, no ``level`` or
        no ``code`` column is present, or if a column name which does
        not start with ``--`` appears more than once.
        """
        has_locator = ('reg_number' in headerfields
                       or 'student_id' in headerfields
                       or 'matric_number' in headerfields)
        if not has_locator:
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        if 'level' not in headerfields:
            raise FatalCSVError(
                "Need level for import!")
        if 'code' not in headerfields:
            raise FatalCSVError(
                "Need code for import!")
        # Only columns which are not marked with '--' must be unique.
        effective = [title for title in headerfields
                     if not title.startswith('--')]
        if len(set(effective)) < len(effective):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the study level `row` refers to or None.

        The student is taken from ``site['students']`` (by
        ``student_id``) or from the ``students_catalog`` (by
        ``reg_number`` or ``matric_number``); the level is then looked
        up in the student's ``studycourse``.
        """
        if 'students' not in site.keys():
            return None
        # Collect candidate students; only the first one is used.
        matches = []
        if 'student_id' in row.keys() and row['student_id']:
            if row['student_id'] in site['students']:
                matches = [site['students'][row['student_id']]]
        elif 'reg_number' in row.keys() and row['reg_number']:
            number = row['reg_number']
            cat = queryUtility(ICatalog, name='students_catalog')
            matches = list(cat.searchResults(reg_number=(number, number)))
        elif 'matric_number' in row.keys() and row['matric_number']:
            number = row['matric_number']
            cat = queryUtility(ICatalog, name='students_catalog')
            matches = list(cat.searchResults(matric_number=(number, number)))
        if not matches:
            return None
        return matches[0]['studycourse'].get(row['level'])

    def parentsExist(self, row, site):
        """Tell whether the study level for `row` can be found.
        """
        level = self.getParent(row, site)
        return level is not None

    def entryExists(self, row, site):
        """Tell whether the study level already holds this ticket.
        """
        ticket = self.getEntry(row, site)
        return ticket is not None

    def getEntry(self, row, site):
        """Return the ticket stored under ``row['code']`` or None.
        """
        level = self.getParent(row, site)
        if level is not None:
            return level.get(row['code'])
        return None

    def addEntry(self, obj, row, site):
        """Fill `obj` from the catalogued course and store it.

        The first ``courses_catalog`` hit for ``row['code']`` provides
        title, credits, passmark and semester; ``dcode`` and ``fcode``
        are taken from the ``code`` of its second and third ancestor.
        """
        parent = self.getParent(row, site)
        catalog = getUtility(ICatalog, name='courses_catalog')
        code = row['code']
        course = list(catalog.searchResults(code=(code, code)))[0]
        department = course.__parent__.__parent__
        faculty = department.__parent__
        obj.fcode = faculty.code
        obj.dcode = department.code
        obj.title = course.title
        obj.credits = course.credits
        obj.passmark = course.passmark
        obj.semester = course.semester
        parent[row['code']] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` delivered by
        the object converter, with an additional ``code`` error if the
        ``courses_catalog`` has no entry for the given code.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # The converter does not verify that the course exists, so
        # ask the catalog.
        catalog = getUtility(ICatalog, name='courses_catalog')
        hits = catalog.searchResults(code=(row['code'],row['code']))
        if len(hits) == 0:
            errs.append(('code','non-existent'))
        return errs, inv_errs, conv_dict
515
class StudentOnlinePaymentProcessor(BatchProcessor):
    """A batch processor for IStudentOnlinePayment objects.

    Payments are kept in the ``payments`` entry of the student, who is
    located by ``student_id``, ``reg_number`` or ``matric_number``.
    The payment is identified by the ``p_id`` column.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'paymentimporter'
    grok.name(util_name)

    name = u'Payment Importer'
    iface = IStudentOnlinePayment
    factory_name = 'waeup.StudentOnlinePayment'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted list of locator columns, ``p_id`` and `iface` fields.
        """
        extra = ['student_id','reg_number','matric_number','p_id']
        names = set(extra + getFields(self.iface).keys())
        return sorted(list(names))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header and return True if it is usable.

        Raises `FatalCSVError` if no locator column or no ``p_id``
        column is present, or if a column name which does not start
        with ``--`` appears more than once.
        """
        has_locator = ('reg_number' in headerfields
                       or 'student_id' in headerfields
                       or 'matric_number' in headerfields)
        if not has_locator:
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        if 'p_id' not in headerfields:
            raise FatalCSVError(
                "Need p_id for import!")
        # Only columns which are not marked with '--' must be unique.
        effective = [title for title in headerfields
                     if not title.startswith('--')]
        if len(set(effective)) < len(effective):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the ``payments`` entry of the student or None.

        The student is taken from ``site['students']`` (by
        ``student_id``) or from the ``students_catalog`` (by
        ``reg_number`` or ``matric_number``).
        """
        if 'students' not in site.keys():
            return None
        # Collect candidate students; only the first one is used.
        matches = []
        if 'student_id' in row.keys() and row['student_id']:
            if row['student_id'] in site['students']:
                matches = [site['students'][row['student_id']]]
        elif 'reg_number' in row.keys() and row['reg_number']:
            number = row['reg_number']
            cat = queryUtility(ICatalog, name='students_catalog')
            matches = list(cat.searchResults(reg_number=(number, number)))
        elif 'matric_number' in row.keys() and row['matric_number']:
            number = row['matric_number']
            cat = queryUtility(ICatalog, name='students_catalog')
            matches = list(cat.searchResults(matric_number=(number, number)))
        if not matches:
            return None
        return matches[0]['payments']

    def parentsExist(self, row, site):
        """Tell whether the student's payments entry can be found.
        """
        payments = self.getParent(row, site)
        return payments is not None

    def entryExists(self, row, site):
        """Tell whether the payment `row` refers to already exists.
        """
        payment = self.getEntry(row, site)
        return payment is not None

    def getEntry(self, row, site):
        """Return the payment `row` refers to or None.

        Hash symbols around ``p_id`` are dropped. An id which does not
        start with ``p`` is turned into ``'p'`` plus everything after
        its first six characters before the lookup.
        """
        payments = self.getParent(row, site)
        if payments is None:
            return None
        # Import files may carry a hash symbol at the end of p_id to
        # stop Excel or Calc from transforming it into a number.
        p_id = row['p_id'].strip('#')
        if not p_id.startswith('p'):
            # For data migration from old SRP
            return payments.get('p' + p_id[6:])
        return payments.get(p_id)

    def addEntry(self, obj, row, site):
        """Store `obj` in the student's payments entry.

        The key is derived from ``p_id`` in the same way as in
        `getEntry`; for ids not starting with ``p`` the derived id is
        also written to ``obj.p_id``.
        """
        parent = self.getParent(row, site)
        p_id = row['p_id'].strip('#')
        if p_id.startswith('p'):
            # NOTE(review): obj.p_id is left untouched here, so it is
            # not replaced by the stripped id -- confirm this is fine.
            parent[p_id] = obj
        else:
            # For data migration from old SRP
            obj.p_id = 'p' + p_id[6:]
            parent[obj.p_id] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` delivered by
        the object converter, with an additional ``p_id`` error if the
        id (without hash symbols) has not the expected length: 14
        characters if it starts with ``p``, 19 otherwise.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # We have to check p_id.
        p_id = row['p_id'].strip('#')
        if p_id.startswith('p'):
            expected_length = 14
        else:
            expected_length = 19
        if len(p_id) != expected_length:
            errs.append(('p_id','invalid length'))
        return errs, inv_errs, conv_dict
Note: See TracBrowser for help on using the repository browser.