source: main/waeup.sirp/trunk/src/waeup/sirp/students/batching.py @ 7640

Last change on this file since 7640 was 7626, checked in by Henrik Bettermann, 13 years ago

We can use the hash symbol at the end of p_id in import files
to avoid annoying automatic number transformation
by Excel or Calc.

  • Property svn:keywords set to Id
File size: 23.5 KB
Line 
1## $Id: batching.py 7626 2012-02-10 20:26:34Z henrik $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""Batch processing components for student objects.
19
20Batch processors eat CSV files to add, update or remove large numbers
21of certain kinds of objects at once.
22
23Here we define the processors for students specific objects like
24students, studycourses, payment tickets and accommodation tickets.
25"""
26import grok
27import csv
28from zope.interface import Interface
29from zope.schema import getFields
30from zope.component import queryUtility, getUtility
31from zope.event import notify
32from zope.catalog.interfaces import ICatalog
33from hurry.workflow.interfaces import IWorkflowState
34from waeup.sirp.interfaces import (
35    IBatchProcessor, FatalCSVError, IObjectConverter, IUserAccount,
36    IObjectHistory)
37from waeup.sirp.students.interfaces import (
38    IStudent, IStudentStudyCourse,
39    IStudentUpdateByRegNo, IStudentUpdateByMatricNo,
40    IStudentStudyLevel, ICourseTicket,
41    IStudentOnlinePayment)
42from waeup.sirp.students.workflow import  IMPORTABLE_STATES
43from waeup.sirp.utils.batching import BatchProcessor
44from waeup.sirp.utils.helpers import get_current_principal
45
class StudentProcessor(BatchProcessor):
    """A batch processor for IStudent objects.

    A row addresses a student by the first non-empty value among the
    columns ``student_id``, ``reg_number`` and ``matric_number``
    (see `getLocator`).
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studentimporter'
    grok.name(util_name)

    name = u'Student Importer'
    iface = IStudent

    location_fields = []
    factory_name = 'waeup.Student'

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of all column names accepted.

        These are the locator columns, ``password`` and ``reg_state``
        plus every field declared in `iface`.
        """
        extra_fields = [
            'student_id', 'reg_number', 'matric_number',
            'password', 'reg_state']
        return sorted(set(extra_fields + list(getFields(self.iface).keys())))

    def checkHeaders(self, headerfields, mode='create'):
        """Check the CSV header line.

        Raises `FatalCSVError` if no locator column is present, if (in
        create mode) one of `required_fields` is missing, or if a
        column name that is not marked as ignored (leading ``--``)
        appears more than once. Returns True otherwise.
        """
        locators = ('student_id', 'reg_number', 'matric_number')
        if not any(locator in headerfields for locator in locators):
            raise FatalCSVError(
                "Need at least columns student_id or reg_number " +
                "or matric_number for import!")
        if mode == 'create':
            for field in self.required_fields:
                if field not in headerfields:
                    raise FatalCSVError(
                        "Need at least columns %s for import!" %
                        ', '.join(["'%s'" % x for x in self.required_fields]))
        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Tell whether `site` contains a ``students`` container.
        """
        return 'students' in site.keys()

    def getLocator(self, row):
        """Return the name of the column used to locate the student.

        The first column with a non-empty value wins, in the order
        ``student_id``, ``reg_number``, ``matric_number``. Returns
        None if none of them is set.
        """
        for locator in ('student_id', 'reg_number', 'matric_number'):
            if row.get(locator, None):
                return locator
        return None

    # The entry never exists in create mode.
    def entryExists(self, row, site):
        """Tell whether the student addressed by `row` can be found.
        """
        return self.getEntry(row, site) is not None

    def getParent(self, row, site):
        """Return the ``students`` container of `site`.
        """
        return site['students']

    def _searchStudent(self, index, value):
        """Return the first ``students_catalog`` hit for `value`, or None.

        `index` is the name of the catalog index to query.
        """
        cat = queryUtility(ICatalog, name='students_catalog')
        results = list(cat.searchResults(**{index: (value, value)}))
        if results:
            return results[0]
        return None

    def getEntry(self, row, site):
        """Return the student addressed by `row` or None.

        ``student_id`` is looked up directly in the students
        container; ``reg_number`` and ``matric_number`` are looked up
        in the ``students_catalog``.
        """
        if 'students' not in site.keys():
            return None
        # Determine the locator only once per row.
        locator = self.getLocator(row)
        if locator == 'student_id':
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
            return None
        if locator in ('reg_number', 'matric_number'):
            return self._searchStudent(locator, row[locator])
        return None

    def addEntry(self, obj, row, site):
        """Add student `obj` to the students container.

        The import is also written to the container's log and to the
        history of the student; both are skipped silently if that is
        not possible.
        """
        parent = self.getParent(row, site)
        parent.addStudent(obj)
        # In some tests we don't have a students container or a user
        try:
            user = get_current_principal()
            parent.logger.info('%s - %s - Student record imported' % (
                user.id,obj.student_id))
            history = IObjectHistory(obj)
            history.addMessage('Student record imported')
        except (TypeError, AttributeError):
            pass
        return

    def delEntry(self, row, site):
        """Remove the student addressed by `row`, if there is one.
        """
        student = self.getEntry(row, site)
        if student is not None:
            parent = self.getParent(row, site)
            del parent[student.student_id]

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        A non-empty ``password`` is set via `IUserAccount`,
        ``reg_state`` is set via `IWorkflowState` (and recorded in the
        object history). Any other key is set as attribute if `obj`
        already has an attribute of that name.
        """
        for key, value in row.items():
            # Set student password and all fields declared in interface.
            if key == 'password' and value != '':
                IUserAccount(obj).setPassword(value)
            elif key == 'reg_state':
                IWorkflowState(obj).setState(value)
                msg = "State '%s' set" % value
                history = IObjectHistory(obj)
                history.addMessage(msg)
            elif hasattr(obj, key):
                setattr(obj, key, value)
        return

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used fieldnames.

        The raw header is read from the first line of the file at
        `path`. In remove mode only the locator columns are mapped;
        columns marked ``--IGNORE--`` are never mapped.
        """
        result = dict()
        # Close the file again as soon as the header line is read.
        with open(path, 'rb') as csv_file:
            raw_header = next(csv.reader(csv_file))
        for num, field in enumerate(headerfields):
            if field not in [
                'student_id', 'reg_number', 'matric_number'] and mode == 'remove':
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` produced by
        the object converter, with an additional ``reg_state`` error
        if that column holds a value not in `IMPORTABLE_STATES`.
        """
        # Default to the regular interface. Rows located by student_id
        # (or without any locator) in update/remove mode would leave
        # `iface` unbound otherwise.
        iface = self.iface
        if mode in ['update', 'remove']:
            locator = self.getLocator(row)
            if locator == 'reg_number':
                iface = IStudentUpdateByRegNo
            elif locator == 'matric_number':
                iface = IStudentUpdateByMatricNo
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict =  converter.fromStringDict(
            row, self.factory_name)
        if 'reg_state' in row and \
            row['reg_state'] not in IMPORTABLE_STATES:
            if row['reg_state'] != '':
                errs.append(('reg_state','not allowed'))
            else:
                errs.append(('reg_state','no value provided'))
        return errs, inv_errs, conv_dict
207
class StudentStudyCourseProcessor(BatchProcessor):
    """A batch processor for IStudentStudyCourse objects.

    The processed entry is the ``studycourse`` item of the student
    addressed by a row.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studycourseupdater'
    grok.name(util_name)

    name = u'StudentStudyCourse Importer (update only)'
    iface = IStudentStudyCourse
    factory_name = 'waeup.StudentStudyCourse'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of all column names accepted.
        """
        locators = ['student_id', 'reg_number', 'matric_number']
        return sorted(set(locators + list(getFields(self.iface).keys())))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header line.

        Raises `FatalCSVError` if no locator column is present or if a
        column name that is not marked as ignored (leading ``--``)
        appears more than once. Returns True otherwise.
        """
        locators = ('student_id', 'reg_number', 'matric_number')
        if not any(locator in headerfields for locator in locators):
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the student addressed by `row` or None.

        Only the first non-empty locator column (``student_id``,
        ``reg_number``, ``matric_number``) is evaluated; there is no
        fallback to a later one if the lookup fails.
        """
        if 'students' not in site.keys():
            return None
        if row.get('student_id'):
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
            return None
        for index in ('reg_number', 'matric_number'):
            value = row.get(index)
            if not value:
                continue
            cat = queryUtility(ICatalog, name='students_catalog')
            results = list(cat.searchResults(**{index: (value, value)}))
            if results:
                return results[0]
            return None
        return None

    def parentsExist(self, row, site):
        """Tell whether the student addressed by `row` can be found.
        """
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether the addressed student has a ``studycourse``.
        """
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the ``studycourse`` of the addressed student or None.
        """
        student = self.getParent(row, site)
        if student is None:
            return None
        return student.get('studycourse')

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        Afterwards an `ObjectModifiedEvent` is sent for the parent of
        `obj`.
        """
        for key, value in row.items():
            # Skip fields not declared in interface.
            if hasattr(obj, key):
                setattr(obj, key, value)
        # Update the students_catalog
        notify(grok.ObjectModifiedEvent(obj.__parent__))
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` produced by
        the object converter, with an additional ``current_level``
        error if the level lies outside the range of the certificate.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict =  converter.fromStringDict(
            row, self.factory_name)
        # We have to check if current_level is in range of certificate.
        # This is not done by the converter. This kind of conversion
        # checking does only work if a combination of certificate and
        # current_level is provided, so skip it (instead of failing
        # with a KeyError) if one of both is missing.
        if 'certificate' in conv_dict and 'current_level' in conv_dict:
            certificate = conv_dict['certificate']
            current_level = conv_dict['current_level']
            if current_level < certificate.start_level or \
                current_level > certificate.end_level+120:
                errs.append(('current_level','not in range'))
        return errs, inv_errs, conv_dict
309
class StudentStudyLevelProcessor(BatchProcessor):
    """A batch processor for IStudentStudyLevel objects.

    Study levels are stored in the ``studycourse`` of a student under
    the value of the ``level`` column.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studylevelimporter'
    grok.name(util_name)

    name = u'StudentStudyLevel Importer'
    iface = IStudentStudyLevel
    factory_name = 'waeup.StudentStudyLevel'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of all column names accepted.
        """
        field_names = set(
            ['student_id','reg_number','matric_number','level'] + getFields(
                self.iface).keys())
        return sorted(list(field_names))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header line.

        Raises `FatalCSVError` if no locator column or no ``level``
        column is present, or if a column name not marked as ignored
        (leading ``--``) appears more than once. Returns True otherwise.
        """
        has_locator = ('reg_number' in headerfields
                       or 'student_id' in headerfields
                       or 'matric_number' in headerfields)
        if not has_locator:
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        if 'level' not in headerfields:
            raise FatalCSVError(
                "Need level for import!")
        # Columns starting with '--' are ignored and may be repeated.
        used = [header for header in headerfields
                if not header.startswith('--')]
        if len(set(used)) < len(used):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def _findStudent(self, row, site):
        """Return the student addressed by `row` or None.

        Only the first non-empty locator column (``student_id``,
        ``reg_number``, ``matric_number``) is evaluated.
        """
        if row.get('student_id'):
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
            return None
        for index in ('reg_number', 'matric_number'):
            value = row.get(index)
            if not value:
                continue
            cat = queryUtility(ICatalog, name='students_catalog')
            hits = list(cat.searchResults(**{index: (value, value)}))
            if hits:
                return hits[0]
            return None
        return None

    def getParent(self, row, site):
        """Return the ``studycourse`` of the addressed student or None.
        """
        if 'students' not in site.keys():
            return None
        student = self._findStudent(row, site)
        if student is None:
            return None
        return student['studycourse']

    def parentsExist(self, row, site):
        """Tell whether the study course for `row` can be found.
        """
        parent = self.getParent(row, site)
        return parent is not None

    def entryExists(self, row, site):
        """Tell whether the study level for `row` exists already.
        """
        entry = self.getEntry(row, site)
        return entry is not None

    def getEntry(self, row, site):
        """Return the study level stored under ``row['level']`` or None.
        """
        studycourse = self.getParent(row, site)
        if studycourse is not None:
            return studycourse.get(row['level'])
        return None

    def addEntry(self, obj, row, site):
        """Store `obj` in the study course under ``row['level']``.

        The ``level`` attribute of `obj` is set to the integer value
        of that column.
        """
        studycourse = self.getParent(row, site)
        level_key = row['level']
        obj.level = int(level_key)
        studycourse[level_key] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` produced by
        the object converter, with an additional ``level`` error if
        the level is not an integer or not a multiple of 10 in the
        range 0..690.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict =  converter.fromStringDict(
            row, self.factory_name)
        # The converter does not check whether level is a valid
        # integer, so this is done here.
        try:
            level = int(row['level'])
        except ValueError:
            errs.append(('level','no integer'))
        else:
            if level not in range(0,700,10):
                errs.append(('level','no valid integer'))
        return errs, inv_errs, conv_dict
406
class CourseTicketProcessor(BatchProcessor):
    """A batch processor for ICourseTicket objects.

    Course tickets are stored in a study level of a student under the
    value of the ``code`` column.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'courseticketimporter'
    grok.name(util_name)

    name = u'CourseTicket Importer'
    iface = ICourseTicket
    factory_name = 'waeup.CourseTicket'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of all column names accepted.
        """
        field_names = set(
            ['student_id','reg_number','matric_number','level','code'] + getFields(
                self.iface).keys())
        return sorted(list(field_names))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header line.

        Raises `FatalCSVError` if no locator column, no ``level`` or no
        ``code`` column is present, or if a column name not marked as
        ignored (leading ``--``) appears more than once. Returns True
        otherwise.
        """
        has_locator = ('reg_number' in headerfields
                       or 'student_id' in headerfields
                       or 'matric_number' in headerfields)
        if not has_locator:
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        if 'level' not in headerfields:
            raise FatalCSVError(
                "Need level for import!")
        if 'code' not in headerfields:
            raise FatalCSVError(
                "Need code for import!")
        # Columns starting with '--' are ignored and may be repeated.
        used = [header for header in headerfields
                if not header.startswith('--')]
        if len(set(used)) < len(used):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def _findStudent(self, row, site):
        """Return the student addressed by `row` or None.

        Only the first non-empty locator column (``student_id``,
        ``reg_number``, ``matric_number``) is evaluated.
        """
        if row.get('student_id'):
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
            return None
        for index in ('reg_number', 'matric_number'):
            value = row.get(index)
            if not value:
                continue
            cat = queryUtility(ICatalog, name='students_catalog')
            hits = list(cat.searchResults(**{index: (value, value)}))
            if hits:
                return hits[0]
            return None
        return None

    def getParent(self, row, site):
        """Return the study level addressed by `row` or None.

        The level is fetched from the ``studycourse`` of the student
        under the key ``row['level']``.
        """
        if 'students' not in site.keys():
            return None
        student = self._findStudent(row, site)
        if student is None:
            return None
        return student['studycourse'].get(row['level'])

    def parentsExist(self, row, site):
        """Tell whether the study level for `row` can be found.
        """
        parent = self.getParent(row, site)
        return parent is not None

    def entryExists(self, row, site):
        """Tell whether the course ticket for `row` exists already.
        """
        entry = self.getEntry(row, site)
        return entry is not None

    def getEntry(self, row, site):
        """Return the ticket stored under ``row['code']`` or None.
        """
        studylevel = self.getParent(row, site)
        if studylevel is not None:
            return studylevel.get(row['code'])
        return None

    def addEntry(self, obj, row, site):
        """Store `obj` in the study level under ``row['code']``.

        Before that, course data are copied to the ticket from the
        first ``courses_catalog`` hit for that code.
        """
        studylevel = self.getParent(row, site)
        code = row['code']
        catalog = getUtility(ICatalog, name='courses_catalog')
        course = list(catalog.searchResults(code=(code, code)))[0]
        # fcode and dcode are read from the code attribute two and
        # three levels above the course.
        # NOTE(review): presumably department and faculty - confirm.
        department = course.__parent__.__parent__
        obj.fcode = department.__parent__.code
        obj.dcode = department.code
        for attr in ('title', 'credits', 'passmark', 'semester'):
            setattr(obj, attr, getattr(course, attr))
        studylevel[code] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` produced by
        the object converter, with an additional ``code`` error if the
        ``courses_catalog`` has no entry for the given course code.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict =  converter.fromStringDict(
            row, self.factory_name)
        # The converter does not check whether the course really
        # exists, so this is done here.
        code = row['code']
        catalog = getUtility(ICatalog, name='courses_catalog')
        entries = catalog.searchResults(code=(code, code))
        if len(entries) == 0:
            errs.append(('code','non-existent'))
        return errs, inv_errs, conv_dict
513
class StudentOnlinePaymentProcessor(BatchProcessor):
    """A batch processor for IStudentOnlinePayment objects.

    Payments are stored in the ``payments`` item of a student under
    an id derived from the ``p_id`` column.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'paymentimporter'
    grok.name(util_name)

    name = u'Payment Importer'
    iface = IStudentOnlinePayment
    factory_name = 'waeup.StudentOnlinePayment'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of all column names accepted.
        """
        field_names = set(
            ['student_id','reg_number','matric_number','p_id'] + getFields(
                self.iface).keys())
        return sorted(list(field_names))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header line.

        Raises `FatalCSVError` if no locator column or no ``p_id``
        column is present, or if a column name not marked as ignored
        (leading ``--``) appears more than once. Returns True otherwise.
        """
        has_locator = ('reg_number' in headerfields
                       or 'student_id' in headerfields
                       or 'matric_number' in headerfields)
        if not has_locator:
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        if 'p_id' not in headerfields:
            raise FatalCSVError(
                "Need p_id for import!")
        # Columns starting with '--' are ignored and may be repeated.
        used = [header for header in headerfields
                if not header.startswith('--')]
        if len(set(used)) < len(used):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def _findStudent(self, row, site):
        """Return the student addressed by `row` or None.

        Only the first non-empty locator column (``student_id``,
        ``reg_number``, ``matric_number``) is evaluated.
        """
        if row.get('student_id'):
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
            return None
        for index in ('reg_number', 'matric_number'):
            value = row.get(index)
            if not value:
                continue
            cat = queryUtility(ICatalog, name='students_catalog')
            hits = list(cat.searchResults(**{index: (value, value)}))
            if hits:
                return hits[0]
            return None
        return None

    def getParent(self, row, site):
        """Return the ``payments`` item of the addressed student or None.
        """
        if 'students' not in site.keys():
            return None
        student = self._findStudent(row, site)
        if student is None:
            return None
        return student['payments']

    def parentsExist(self, row, site):
        """Tell whether the payments item for `row` can be found.
        """
        parent = self.getParent(row, site)
        return parent is not None

    def entryExists(self, row, site):
        """Tell whether the payment for `row` exists already.
        """
        entry = self.getEntry(row, site)
        return entry is not None

    def getEntry(self, row, site):
        """Return the payment addressed by ``row['p_id']`` or None.
        """
        payments = self.getParent(row, site)
        if payments is None:
            return None
        # Import files may carry hash symbols around p_id to keep
        # Excel or Calc from transforming the value into a number.
        p_id = row['p_id'].strip('#')
        if not p_id.startswith('p'):
            # For data migration from old SRP: the first six
            # characters are replaced by 'p'.
            p_id = 'p' + p_id[6:]
        return payments.get(p_id)

    def addEntry(self, obj, row, site):
        """Store `obj` in the payments item of the addressed student.

        Ids starting with ``p`` are used as key unchanged. For other
        ids the first six characters are replaced by ``p``; the result
        is also written to ``obj.p_id``.
        """
        payments = self.getParent(row, site)
        p_id = row['p_id'].strip('#')
        if p_id.startswith('p'):
            payments[p_id] = obj
            return
        # For data migration from old SRP
        obj.p_id = 'p' + p_id[6:]
        payments[obj.p_id] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` produced by
        the object converter, with an additional ``p_id`` error if the
        id (hash symbols stripped) does not have the expected length:
        14 characters if it starts with ``p``, 19 otherwise.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict =  converter.fromStringDict(
            row, self.factory_name)
        # The converter does not check the p_id format.
        p_id = row['p_id'].strip('#')
        if p_id.startswith('p'):
            expected_length = 14
        else:
            expected_length = 19
        if len(p_id) != expected_length:
            errs.append(('p_id','invalid length'))
        return errs, inv_errs, conv_dict
Note: See TracBrowser for help on using the repository browser.