source: main/waeup.sirp/trunk/src/waeup/sirp/students/batching.py @ 7643

Last change on this file since 7643 was 7643, checked in by Henrik Bettermann, 13 years ago

Enable mixed imports of rows with or without student_id.

  • Property svn:keywords set to Id
File size: 23.6 KB
Line 
1## $Id: batching.py 7643 2012-02-14 09:32:28Z henrik $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""Batch processing components for student objects.
19
20Batch processors eat CSV files to add, update or remove large numbers
21of certain kinds of objects at once.
22
23Here we define the processors for student-specific objects like
24students, studycourses, payment tickets and accommodation tickets.
25"""
26import grok
27import csv
28from zope.interface import Interface
29from zope.schema import getFields
30from zope.component import queryUtility, getUtility
31from zope.event import notify
32from zope.catalog.interfaces import ICatalog
33from hurry.workflow.interfaces import IWorkflowState
34from waeup.sirp.interfaces import (
35    IBatchProcessor, FatalCSVError, IObjectConverter, IUserAccount,
36    IObjectHistory)
37from waeup.sirp.students.interfaces import (
38    IStudent, IStudentStudyCourse,
39    IStudentUpdateByRegNo, IStudentUpdateByMatricNo,
40    IStudentStudyLevel, ICourseTicket,
41    IStudentOnlinePayment)
42from waeup.sirp.students.workflow import  IMPORTABLE_STATES
43from waeup.sirp.utils.batching import BatchProcessor
44from waeup.sirp.utils.helpers import get_current_principal
45
class StudentProcessor(BatchProcessor):
    """A batch processor for IStudent objects.

    A row addresses a student through the first non-empty column of
    ``student_id``, ``reg_number`` and ``matric_number`` (tried in
    that order, see `getLocator`), so rows with and without
    ``student_id`` can be mixed in one file.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studentimporter'
    grok.name(util_name)

    name = u'Student Importer'
    iface = IStudent

    location_fields = []
    factory_name = 'waeup.Student'

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of column names.

        Consists of the three locator columns, ``password`` and
        ``reg_state`` plus every field declared by ``self.iface``.
        """
        return sorted(list(set(
            ['student_id', 'reg_number', 'matric_number',
             'password', 'reg_state'] + getFields(
                self.iface).keys())))

    def checkHeaders(self, headerfields, mode='create'):
        """Check the header row of an import file.

        Raises `FatalCSVError` if none of the locator columns is
        present, if (in ``create`` mode) one of ``self.required_fields``
        is missing, or if a column name that does not start with ``--``
        appears more than once. Returns ``True`` otherwise.
        """
        locators = ('student_id', 'reg_number', 'matric_number')
        if not any(locator in headerfields for locator in locators):
            raise FatalCSVError(
                "Need at least columns student_id or reg_number " +
                "or matric_number for import!")
        if mode == 'create':
            for field in self.required_fields:
                if field not in headerfields:
                    raise FatalCSVError(
                        "Need at least columns %s for import!" %
                        ', '.join(["'%s'" % x for x in self.required_fields]))
        # Columns starting with '--' are to be ignored and may repeat.
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Tell whether `site` contains a ``students`` item."""
        return 'students' in site.keys()

    def getLocator(self, row):
        """Return the name of the column used to find the student.

        This is the first of ``student_id``, ``reg_number`` and
        ``matric_number`` that has a non-empty value in `row`, or
        ``None`` if all of them are missing or empty.
        """
        for locator in ('student_id', 'reg_number', 'matric_number'):
            if row.get(locator, None):
                return locator
        return None

    # The entry never exists in create mode.
    def entryExists(self, row, site):
        """Tell whether `getEntry` finds a student for `row`."""
        return self.getEntry(row, site) is not None

    def getParent(self, row, site):
        """Return the ``students`` item of `site`."""
        return site['students']

    def getEntry(self, row, site):
        """Return the student addressed by `row` or ``None``.

        ``student_id`` is looked up directly in ``site['students']``;
        ``reg_number`` and ``matric_number`` are looked up in the
        ``students_catalog`` and the first hit is returned.
        """
        if 'students' not in site.keys():
            return None
        locator = self.getLocator(row)
        if locator is None:
            return None
        if locator == 'student_id':
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
            return None
        # reg_number / matric_number: exact-match query (min == max).
        value = row[locator]
        cat = queryUtility(ICatalog, name='students_catalog')
        results = list(cat.searchResults(**{locator: (value, value)}))
        if results:
            return results[0]
        return None

    def addEntry(self, obj, row, site):
        """Add student `obj` to the parent container and log the import.

        Logging and the history message are best-effort only.
        """
        parent = self.getParent(row, site)
        parent.addStudent(obj)
        # In some tests we don't have a students container or a user
        try:
            user = get_current_principal()
            parent.logger.info('%s - %s - Student record imported' % (
                user.id, obj.student_id))
            history = IObjectHistory(obj)
            history.addMessage('Student record imported')
        except (TypeError, AttributeError):
            pass
        return

    def delEntry(self, row, site):
        """Remove the student addressed by `row`, if there is one."""
        student = self.getEntry(row, site)
        if student is not None:
            parent = self.getParent(row, site)
            del parent[student.student_id]

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        Note that an empty (``None``) ``student_id`` is removed from
        `row` itself, i.e. the passed-in mapping is modified.
        """
        # Remove student_id from row if empty
        if 'student_id' in row and row['student_id'] is None:
            row.pop('student_id')
        for key, value in row.items():
            # Set student password and all fields declared in interface.
            if key == 'password' and value != '':
                IUserAccount(obj).setPassword(value)
            elif key == 'reg_state':
                IWorkflowState(obj).setState(value)
                msg = "State '%s' set" % value
                history = IObjectHistory(obj)
                history.addMessage(msg)
            elif hasattr(obj, key):
                setattr(obj, key, value)
        return

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used fieldnames.

        The first line of the file at `path` supplies the raw column
        titles; they are paired by position with `headerfields`. In
        ``remove`` mode only the locator columns are mapped, and
        columns named ``--IGNORE--`` are always left out.
        """
        result = dict()
        # Only the header line is needed; close the file right away
        # instead of leaking the handle.
        with open(path, 'rb') as csv_file:
            raw_header = next(csv.reader(csv_file))
        for num, field in enumerate(headerfields):
            if field not in [
                'student_id', 'reg_number', 'matric_number'] and mode == 'remove':
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        In ``update`` and ``remove`` mode the interface used for the
        conversion depends on the locator column of the row. A
        ``reg_state`` value outside ``IMPORTABLE_STATES`` is reported
        as an error. Returns the tuple ``(errs, inv_errs, conv_dict)``.
        """
        iface = self.iface
        if mode in ['update', 'remove']:
            locator = self.getLocator(row)
            if locator == 'reg_number':
                iface = IStudentUpdateByRegNo
            elif locator == 'matric_number':
                iface = IStudentUpdateByMatricNo
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        if 'reg_state' in row and \
            row['reg_state'] not in IMPORTABLE_STATES:
            if row['reg_state'] != '':
                errs.append(('reg_state','not allowed'))
            else:
                errs.append(('reg_state','no value provided'))
        return errs, inv_errs, conv_dict
209
class StudentStudyCourseProcessor(BatchProcessor):
    """A batch processor for IStudentStudyCourse objects.

    The parent of a study course is the student, who is found via the
    first non-empty column of ``student_id``, ``reg_number`` and
    ``matric_number``.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studycourseupdater'
    grok.name(util_name)

    name = u'StudentStudyCourse Importer (update only)'
    iface = IStudentStudyCourse
    factory_name = 'waeup.StudentStudyCourse'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of column names.

        Consists of the three locator columns plus every field
        declared by ``self.iface``.
        """
        return sorted(list(set(
            ['student_id', 'reg_number', 'matric_number'] + getFields(
                self.iface).keys())))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the header row of an import file.

        Raises `FatalCSVError` if none of the locator columns is
        present or if a column name that does not start with ``--``
        appears more than once. Returns ``True`` otherwise; `mode` is
        not evaluated.
        """
        locators = ('student_id', 'reg_number', 'matric_number')
        if not any(locator in headerfields for locator in locators):
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        # Columns starting with '--' are to be ignored and may repeat.
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the student addressed by `row` or ``None``.

        ``student_id`` is looked up directly in ``site['students']``;
        ``reg_number`` and ``matric_number`` are looked up in the
        ``students_catalog`` and the first hit is returned. Only the
        first non-empty locator column is used.
        """
        if 'students' not in site.keys():
            return None
        if row.get('student_id'):
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
            return None
        for locator in ('reg_number', 'matric_number'):
            value = row.get(locator)
            if not value:
                continue
            # Exact-match catalog query (min == max).
            cat = queryUtility(ICatalog, name='students_catalog')
            results = list(cat.searchResults(**{locator: (value, value)}))
            if results:
                return results[0]
            return None
        return None

    def parentsExist(self, row, site):
        """Tell whether `getParent` finds a student for `row`."""
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether `getEntry` finds a study course for `row`."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the ``studycourse`` item of the student or ``None``."""
        student = self.getParent(row, site)
        if student is None:
            return None
        return student.get('studycourse')

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        Afterwards an `ObjectModifiedEvent` is sent for the parent of
        `obj`.
        """
        for key, value in row.items():
            # Skip fields not declared in interface.
            if hasattr(obj, key):
                setattr(obj, key, value)
        # Update the students_catalog
        notify(grok.ObjectModifiedEvent(obj.__parent__))
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        Besides the converter's own checks, ``current_level`` must lie
        between the certificate's ``start_level`` and its ``end_level``
        plus 120. Returns the tuple ``(errs, inv_errs, conv_dict)``;
        `mode` is not evaluated.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # We have to check if current_level is in range of certificate.
        # This is not done by the converter. This kind of conversion
        # checking does only work if a combination of certificate and
        # current_level is provided, so rows giving only one of them
        # are not range-checked (and must not raise a KeyError).
        if 'certificate' in conv_dict and 'current_level' in conv_dict:
            certificate = conv_dict['certificate']
            current_level = conv_dict['current_level']
            if current_level < certificate.start_level or \
                current_level > certificate.end_level + 120:
                errs.append(('current_level','not in range'))
        return errs, inv_errs, conv_dict
311
class StudentStudyLevelProcessor(BatchProcessor):
    """A batch processor for IStudentStudyLevel objects.

    Study levels live inside the ``studycourse`` item of a student
    and are stored under the value of the ``level`` column.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studylevelimporter'
    grok.name(util_name)

    name = u'StudentStudyLevel Importer'
    iface = IStudentStudyLevel
    factory_name = 'waeup.StudentStudyLevel'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of column names.

        Consists of the locator columns, ``level`` and every field
        declared by ``self.iface``.
        """
        names = ['student_id', 'reg_number', 'matric_number', 'level']
        names.extend(getFields(self.iface).keys())
        return sorted(set(names))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the header row of an import file.

        Raises `FatalCSVError` if no locator column is present, if
        ``level`` is missing or if a column name not starting with
        ``--`` is used twice. Returns ``True`` otherwise.
        """
        has_locator = (
            'reg_number' in headerfields
            or 'student_id' in headerfields
            or 'matric_number' in headerfields)
        if not has_locator:
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        if 'level' not in headerfields:
            raise FatalCSVError(
                "Need level for import!")
        # Columns marked with a leading '--' are ignored and may repeat.
        relevant = [title for title in headerfields
                    if not title.startswith('--')]
        if len(relevant) != len(set(relevant)):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the ``studycourse`` of the addressed student or ``None``.

        The student is found via the first non-empty column of
        ``student_id`` (direct lookup), ``reg_number`` and
        ``matric_number`` (``students_catalog`` lookup, first hit).
        """
        if 'students' not in site.keys():
            return None
        student = None
        if row.get('student_id'):
            if row['student_id'] in site['students']:
                student = site['students'][row['student_id']]
        elif row.get('reg_number'):
            number = row['reg_number']
            catalog = queryUtility(ICatalog, name='students_catalog')
            hits = list(catalog.searchResults(reg_number=(number, number)))
            if hits:
                student = hits[0]
        elif row.get('matric_number'):
            number = row['matric_number']
            catalog = queryUtility(ICatalog, name='students_catalog')
            hits = list(catalog.searchResults(matric_number=(number, number)))
            if hits:
                student = hits[0]
        if student is None:
            return None
        return student['studycourse']

    def parentsExist(self, row, site):
        """Tell whether `getParent` finds a study course for `row`."""
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether `getEntry` finds a study level for `row`."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the study level stored under ``row['level']`` or ``None``."""
        studycourse = self.getParent(row, site)
        if studycourse is not None:
            return studycourse.get(row['level'])
        return None

    def addEntry(self, obj, row, site):
        """Store `obj` in its study course under ``row['level']``.

        The ``level`` attribute of `obj` is set to the integer value
        of that column first.
        """
        studycourse = self.getParent(row, site)
        obj.level = int(row['level'])
        studycourse[row['level']] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        In addition to the converter's checks, ``level`` must be an
        integer multiple of 10 in the range 0..690. Returns the tuple
        ``(errs, inv_errs, conv_dict)``.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # The converter does not verify that level is a valid integer,
        # so we do it here.
        try:
            level = int(row['level'])
        except ValueError:
            errs.append(('level','no integer'))
        else:
            if not (0 <= level < 700 and level % 10 == 0):
                errs.append(('level','no valid integer'))
        return errs, inv_errs, conv_dict
408
class CourseTicketProcessor(BatchProcessor):
    """A batch processor for ICourseTicket objects.

    Course tickets live inside a study level of a student's
    ``studycourse`` and are stored under the value of the ``code``
    column.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'courseticketimporter'
    grok.name(util_name)

    name = u'CourseTicket Importer'
    iface = ICourseTicket
    factory_name = 'waeup.CourseTicket'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of column names.

        Consists of the locator columns, ``level``, ``code`` and every
        field declared by ``self.iface``.
        """
        names = set(
            ['student_id', 'reg_number', 'matric_number', 'level', 'code'])
        names.update(getFields(self.iface).keys())
        return sorted(names)

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the header row of an import file.

        Raises `FatalCSVError` if no locator column is present, if
        ``level`` or ``code`` is missing, or if a column name not
        starting with ``--`` is used twice. Returns ``True`` otherwise.
        """
        locator_found = False
        for title in ('reg_number', 'student_id', 'matric_number'):
            if title in headerfields:
                locator_found = True
                break
        if not locator_found:
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        if 'level' not in headerfields:
            raise FatalCSVError(
                "Need level for import!")
        if 'code' not in headerfields:
            raise FatalCSVError(
                "Need code for import!")
        # Only columns not marked as ignored ('--' prefix) must be unique.
        considered = [title for title in headerfields
                      if not title.startswith('--')]
        if len(considered) > len(set(considered)):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the study level addressed by `row` or ``None``.

        The student is found via the first non-empty column of
        ``student_id`` (direct lookup), ``reg_number`` and
        ``matric_number`` (``students_catalog`` lookup, first hit);
        the level is then fetched from the student's ``studycourse``
        by ``row['level']``.
        """
        if 'students' not in site.keys():
            return None
        student = None
        if row.get('student_id'):
            if row['student_id'] in site['students']:
                student = site['students'][row['student_id']]
        else:
            for index in ('reg_number', 'matric_number'):
                value = row.get(index)
                if not value:
                    continue
                cat = queryUtility(ICatalog, name='students_catalog')
                found = list(cat.searchResults(**{index: (value, value)}))
                if found:
                    student = found[0]
                # Only the first non-empty locator column is consulted.
                break
        if student is None:
            return None
        return student['studycourse'].get(row['level'])

    def parentsExist(self, row, site):
        """Tell whether `getParent` finds a study level for `row`."""
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether `getEntry` finds a course ticket for `row`."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the ticket stored under ``row['code']`` or ``None``."""
        level = self.getParent(row, site)
        if level is not None:
            return level.get(row['code'])
        return None

    def addEntry(self, obj, row, site):
        """Fill `obj` from the catalogued course and store it in its level.

        The first ``courses_catalog`` hit for ``row['code']`` supplies
        title, credits, passmark and semester; ``dcode`` and ``fcode``
        are the ``code`` attributes found two and three ``__parent__``
        steps above that course.
        """
        level = self.getParent(row, site)
        code = row['code']
        catalog = getUtility(ICatalog, name='courses_catalog')
        course = list(catalog.searchResults(code=(code, code)))[0]
        department = course.__parent__.__parent__
        obj.fcode = department.__parent__.code
        obj.dcode = department.code
        obj.title = course.title
        obj.credits = course.credits
        obj.passmark = course.passmark
        obj.semester = course.semester
        level[row['code']] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        In addition to the converter's checks, ``code`` must be found
        in the ``courses_catalog``. Returns the tuple
        ``(errs, inv_errs, conv_dict)``.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # The converter does not verify that the course really exists,
        # so we do it here.
        code = row['code']
        catalog = getUtility(ICatalog, name='courses_catalog')
        matches = catalog.searchResults(code=(code, code))
        if len(matches) == 0:
            errs.append(('code','non-existent'))
        return errs, inv_errs, conv_dict
515
class StudentOnlinePaymentProcessor(BatchProcessor):
    """A batch processor for IStudentOnlinePayment objects.

    Payments live inside the ``payments`` item of a student and are
    stored under an id derived from the ``p_id`` column.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'paymentimporter'
    grok.name(util_name)

    name = u'Payment Importer'
    iface = IStudentOnlinePayment
    factory_name = 'waeup.StudentOnlinePayment'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of column names.

        Consists of the locator columns, ``p_id`` and every field
        declared by ``self.iface``.
        """
        names = ['student_id', 'reg_number', 'matric_number', 'p_id']
        names += getFields(self.iface).keys()
        return sorted(set(names))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the header row of an import file.

        Raises `FatalCSVError` if no locator column is present, if
        ``p_id`` is missing or if a column name not starting with
        ``--`` is used twice. Returns ``True`` otherwise.
        """
        locator_columns = ['reg_number', 'student_id', 'matric_number']
        present = [title for title in locator_columns
                   if title in headerfields]
        if not present:
            raise FatalCSVError(
                "Need at least columns student_id " +
                "or reg_number or matric_number for import!")
        if 'p_id' not in headerfields:
            raise FatalCSVError(
                "Need p_id for import!")
        # Columns with a leading '--' are ignored and therefore exempt
        # from the uniqueness requirement.
        used = [title for title in headerfields
                if not title.startswith('--')]
        if len(set(used)) != len(used):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getParent(self, row, site):
        """Return the ``payments`` item of the addressed student or ``None``.

        The student is found via the first non-empty column of
        ``student_id`` (direct lookup), ``reg_number`` and
        ``matric_number`` (``students_catalog`` lookup, first hit).
        """
        if 'students' not in site.keys():
            return None
        student = None
        student_id = row.get('student_id')
        reg_number = row.get('reg_number')
        matric_number = row.get('matric_number')
        if student_id:
            if student_id in site['students']:
                student = site['students'][student_id]
        elif reg_number:
            cat = queryUtility(ICatalog, name='students_catalog')
            found = list(
                cat.searchResults(reg_number=(reg_number, reg_number)))
            if found:
                student = found[0]
        elif matric_number:
            cat = queryUtility(ICatalog, name='students_catalog')
            found = list(
                cat.searchResults(
                    matric_number=(matric_number, matric_number)))
            if found:
                student = found[0]
        if student is None:
            return None
        return student['payments']

    def parentsExist(self, row, site):
        """Tell whether `getParent` finds a payments container for `row`."""
        return self.getParent(row, site) is not None

    def entryExists(self, row, site):
        """Tell whether `getEntry` finds a payment for `row`."""
        return self.getEntry(row, site) is not None

    def getEntry(self, row, site):
        """Return the payment addressed by ``row['p_id']`` or ``None``."""
        payments = self.getParent(row, site)
        if payments is None:
            return None
        # A hash symbol at the end of p_id may be used in import files
        # to keep Excel or Calc from transforming the number.
        p_id = row['p_id'].strip('#')
        if not p_id.startswith('p'):
            # For data migration from old SRP
            p_id = 'p' + p_id[6:]
        return payments.get(p_id)

    def addEntry(self, obj, row, site):
        """Store payment `obj` in the student's payments container.

        Ids not starting with ``p`` are rewritten to ``'p'`` plus the
        part after the first six characters; the rewritten id is also
        assigned to ``obj.p_id``.
        """
        payments = self.getParent(row, site)
        p_id = row['p_id'].strip('#')
        if p_id.startswith('p'):
            payments[p_id] = obj
            return
        # For data migration from old SRP
        obj.p_id = 'p' + p_id[6:]
        payments[obj.p_id] = obj
        return

    def checkConversion(self, row, mode='ignore'):
        """Validates all values in row.

        In addition to the converter's checks, ``p_id`` (without
        surrounding hash symbols) must be 14 characters long if it
        starts with ``p`` and 19 characters long otherwise. Returns the
        tuple ``(errs, inv_errs, conv_dict)``.
        """
        converter = IObjectConverter(self.iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        # We have to check p_id.
        p_id = row['p_id'].strip('#')
        if p_id.startswith('p'):
            expected_length = 14
        else:
            expected_length = 19
        if len(p_id) != expected_length:
            errs.append(('p_id','invalid length'))
        return errs, inv_errs, conv_dict
Note: See TracBrowser for help on using the repository browser.