source: main/waeup.sirp/trunk/src/waeup/sirp/students/batching.py @ 6850

Last change on this file since 6850 was 6849, checked in by Henrik Bettermann, 13 years ago

Searching for reg_numbers or matric_numbers makes batch importing more difficult. Field validation must be skipped for reg_numbers and matric_numbers respectively if these fields are used for seeking students. After quite a lot of experiments I came to the conclusion that we need dedicated interfaces to skip the regular validation.

  • Property svn:keywords set to Id
File size: 8.1 KB
RevLine 
"""Batch processing components for student objects.

Batch processors eat CSV files to add, update or remove large numbers
of certain kinds of objects at once.

Here we define the processors for student-specific objects like
students and their study courses.
"""
9import grok
[6849]10import csv
11import copy
[6821]12from zope.interface import Interface
[6825]13from zope.schema import getFields
14from zope.component import queryUtility
15from zope.catalog.interfaces import ICatalog
[6849]16from waeup.sirp.interfaces import (
17    IBatchProcessor, FatalCSVError, IObjectConverter)
[6825]18from waeup.sirp.students.interfaces import (
[6849]19    IStudent, IStudentStudyCourse, IStudentStudyCourseImport,
20    IStudentUpdateByRegNo, IStudentUpdateByMatricNo)
[6821]21from waeup.sirp.utils.batching import BatchProcessor
22
class StudentProcessor(BatchProcessor):
    """A batch processor for IStudent objects.

    A student is looked up by the first non-empty value among the CSV
    columns ``student_id``, ``reg_number`` and ``matric_number`` (in
    that order of precedence, see `getLocator`).
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studentimporter'
    grok.name(util_name)

    name = u'Student Importer'
    iface = IStudent

    location_fields = []
    factory_name = 'waeup.Student'

    mode = None

    @property
    def available_fields(self):
        """Sorted list of the locator columns plus all `iface` fields."""
        names = set(['student_id', 'reg_number', 'matric_number'])
        names.update(getFields(self.iface))
        return sorted(names)

    def checkHeaders(self, headerfields, mode='create'):
        """Check that the CSV header is usable for `mode`.

        Raises `FatalCSVError` if no locator column is present, if (in
        create mode) one of ``self.required_fields`` is missing, or if
        a non-ignored column name appears more than once.  Returns
        ``True`` otherwise.
        """
        if ('reg_number' not in headerfields
            and 'student_id' not in headerfields
            and 'matric_number' not in headerfields):
            raise FatalCSVError(
                "Need at least columns student_id or reg_number or matric_number for import!")
        if mode == 'create':
            for field in self.required_fields:
                if field not in headerfields:
                    raise FatalCSVError(
                        "Need at least columns %s for import!" %
                        ', '.join(["'%s'" % x for x in self.required_fields]))
        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Tell whether `site` contains a ``students`` entry."""
        return 'students' in site.keys()

    def getLocator(self, row):
        """Return the name of the column used to find the student.

        The first of ``student_id``, ``reg_number``, ``matric_number``
        that is present in `row` with a non-empty value wins; ``None``
        is returned if there is none.
        """
        for fieldname in ('student_id', 'reg_number', 'matric_number'):
            if fieldname in row.keys() and row[fieldname]:
                return fieldname
        return None

    def _searchCatalog(self, fieldname, value):
        """Return the first ``students_catalog`` hit for an exact
        `value` of index `fieldname`, or ``None`` if nothing matches.
        """
        cat = queryUtility(ICatalog, name='students_catalog')
        results = list(cat.searchResults(**{fieldname: (value, value)}))
        if results:
            return results[0]
        return None

    # The entry never exists in create mode.
    def entryExists(self, row, site):
        """Return the student addressed by `row` or ``None``.

        Only the locator chosen by `getLocator` is consulted; there is
        no fallback to the other locator columns if it yields nothing.
        """
        if 'students' not in site.keys():
            return None
        locator = self.getLocator(row)
        if locator == 'student_id':
            students = site['students']
            if row['student_id'] in students:
                return students[row['student_id']]
            return None
        if locator in ('reg_number', 'matric_number'):
            return self._searchCatalog(locator, row[locator])
        return None

    def getParent(self, row, site):
        """Return the ``students`` entry of `site`."""
        return site['students']

    def getEntry(self, row, site):
        """Return the student addressed by `row` or ``None``."""
        return self.entryExists(row, site)

    def addEntry(self, obj, row, site):
        """Add student `obj` to the container from `getParent`."""
        parent = self.getParent(row, site)
        parent.addStudent(obj)
        return

    def delEntry(self, row, site):
        """Delete the student addressed by `row`, if it can be found."""
        student = self.entryExists(row, site)
        if student:
            parent = self.getParent(row, site)
            del parent[student.student_id]

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used fieldnames.

        The raw header is read from the first line of the file at
        `path`.  Columns marked ``--IGNORE--`` are dropped, and in
        ``remove`` mode only the locator columns are kept.
        """
        result = dict()
        # Only the header row is needed; close the file right after
        # reading it instead of leaking the handle.
        with open(path, 'rb') as csv_file:
            raw_header = next(csv.reader(csv_file))
        locators = ('student_id', 'reg_number', 'matric_number')
        removing = (mode == 'remove')
        for num, field in enumerate(headerfields):
            if removing and field not in locators:
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        Returns the ``(errs, inv_errs, conv_dict)`` triple produced by
        the `IObjectConverter` for the interface in charge.
        """
        # Default to the regular interface.  Without this default,
        # `iface` stayed unbound in update/remove mode whenever the row
        # was located by student_id (or had no locator at all).
        iface = self.iface
        if mode in ['update', 'remove']:
            # Dedicated interfaces are used when the student is sought
            # by reg_number or matric_number.
            locator = self.getLocator(row)
            if locator == 'reg_number':
                iface = IStudentUpdateByRegNo
            elif locator == 'matric_number':
                iface = IStudentUpdateByMatricNo
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        return errs, inv_errs, conv_dict
149
class StudentStudyCourseProcessor(BatchProcessor):
    """A batch processor for IStudentStudyCourse objects.

    The owning student is looked up by the first non-empty value among
    the CSV columns ``student_id``, ``reg_number`` and
    ``matric_number`` (in that order of precedence).
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studycourseupdater'
    grok.name(util_name)

    name = u'StudentStudyCourse Importer (update only)'
    iface = IStudentStudyCourseImport
    factory_name = 'waeup.StudentStudyCourse'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted list of the locator columns plus all `iface` fields."""
        names = set(['student_id', 'reg_number', 'matric_number'])
        names.update(getFields(self.iface))
        return sorted(names)

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check that the CSV header is usable.

        Raises `FatalCSVError` if no locator column is present or if a
        non-ignored column name appears more than once.  Returns
        ``True`` otherwise.  `mode` is not evaluated here.
        """
        if ('reg_number' not in headerfields
            and 'student_id' not in headerfields
            and 'matric_number' not in headerfields):
            raise FatalCSVError(
                "Need at least columns student_id or reg_number or matric_number for import!")
        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Return the student addressed by `row` or ``None``.

        Only the first non-empty locator column is consulted; there is
        no fallback to the remaining ones if it yields nothing.
        """
        if 'students' not in site.keys():
            return None
        if 'student_id' in row.keys() and row['student_id']:
            students = site['students']
            if row['student_id'] in students:
                return students[row['student_id']]
            return None
        # reg_number and matric_number are both resolved by an exact
        # match query against the students catalog.
        for fieldname in ('reg_number', 'matric_number'):
            if fieldname in row.keys() and row[fieldname]:
                value = row[fieldname]
                cat = queryUtility(ICatalog, name='students_catalog')
                results = list(
                    cat.searchResults(**{fieldname: (value, value)}))
                if results:
                    return results[0]
                return None
        return None

    def entryExists(self, row, site):
        """Return the student addressed by `row` if it contains a
        ``studycourse`` entry, ``None`` otherwise.
        """
        student = self.parentsExist(row, site)
        if not student:
            return None
        if 'studycourse' in student:
            return student
        return None

    def getEntry(self, row, site):
        """Return the ``studycourse`` of the student addressed by `row`
        or ``None`` if there is no such student or study course.
        """
        student = self.entryExists(row, site)
        if not student:
            return None
        return student.get('studycourse')
Note: See TracBrowser for help on using the repository browser.