source: main/waeup.sirp/trunk/src/waeup/sirp/students/batching.py @ 6850

Last change on this file since 6850 was 6849, checked in by Henrik Bettermann, 13 years ago

Searching for reg_numbers or matric_numbers makes batch importing more difficult. Field validation must be skipped for reg_numbers and matric_numbers respectively if these fields are used for seeking students. After quite a lot of experiments I came to the conclusion that we need dedicated interfaces to skip the regular validation.

  • Property svn:keywords set to Id
File size: 8.1 KB
Line 
1"""Batch processing components for student objects.
2
3Batch processors eat CSV files to add, update or remove large numbers
4of certain kinds of objects at once.
5
6Here we define the processors for student specific objects like
7students and their study courses.
8"""
9import grok
10import csv
11import copy
12from zope.interface import Interface
13from zope.schema import getFields
14from zope.component import queryUtility
15from zope.catalog.interfaces import ICatalog
16from waeup.sirp.interfaces import (
17    IBatchProcessor, FatalCSVError, IObjectConverter)
18from waeup.sirp.students.interfaces import (
19    IStudent, IStudentStudyCourse, IStudentStudyCourseImport,
20    IStudentUpdateByRegNo, IStudentUpdateByMatricNo)
21from waeup.sirp.utils.batching import BatchProcessor
22
class StudentProcessor(BatchProcessor):
    """A batch processor for IStudent objects.

    A row identifies its student by one of the columns ``student_id``,
    ``reg_number`` or ``matric_number``; the first non-empty one (in
    that order) is used, see :meth:`getLocator`.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studentimporter'
    grok.name(util_name)

    name = u'Student Importer'
    iface = IStudent

    location_fields = []
    factory_name = 'waeup.Student'

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of the locator columns plus all
        field names of ``self.iface``.
        """
        # list(...) keeps the concatenation working when keys() is a view.
        return sorted(set(
            ['student_id', 'reg_number', 'matric_number'] +
            list(getFields(self.iface).keys())))

    def checkHeaders(self, headerfields, mode='create'):
        """Check the CSV column names in `headerfields`.

        Raises `FatalCSVError` if none of the locator columns is
        present, if (in ``create`` mode) one of ``self.required_fields``
        is missing, or if a column name that does not start with ``--``
        appears more than once. Returns ``True`` otherwise.
        """
        if not 'reg_number' in headerfields and not 'student_id' in headerfields and not 'matric_number' in headerfields:
            raise FatalCSVError(
                "Need at least columns student_id or reg_number or matric_number for import!")
        if mode == 'create':
            # required_fields is not defined in this class; presumably
            # it is provided by BatchProcessor.
            for field in self.required_fields:
                if not field in headerfields:
                    raise FatalCSVError(
                        "Need at least columns %s for import!" %
                        ', '.join(["'%s'" % x for x in self.required_fields]))
        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Tell whether `site` contains a ``students`` entry."""
        return 'students' in site.keys()

    def getLocator(self, row):
        """Return the name of the column used to locate the student.

        The first of ``student_id``, ``reg_number``, ``matric_number``
        that is present in `row` with a non-empty value wins. Returns
        ``None`` if none of them qualifies.
        """
        for key in ('student_id', 'reg_number', 'matric_number'):
            if key in row and row[key]:
                return key
        return None

    def _searchCatalog(self, index, value):
        """Return the first hit for `value` in catalog index `index`
        of the ``students_catalog`` utility, or ``None`` if no hit.
        """
        cat = queryUtility(ICatalog, name='students_catalog')
        # The (value, value) pair is the same min/max query the
        # original code passed for both indexes.
        results = list(cat.searchResults(**{index: (value, value)}))
        if results:
            return results[0]
        return None

    # The entry never exists in create mode.
    def entryExists(self, row, site):
        """Return the student addressed by `row`, or ``None``.

        A ``student_id`` is looked up directly in ``site['students']``;
        ``reg_number`` and ``matric_number`` are searched in the
        students catalog.
        """
        if not 'students' in site.keys():
            return None
        locator = self.getLocator(row)
        if locator == 'student_id':
            students = site['students']
            if row['student_id'] in students:
                return students[row['student_id']]
        elif locator in ('reg_number', 'matric_number'):
            return self._searchCatalog(locator, row[locator])
        return None

    def getParent(self, row, site):
        """Return the ``students`` entry of `site`."""
        return site['students']

    def getEntry(self, row, site):
        """Return the student addressed by `row`, or ``None``."""
        return self.entryExists(row, site)

    def addEntry(self, obj, row, site):
        """Add `obj` to the students entry via its ``addStudent``."""
        parent = self.getParent(row, site)
        parent.addStudent(obj)

    def delEntry(self, row, site):
        """Delete the student addressed by `row`, if one is found."""
        student = self.entryExists(row, site)
        if student:
            parent = self.getParent(row, site)
            del parent[student.student_id]

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used fieldnames.

        The first line of the file at `path` supplies the raw column
        names; these are paired by position with `headerfields`.
        Columns marked ``--IGNORE--`` are left out, and in ``remove``
        mode only the three locator columns are kept.
        """
        result = dict()
        # Only the header line is needed, so close the file right away
        # instead of leaking the handle. Binary mode is kept as in the
        # original (what the Python 2 csv module expects).
        with open(path, 'rb') as csv_file:
            raw_header = next(csv.reader(csv_file))
        for num, field in enumerate(headerfields):
            if field not in ['student_id', 'reg_number', 'matric_number'] and mode == 'remove':
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        In ``update`` and ``remove`` mode a dedicated interface is used
        when the student is located by ``reg_number`` or
        ``matric_number``; in every other case ``self.iface`` is used.

        Returns the ``(errs, inv_errs, conv_dict)`` triple delivered by
        the converter's ``fromStringDict``.
        """
        # Start from the regular interface so that `iface` is always
        # bound, also for rows located by student_id (or not at all)
        # in update/remove mode.
        iface = self.iface
        if mode in ['update', 'remove']:
            locator = self.getLocator(row)
            if locator == 'reg_number':
                iface = IStudentUpdateByRegNo
            elif locator == 'matric_number':
                iface = IStudentUpdateByMatricNo
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        return errs, inv_errs, conv_dict
149
class StudentStudyCourseProcessor(BatchProcessor):
    """A batch processor for IStudentStudyCourse objects.

    The owning student is found through the ``student_id``,
    ``reg_number`` or ``matric_number`` column of a row.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studycourseupdater'
    grok.name(util_name)

    name = u'StudentStudyCourse Importer (update only)'
    iface = IStudentStudyCourseImport
    factory_name = 'waeup.StudentStudyCourse'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of the locator columns plus all
        field names of ``self.iface``.
        """
        names = set(['student_id', 'reg_number', 'matric_number'])
        names.update(getFields(self.iface).keys())
        return sorted(names)

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV column names in `headerfields`.

        Raises `FatalCSVError` if no locator column is present or if a
        column name not starting with ``--`` occurs more than once.
        Returns ``True`` otherwise. `mode` is not evaluated here.
        """
        locators = ('reg_number', 'student_id', 'matric_number')
        if not any(key in headerfields for key in locators):
            raise FatalCSVError(
                "Need at least columns student_id or reg_number or matric_number for import!")
        # Columns whose name starts with '--' may repeat; all others
        # have to be unique.
        relevant = [
            column for column in headerfields
            if not column.startswith('--')]
        if len(set(relevant)) < len(relevant):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Return the student addressed by `row`, or ``None``.

        Only the first non-empty locator column (``student_id``, then
        ``reg_number``, then ``matric_number``) is tried; if it yields
        no student, the remaining columns are not consulted.
        """
        if 'students' not in site.keys():
            return None
        columns = row.keys()
        if 'student_id' in columns and row['student_id']:
            students = site['students']
            student_id = row['student_id']
            if student_id in students:
                return students[student_id]
            return None
        for index in ('reg_number', 'matric_number'):
            if index in columns and row[index]:
                value = row[index]
                cat = queryUtility(ICatalog, name='students_catalog')
                hits = list(cat.searchResults(**{index: (value, value)}))
                if hits:
                    return hits[0]
                return None
        return None

    def entryExists(self, row, site):
        """Return the student addressed by `row` if it contains a
        ``studycourse`` entry, else ``None``.
        """
        student = self.parentsExist(row, site)
        if student and 'studycourse' in student:
            return student
        return None

    def getEntry(self, row, site):
        """Return the ``studycourse`` entry of the student addressed by
        `row`, or ``None`` if there is no such student or entry.
        """
        student = self.entryExists(row, site)
        return student.get('studycourse') if student else None
Note: See TracBrowser for help on using the repository browser.