source: main/waeup.sirp/trunk/src/waeup/sirp/students/batching.py @ 6935

Last change on this file since 6935 was 6854, checked in by Henrik Bettermann, 13 years ago

Break overlong lines.

  • Property svn:keywords set to Id
File size: 8.1 KB
Line 
1"""Batch processing components for academics objects.
2
3Batch processors eat CSV files to add, update or remove large numbers
4of certain kinds of objects at once.
5
6Here we define the processors for academics specific objects like
7faculties, departments and the like.
8"""
9import grok
10import csv
11import copy
12from zope.interface import Interface
13from zope.schema import getFields
14from zope.component import queryUtility
15from zope.catalog.interfaces import ICatalog
16from waeup.sirp.interfaces import (
17    IBatchProcessor, FatalCSVError, IObjectConverter)
18from waeup.sirp.students.interfaces import (
19    IStudent, IStudentStudyCourse, IStudentStudyCourseImport,
20    IStudentUpdateByRegNo, IStudentUpdateByMatricNo)
21from waeup.sirp.utils.batching import BatchProcessor
22
class StudentProcessor(BatchProcessor):
    """A batch processor for IStudent objects.

    Rows are matched to students through one of three locator columns,
    tried in this order: ``student_id``, ``reg_number``,
    ``matric_number``.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studentimporter'
    grok.name(util_name)

    name = u'Student Importer'
    iface = IStudent

    location_fields = []
    factory_name = 'waeup.Student'

    mode = None

    @property
    def available_fields(self):
        """Sorted names of all columns this processor accepts.

        These are the three locator columns plus every field of
        ``self.iface``, without duplicates.
        """
        names = set(['student_id', 'reg_number', 'matric_number'])
        names.update(getFields(self.iface).keys())
        return sorted(names)

    def checkHeaders(self, headerfields, mode='create'):
        """Check the CSV header fields before rows are processed.

        Raises `FatalCSVError` if none of the locator columns is
        present, if (in ``create`` mode only) one of
        ``self.required_fields`` is missing, or if a column name that
        does not start with ``--`` appears more than once.

        Returns ``True`` if all checks pass.
        """
        if not 'reg_number' in headerfields and not 'student_id' \
            in headerfields and not 'matric_number' in headerfields:
            raise FatalCSVError(
                "Need at least columns student_id or reg_number " +
                "or matric_number for import!")
        if mode == 'create':
            for field in self.required_fields:
                if not field in headerfields:
                    raise FatalCSVError(
                        "Need at least columns %s for import!" %
                        ', '.join(["'%s'" % x for x in self.required_fields]))
        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Tell whether `site` has a ``students`` entry.
        """
        return 'students' in site.keys()

    def getLocator(self, row):
        """Return the name of the column used to locate the student.

        The first of ``student_id``, ``reg_number`` and
        ``matric_number`` that is present in `row` with a non-empty
        value wins. Returns ``None`` if none of them qualifies.
        """
        if 'student_id' in row.keys() and row['student_id']:
            return 'student_id'
        elif 'reg_number' in row.keys() and row['reg_number']:
            return 'reg_number'
        elif 'matric_number' in row.keys() and row['matric_number']:
            return 'matric_number'
        else:
            return None

    # The entry never exists in create mode.
    def entryExists(self, row, site):
        """Return the student `row` refers to or ``None``.

        With a ``student_id`` locator the student is looked up directly
        in ``site['students']``; with ``reg_number`` or
        ``matric_number`` the ``students_catalog`` is searched and the
        first hit is returned.
        """
        if not 'students' in site.keys():
            return None
        # Resolve the locator once; it cannot change during this call.
        locator = self.getLocator(row)
        if locator == 'student_id':
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
        elif locator in ('reg_number', 'matric_number'):
            value = row[locator]
            cat = queryUtility(ICatalog, name='students_catalog')
            # The catalog index has the same name as the locator column;
            # a (value, value) range asks for exactly that value.
            results = list(cat.searchResults(**{locator: (value, value)}))
            if results:
                return results[0]
        return None

    def getParent(self, row, site):
        """Return the container that holds all students.
        """
        return site['students']

    def getEntry(self, row, site):
        """Return the student `row` refers to or ``None``.
        """
        return self.entryExists(row, site)

    def addEntry(self, obj, row, site):
        """Add student `obj` to the students container.
        """
        parent = self.getParent(row, site)
        parent.addStudent(obj)
        return

    def delEntry(self, row, site):
        """Remove the student `row` refers to, if there is one.
        """
        student = self.entryExists(row, site)
        # entryExists signals "not found" with None, so test for exactly
        # that instead of relying on the truth value of the found object.
        if student is not None:
            parent = self.getParent(row, site)
            del parent[student.student_id]

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used fieldnames.

        `path` is the CSV file whose first line supplies the raw column
        titles; `headerfields` holds the field name chosen for each
        column, in the same order. In ``remove`` mode only the locator
        columns are mapped. Columns marked ``--IGNORE--`` are skipped.
        """
        # Only the header line is needed; close the file right after
        # reading it instead of leaving that to garbage collection.
        with open(path, 'rb') as csv_file:
            raw_header = next(csv.reader(csv_file))
        result = dict()
        for num, field in enumerate(headerfields):
            if field not in [
                'student_id', 'reg_number', 'matric_number'] and mode == 'remove':
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        In ``update`` and ``remove`` mode the interface used for
        validation depends on the locator: rows located by
        ``reg_number`` or ``matric_number`` are checked against the
        matching update interface. In all other cases ``self.iface``
        is used.

        Returns the ``(errs, inv_errs, conv_dict)`` triple delivered by
        the converter's ``fromStringDict``.
        """
        # Start from the regular interface so that `iface` is always
        # bound, also for update/remove rows located by student_id.
        iface = self.iface
        if mode in ['update', 'remove']:
            locator = self.getLocator(row)
            if locator == 'reg_number':
                iface = IStudentUpdateByRegNo
            elif locator == 'matric_number':
                iface = IStudentUpdateByMatricNo
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        return errs, inv_errs, conv_dict
152
class StudentStudyCourseProcessor(BatchProcessor):
    """A batch processor for IStudentStudyCourse objects.

    The study course is reached through its student, who is located
    via ``student_id``, ``reg_number`` or ``matric_number``.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studycourseupdater'
    grok.name(util_name)

    name = u'StudentStudyCourse Importer (update only)'
    iface = IStudentStudyCourseImport
    factory_name = 'waeup.StudentStudyCourse'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of usable column names.

        Consists of the locator columns and the fields of
        ``self.iface``.
        """
        names = set(['student_id', 'reg_number', 'matric_number'])
        names.update(getFields(self.iface).keys())
        return sorted(names)

    def checkHeaders(self, headerfields, mode='ignore'):
        """Validate the CSV header fields.

        Raises `FatalCSVError` when no locator column is contained in
        `headerfields` or when a column name not starting with ``--``
        occurs more than once. Returns ``True`` otherwise.
        """
        locators = ('reg_number', 'student_id', 'matric_number')
        if not any(column in headerfields for column in locators):
            message = ("Need at least columns student_id "
                       "or reg_number or matric_number for import!")
            raise FatalCSVError(message)
        # Columns whose name starts with '--' are to be ignored and may
        # therefore appear several times.
        relevant = [column for column in headerfields
                    if not column.startswith('--')]
        if len(relevant) > len(set(relevant)):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Return the student `row` refers to or ``None``.

        Only the first locator column with a non-empty value is
        consulted: ``student_id`` is looked up in ``site['students']``,
        ``reg_number`` and ``matric_number`` are searched in the
        ``students_catalog``.
        """
        if 'students' not in site.keys():
            return None
        if 'student_id' in row.keys() and row['student_id']:
            student_id = row['student_id']
            students = site['students']
            if student_id in students:
                return students[student_id]
            return None
        for index_name in ('reg_number', 'matric_number'):
            if not (index_name in row.keys() and row[index_name]):
                continue
            value = row[index_name]
            catalog = queryUtility(ICatalog, name='students_catalog')
            # A (value, value) range asks for exactly that value.
            hits = list(catalog.searchResults(**{index_name: (value, value)}))
            # No fallback to a later locator if this one finds nothing.
            if hits:
                return hits[0]
            return None
        return None

    def entryExists(self, row, site):
        """Return the student if it contains a ``studycourse`` entry.

        Returns ``None`` if no student was found or the student has no
        such entry.
        """
        student = self.parentsExist(row, site)
        if student and 'studycourse' in student:
            return student
        return None

    def getEntry(self, row, site):
        """Return the ``studycourse`` object of the located student.

        Returns ``None`` if `entryExists` does not deliver a student.
        """
        student = self.entryExists(row, site)
        if student:
            return student.get('studycourse')
        return None
Note: See TracBrowser for help on using the repository browser.