source: main/waeup.sirp/trunk/src/waeup/sirp/students/batching.py @ 7262

Last change on this file since 7262 was 7261, checked in by Henrik Bettermann, 13 years ago

Describe the batch processor properly.

  • Property svn:keywords set to Id
File size: 8.9 KB
Line 
1## $Id: batching.py 7261 2011-12-04 08:32:14Z henrik $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
"""Batch processing components for student objects.
19
20Batch processors eat CSV files to add, update or remove large numbers
21of certain kinds of objects at once.
22
Here we define the processors for student-specific objects like
students, studycourses, payment tickets and accommodation tickets.
25"""
26import grok
27import csv
28from zope.interface import Interface
29from zope.schema import getFields
30from zope.component import queryUtility
31from zope.catalog.interfaces import ICatalog
32from waeup.sirp.interfaces import (
33    IBatchProcessor, FatalCSVError, IObjectConverter)
34from waeup.sirp.students.interfaces import (
35    IStudent, IStudentStudyCourseImport,
36    IStudentUpdateByRegNo, IStudentUpdateByMatricNo)
37from waeup.sirp.utils.batching import BatchProcessor
38
class StudentProcessor(BatchProcessor):
    """A batch processor for IStudent objects.

    Each CSV row is matched to a student through the first non-empty
    locator column, tried in the order ``student_id``, ``reg_number``,
    ``matric_number`` (see `getLocator`).
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studentimporter'
    grok.name(util_name)

    name = u'Student Importer'
    iface = IStudent

    location_fields = []
    factory_name = 'waeup.Student'

    mode = None

    # Columns that can identify a student, in order of precedence.
    _locator_fields = ('student_id', 'reg_number', 'matric_number')

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of the locator columns plus all
        field names of ``self.iface``.
        """
        return sorted(set(
            list(self._locator_fields) +
            list(getFields(self.iface).keys())))

    def checkHeaders(self, headerfields, mode='create'):
        """Validate the header row of a CSV file.

        Raises `FatalCSVError` if none of the locator columns is
        present, if (in ``create`` mode) any entry of
        ``self.required_fields`` is missing, or if a column name that
        does not start with ``--`` occurs more than once.

        Returns ``True`` if all checks pass.
        """
        if not any(
            field in headerfields for field in self._locator_fields):
            raise FatalCSVError(
                "Need at least columns student_id or reg_number "
                "or matric_number for import!")
        if mode == 'create':
            # ``required_fields`` is not defined in this class; it is
            # expected to be provided by the base class.
            for field in self.required_fields:
                if field not in headerfields:
                    raise FatalCSVError(
                        "Need at least columns %s for import!" %
                        ', '.join(["'%s'" % x for x in self.required_fields]))
        # Columns whose name starts with '--' are to be ignored and may
        # therefore appear several times.
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Tell whether `site` contains a ``students`` entry.
        """
        return 'students' in site.keys()

    def getLocator(self, row):
        """Return the name of the column used to locate the student.

        The first of ``student_id``, ``reg_number`` and
        ``matric_number`` that is present in `row` with a non-empty
        value wins. Returns ``None`` if none of them is usable.
        """
        for field in self._locator_fields:
            if row.get(field):
                return field
        return None

    # The entry never exists in create mode.
    def entryExists(self, row, site):
        """Return the student addressed by `row` or ``None``.

        A ``student_id`` is looked up directly in ``site['students']``;
        ``reg_number`` and ``matric_number`` are searched for in the
        ``students_catalog`` catalog, where the first hit is returned.
        """
        if 'students' not in site.keys():
            return None
        # Determine the locator once instead of once per branch.
        locator = self.getLocator(row)
        if locator is None:
            return None
        value = row[locator]
        if locator == 'student_id':
            students = site['students']
            if value in students:
                return students[value]
            return None
        # reg_number/matric_number: query the catalog for exactly this
        # value by passing it as both bounds of the search range.
        cat = queryUtility(ICatalog, name='students_catalog')
        results = list(cat.searchResults(**{locator: (value, value)}))
        if results:
            return results[0]
        return None

    def getParent(self, row, site):
        """Return the ``students`` entry of `site`.
        """
        return site['students']

    def getEntry(self, row, site):
        """Return the student addressed by `row` or ``None``.
        """
        return self.entryExists(row, site)

    def addEntry(self, obj, row, site):
        """Add the student `obj` via ``addStudent`` of the parent.
        """
        parent = self.getParent(row, site)
        parent.addStudent(obj)

    def delEntry(self, row, site):
        """Remove the student addressed by `row`, if there is one.
        """
        student = self.entryExists(row, site)
        # Compare against None explicitly: `entryExists` signals "not
        # found" with None, and a found student must be removed even if
        # the object itself happens to evaluate as false.
        if student is not None:
            parent = self.getParent(row, site)
            del parent[student.student_id]

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used fieldnames.

        `path` is the CSV file whose first row supplies the raw column
        titles; `headerfields` holds the field name chosen for each
        column, in the same order. In ``remove`` mode only the locator
        columns are mapped. Columns set to ``--IGNORE--`` are skipped.
        """
        # Only the header row is needed; make sure the file handle is
        # released again even if reading fails.
        csv_file = open(path, 'rb')
        try:
            raw_header = csv.reader(csv_file).next()
        finally:
            csv_file.close()
        result = dict()
        for num, field in enumerate(headerfields):
            if mode == 'remove' and field not in self._locator_fields:
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        In ``update`` and ``remove`` mode the schema used for
        validation depends on the locator: `IStudentUpdateByRegNo` for
        ``reg_number``, `IStudentUpdateByMatricNo` for
        ``matric_number``. In every other case ``self.iface`` is used.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` delivered by
        the converter's ``fromStringDict``.
        """
        # Fall back to the full schema so that update/remove rows which
        # are located by student_id (or carry no locator at all) still
        # get a converter instead of hitting an unbound local variable.
        iface = self.iface
        if mode in ['update', 'remove']:
            locator = self.getLocator(row)
            if locator == 'reg_number':
                iface = IStudentUpdateByRegNo
            elif locator == 'matric_number':
                iface = IStudentUpdateByMatricNo
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        return errs, inv_errs, conv_dict
167
class StudentStudyCourseProcessor(BatchProcessor):
    """A batch processor for the study course of existing students.

    The student owning the study course is identified by the first
    non-empty column among ``student_id``, ``reg_number`` and
    ``matric_number``.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studycourseupdater'
    grok.name(util_name)

    name = u'StudentStudyCourse Importer (update only)'
    iface = IStudentStudyCourseImport
    factory_name = 'waeup.StudentStudyCourse'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """All column names known to this processor, sorted and unique.
        """
        candidates = ['student_id', 'reg_number', 'matric_number']
        candidates = candidates + getFields(self.iface).keys()
        return sorted(set(candidates))

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the CSV header row.

        Raises `FatalCSVError` when no locator column is present or
        when a column name not starting with ``--`` is used twice.
        Returns ``True`` otherwise. `mode` is not evaluated here.
        """
        locators = ('reg_number', 'student_id', 'matric_number')
        if not any(column in headerfields for column in locators):
            raise FatalCSVError(
                "Need at least columns student_id or reg_number "
                "or matric_number for import!")
        # Columns marked with a leading '--' are exempt from the
        # uniqueness check.
        relevant = [
            column for column in headerfields
            if not column.startswith('--')]
        if len(relevant) != len(set(relevant)):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Return the student `row` refers to, or ``None``.

        Only the first non-empty locator is evaluated; if it does not
        lead to a student, the remaining locators are not tried.
        """
        if 'students' not in site.keys():
            return None
        student_id = row.get('student_id')
        if student_id:
            students = site['students']
            if student_id in students:
                return students[student_id]
            return None
        for index_name in ('reg_number', 'matric_number'):
            wanted = row.get(index_name)
            if not wanted:
                continue
            # Search the catalog for exactly this value by using it as
            # lower and upper bound; the first hit is the result.
            catalog = queryUtility(ICatalog, name='students_catalog')
            hits = list(
                catalog.searchResults(**{index_name: (wanted, wanted)}))
            return hits[0] if hits else None
        return None

    def entryExists(self, row, site):
        """Return the student if it has a ``studycourse``, else ``None``.
        """
        owner = self.parentsExist(row, site)
        if owner and 'studycourse' in owner:
            return owner
        return None

    def getEntry(self, row, site):
        """Return the ``studycourse`` object of the student, or ``None``.
        """
        owner = self.entryExists(row, site)
        return owner.get('studycourse') if owner else None
Note: See TracBrowser for help on using the repository browser.