source: main/waeup.sirp/trunk/src/waeup/sirp/students/batching.py @ 7199

Last change on this file since 7199 was 7191, checked in by Henrik Bettermann, 13 years ago

Adjust copyright statement and svn keyword in students.

  • Property svn:keywords set to Id
File size: 8.9 KB
Line 
1## $Id: batching.py 7191 2011-11-25 07:13:22Z henrik $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""Batch processing components for student objects.
19
20Batch processors eat CSV files to add, update or remove large numbers
21of certain kinds of objects at once.
22
23Here we define the processors for student specific objects like
24students and their study courses.
25"""
26import grok
27import csv
28import copy
29from zope.interface import Interface
30from zope.schema import getFields
31from zope.component import queryUtility
32from zope.catalog.interfaces import ICatalog
33from waeup.sirp.interfaces import (
34    IBatchProcessor, FatalCSVError, IObjectConverter)
35from waeup.sirp.students.interfaces import (
36    IStudent, IStudentStudyCourse, IStudentStudyCourseImport,
37    IStudentUpdateByRegNo, IStudentUpdateByMatricNo)
38from waeup.sirp.utils.batching import BatchProcessor
39
class StudentProcessor(BatchProcessor):
    """A batch processor for IStudent objects.

    A row identifies its student through one of the locator columns
    ``student_id``, ``reg_number`` or ``matric_number``; the first
    non-empty one, checked in that order, is used (see `getLocator`).
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studentimporter'
    grok.name(util_name)

    name = u'Student Importer'
    iface = IStudent

    location_fields = []
    factory_name = 'waeup.Student'

    mode = None

    @property
    def available_fields(self):
        """Return the sorted, duplicate-free list of usable column names.

        These are the three locator columns plus every field name of
        `iface`.
        """
        return sorted(set(
            ['student_id', 'reg_number', 'matric_number'] +
            list(getFields(self.iface).keys())))

    def checkHeaders(self, headerfields, mode='create'):
        """Check whether `headerfields` are sufficient for `mode`.

        Returns ``True`` if all checks pass.

        Raises `FatalCSVError` if none of the locator columns is
        present, if `mode` is ``'create'`` and one of
        `required_fields` is missing, or if a column name that does
        not start with ``--`` appears more than once.
        """
        if ('reg_number' not in headerfields
            and 'student_id' not in headerfields
            and 'matric_number' not in headerfields):
            raise FatalCSVError(
                "Need at least columns student_id or reg_number " +
                "or matric_number for import!")
        if mode == 'create':
            for field in self.required_fields:
                if field not in headerfields:
                    raise FatalCSVError(
                        "Need at least columns %s for import!" %
                        ', '.join(["'%s'" % x for x in self.required_fields]))
        # Check for fields to be ignored...
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Tell whether `site` has a ``students`` key."""
        return 'students' in site.keys()

    def getLocator(self, row):
        """Return the name of the column used to locate the student.

        The first of ``student_id``, ``reg_number`` and
        ``matric_number`` that is present in `row` with a non-empty
        value wins. Returns ``None`` if there is none.
        """
        for locator in ('student_id', 'reg_number', 'matric_number'):
            if locator in row.keys() and row[locator]:
                return locator
        return None

    # The entry never exists in create mode.
    def entryExists(self, row, site):
        """Return the student addressed by `row` or ``None``.

        With a ``student_id`` locator the student is looked up directly
        in ``site['students']``; with ``reg_number`` or
        ``matric_number`` the ``students_catalog`` utility is queried
        and the first hit is returned.
        """
        if 'students' not in site.keys():
            return None
        # Determine the locator once instead of once per branch.
        locator = self.getLocator(row)
        if locator == 'student_id':
            if row['student_id'] in site['students']:
                return site['students'][row['student_id']]
        elif locator in ('reg_number', 'matric_number'):
            value = row[locator]
            cat = queryUtility(ICatalog, name='students_catalog')
            # Same (min, max) bounds; presumably an exact-match query,
            # to be confirmed against the catalog index type.
            results = list(cat.searchResults(**{locator: (value, value)}))
            if results:
                return results[0]
        return None

    def getParent(self, row, site):
        """Return the ``students`` item of `site`."""
        return site['students']

    def getEntry(self, row, site):
        """Return the student addressed by `row` or ``None``."""
        return self.entryExists(row, site)

    def addEntry(self, obj, row, site):
        """Add the student `obj` to the container from `getParent`."""
        parent = self.getParent(row, site)
        parent.addStudent(obj)
        return

    def delEntry(self, row, site):
        """Delete the student addressed by `row`, if it can be found."""
        student = self.entryExists(row, site)
        if student:
            parent = self.getParent(row, site)
            del parent[student.student_id]

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used fieldnames.

        Reads the first line of the CSV file at `path` and maps each
        raw column title to the entry of `headerfields` at the same
        position. Columns marked ``--IGNORE--`` are left out; in
        ``'remove'`` mode only the locator columns are kept.
        """
        result = dict()
        # Close the file deterministically; only the header line is needed.
        with open(path, 'rb') as csv_file:
            raw_header = next(csv.reader(csv_file))
        for num, field in enumerate(headerfields):
            if field not in [
                'student_id', 'reg_number', 'matric_number'] and mode == 'remove':
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        Picks the interface to convert against and returns the
        ``(errs, inv_errs, conv_dict)`` triple delivered by the
        converter's ``fromStringDict``.

        In ``'update'`` and ``'remove'`` mode rows located by
        ``reg_number`` or ``matric_number`` use the matching
        update-by interface. Every other case uses `iface`.
        """
        # Default first: rows located by student_id (or with no locator)
        # in update/remove mode would otherwise leave `iface` unbound.
        iface = self.iface
        if mode in ['update', 'remove']:
            locator = self.getLocator(row)
            if locator == 'reg_number':
                iface = IStudentUpdateByRegNo
            elif locator == 'matric_number':
                iface = IStudentUpdateByMatricNo
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        return errs, inv_errs, conv_dict
169
class StudentStudyCourseProcessor(BatchProcessor):
    """A batch processor for IStudentStudyCourse objects.

    The processor works on the ``studycourse`` item of students that
    already exist; the owning student is found via ``student_id``,
    ``reg_number`` or ``matric_number``.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studycourseupdater'
    grok.name(util_name)

    name = u'StudentStudyCourse Importer (update only)'
    iface = IStudentStudyCourseImport
    factory_name = 'waeup.StudentStudyCourse'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted column names: the locator columns plus `iface` fields."""
        names = set(['student_id', 'reg_number', 'matric_number'])
        names.update(getFields(self.iface).keys())
        return sorted(names)

    def checkHeaders(self, headerfields, mode='ignore'):
        """Validate `headerfields`; `mode` is not evaluated here.

        Returns ``True`` on success. Raises `FatalCSVError` if no
        locator column is present or if a column name not starting
        with ``--`` occurs more than once.
        """
        locators = ('reg_number', 'student_id', 'matric_number')
        has_locator = False
        for locator in locators:
            if locator in headerfields:
                has_locator = True
                break
        if not has_locator:
            raise FatalCSVError(
                "Need at least columns student_id "
                "or reg_number or matric_number for import!")
        # Columns starting with '--' are ignored and may repeat.
        relevant = [header for header in headerfields
                    if not header.startswith('--')]
        if len(relevant) != len(set(relevant)):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Return the student owning the study course, or ``None``.

        Only the first non-empty locator of `row` is tried; there is
        no fallback to the next one if it does not lead to a student.
        """
        if 'students' not in site.keys():
            return None
        if row.get('student_id'):
            students = site['students']
            student_id = row['student_id']
            if student_id in students:
                return students[student_id]
            return None
        for field in ('reg_number', 'matric_number'):
            value = row.get(field)
            if not value:
                continue
            catalog = queryUtility(ICatalog, name='students_catalog')
            hits = list(catalog.searchResults(**{field: (value, value)}))
            if hits:
                return hits[0]
            return None
        return None

    def entryExists(self, row, site):
        """Return the student if it has a ``studycourse`` item, else ``None``."""
        student = self.parentsExist(row, site)
        if student and 'studycourse' in student:
            return student
        return None

    def getEntry(self, row, site):
        """Return the ``studycourse`` item of the addressed student or ``None``."""
        student = self.entryExists(row, site)
        if student:
            return student.get('studycourse')
        return None
Note: See TracBrowser for help on using the repository browser.