source: main/waeup.sirp/trunk/src/waeup/sirp/students/batching.py @ 7262

Last change on this file since 7262 was 7261, checked in by Henrik Bettermann, 13 years ago

Describe the batch processor properly.

  • Property svn:keywords set to Id
File size: 8.9 KB
RevLine 
[7191]1## $Id: batching.py 7261 2011-12-04 08:32:14Z henrik $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
[6821]18"""Batch processing components for academics objects.
19
20Batch processors eat CSV files to add, update or remove large numbers
21of certain kinds of objects at once.
22
[7261]23Here we define the processors for student-specific objects like
24students, studycourses, payment tickets and accommodation tickets.
[6821]25"""
26import grok
[6849]27import csv
[6821]28from zope.interface import Interface
[6825]29from zope.schema import getFields
30from zope.component import queryUtility
31from zope.catalog.interfaces import ICatalog
[6849]32from waeup.sirp.interfaces import (
33    IBatchProcessor, FatalCSVError, IObjectConverter)
[6825]34from waeup.sirp.students.interfaces import (
[7256]35    IStudent, IStudentStudyCourseImport,
[6849]36    IStudentUpdateByRegNo, IStudentUpdateByMatricNo)
[6821]37from waeup.sirp.utils.batching import BatchProcessor
38
class StudentProcessor(BatchProcessor):
    """A batch processor for IStudent objects.

    A row identifies its student by one of three locator columns, tried
    in this order: ``student_id``, ``reg_number``, ``matric_number``.
    ``student_id`` is looked up directly in ``site['students']``; the
    other two are searched in the catalog registered under the name
    ``students_catalog``.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studentimporter'
    grok.name(util_name)

    name = u'Student Importer'
    iface = IStudent

    location_fields = []
    factory_name = 'waeup.Student'

    mode = None

    # Columns that may identify a student, in order of precedence.
    _locator_fields = ('student_id', 'reg_number', 'matric_number')

    @property
    def available_fields(self):
        """Sorted, duplicate-free names of the locator columns plus all
        fields of the schema in ``self.iface``.
        """
        return sorted(set(
            list(self._locator_fields) + list(getFields(self.iface).keys())))

    def checkHeaders(self, headerfields, mode='create'):
        """Validate the column names of a CSV file.

        Raises `FatalCSVError` if none of the locator columns is
        present, if (in ``create`` mode only) one of
        ``self.required_fields`` is missing, or if a column name that
        does not start with ``--`` appears more than once.

        Returns ``True`` otherwise.
        """
        if not any(name in headerfields for name in self._locator_fields):
            raise FatalCSVError(
                "Need at least columns student_id or reg_number "
                "or matric_number for import!")
        if mode == 'create':
            if any(field not in headerfields
                   for field in self.required_fields):
                raise FatalCSVError(
                    "Need at least columns %s for import!" %
                    ', '.join(["'%s'" % x for x in self.required_fields]))
        # Columns whose name starts with '--' are to be ignored and may
        # therefore occur several times.
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Tell whether `site` contains a ``students`` entry."""
        return 'students' in site.keys()

    def getLocator(self, row):
        """Return the name of the first locator column with a non-empty
        value in `row`, or ``None`` if there is none.
        """
        for name in self._locator_fields:
            if row.get(name):
                return name
        return None

    def _findInCatalog(self, field, value):
        """Return the first ``students_catalog`` hit where `field`
        equals `value`, or ``None`` if nothing matches.
        """
        cat = queryUtility(ICatalog, name='students_catalog')
        # A (min, max) pair with identical bounds asks for an exact match.
        results = list(cat.searchResults(**{field: (value, value)}))
        if results:
            return results[0]
        return None

    # The entry never exists in create mode.
    def entryExists(self, row, site):
        """Return the student object addressed by `row`, or ``None``.

        ``None`` is returned when `site` has no ``students`` entry, when
        the row carries no locator value, or when no student matches.
        """
        if not 'students' in site.keys():
            return None
        locator = self.getLocator(row)
        if locator == 'student_id':
            students = site['students']
            if row['student_id'] in students:
                return students[row['student_id']]
            return None
        if locator in ('reg_number', 'matric_number'):
            return self._findInCatalog(locator, row[locator])
        return None

    def getParent(self, row, site):
        """Return ``site['students']``, the parent of all students."""
        return site['students']

    def getEntry(self, row, site):
        """Return the student addressed by `row` (see `entryExists`)."""
        return self.entryExists(row, site)

    def addEntry(self, obj, row, site):
        """Add `obj` to the parent returned by `getParent`."""
        parent = self.getParent(row, site)
        parent.addStudent(obj)
        return

    def delEntry(self, row, site):
        """Delete the student addressed by `row` from its parent.

        Does nothing if no such student can be found.
        """
        student = self.entryExists(row, site)
        # Compare against None explicitly: `entryExists` signals "not
        # found" with None, and a found student must be removed even if
        # the object itself happens to evaluate as false.
        if student is not None:
            parent = self.getParent(row, site)
            del parent[student.student_id]

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used fieldnames.

        `path` is the CSV file whose first line holds the raw column
        titles; `headerfields` lists, position by position, the field
        name each column is mapped to. Columns mapped to
        ``--IGNORE--`` are left out, and in ``remove`` mode only the
        locator columns are kept.
        """
        # Only the header line is needed; close the file right away.
        with open(path, 'rb') as csv_file:
            raw_header = next(csv.reader(csv_file))
        result = dict()
        for num, field in enumerate(headerfields):
            if mode == 'remove' and field not in self._locator_fields:
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        In ``update`` and ``remove`` mode, rows located by
        ``reg_number`` or ``matric_number`` are converted against
        `IStudentUpdateByRegNo` or `IStudentUpdateByMatricNo`
        respectively. All other rows are converted against
        ``self.iface``.

        Returns the tuple ``(errs, inv_errs, conv_dict)`` as delivered
        by the converter's ``fromStringDict``.
        """
        # Start from the default schema so that `iface` is always bound,
        # also for rows located by student_id or without any locator.
        iface = self.iface
        if mode in ('update', 'remove'):
            locator = self.getLocator(row)
            if locator == 'reg_number':
                iface = IStudentUpdateByRegNo
            elif locator == 'matric_number':
                iface = IStudentUpdateByMatricNo
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name)
        return errs, inv_errs, conv_dict
167
class StudentStudyCourseProcessor(BatchProcessor):
    """A batch processor for IStudentStudyCourse objects.

    The student owning the study course is identified by one of the
    columns ``student_id``, ``reg_number`` or ``matric_number``, tried
    in that order.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'studycourseupdater'
    grok.name(util_name)

    name = u'StudentStudyCourse Importer (update only)'
    iface = IStudentStudyCourseImport
    factory_name = 'waeup.StudentStudyCourse'

    location_fields = []

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of the three locator column names
        and the field names of the schema in ``self.iface``.
        """
        locators = ['student_id', 'reg_number', 'matric_number']
        names = set(locators + getFields(self.iface).keys())
        return sorted(names)

    def checkHeaders(self, headerfields, mode='ignore'):
        """Check the column names of a CSV file.

        Raises `FatalCSVError` when no locator column is present or
        when a column name not starting with ``--`` occurs twice.
        Returns ``True`` otherwise.
        """
        has_locator = (
            'reg_number' in headerfields
            or 'student_id' in headerfields
            or 'matric_number' in headerfields)
        if not has_locator:
            raise FatalCSVError(
                "Need at least columns student_id "
                "or reg_number or matric_number for import!")
        # Names starting with '--' mark ignored columns; only the
        # remaining ones have to be unique.
        relevant = [title for title in headerfields
                    if not title.startswith('--')]
        if len(relevant) > len(set(relevant)):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def parentsExist(self, row, site):
        """Return the student object addressed by `row`, or ``None``.

        ``None`` is returned when `site` has no ``students`` entry,
        when `row` has no locator value, or when nothing matches.
        """
        if 'students' not in site.keys():
            return None
        student_id = row.get('student_id')
        if student_id:
            students = site['students']
            if student_id in students:
                return students[student_id]
            return None
        # Only the first non-empty one of the two catalog-indexed
        # locator columns is consulted.
        for key in ('reg_number', 'matric_number'):
            value = row.get(key)
            if not value:
                continue
            catalog = queryUtility(ICatalog, name='students_catalog')
            hits = list(catalog.searchResults(**{key: (value, value)}))
            return hits[0] if hits else None
        return None

    def entryExists(self, row, site):
        """Return the student addressed by `row` if it contains a
        ``studycourse`` item, else ``None``.
        """
        student = self.parentsExist(row, site)
        if student and 'studycourse' in student:
            return student
        return None

    def getEntry(self, row, site):
        """Return the ``studycourse`` item of the student addressed by
        `row`, or ``None`` if `entryExists` finds no such student.
        """
        student = self.entryExists(row, site)
        return student.get('studycourse') if student else None
Note: See TracBrowser for help on using the repository browser.