source: main/waeup.kofa/trunk/src/waeup/kofa/applicants/batching.py @ 8765

Last change on this file since 8765 was 8617, checked in by uli, 13 years ago

Reduce number of site lookups during imports.

  • Property svn:keywords set to Id
File size: 13.5 KB
Line 
1## $Id: batching.py 8617 2012-06-03 13:57:01Z uli $
2##
3## Copyright (C) 2011 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""Batch processing for applicants.
19"""
20import csv
21import grok
22from zope.schema import getFields
23from zope.interface import Interface
24from zope.component import queryUtility
25from hurry.workflow.interfaces import IWorkflowState
26from zope.catalog.interfaces import ICatalog
27from waeup.kofa.interfaces import (
28    IBatchProcessor, IObjectConverter, FatalCSVError, IGNORE_MARKER,
29    IObjectHistory, IUserAccount)
30from waeup.kofa.interfaces import MessageFactory as _
31from waeup.kofa.utils.batching import BatchProcessor
32from waeup.kofa.applicants.interfaces import (
33    IApplicantsContainer, IApplicant, IApplicantUpdateByRegNo)
34from waeup.kofa.applicants.workflow import  IMPORTABLE_STATES, CREATED
35
class ApplicantsContainerProcessor(BatchProcessor):
    """Batch processor for applicants containers.

    Containers live below ``site['applicants']`` and are addressed by
    the value of the ``code`` column of an import row.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'applicants container processor'
    grok.name(util_name)

    name = u'Applicants Container Processor'
    mode = u'create'
    iface = IApplicantsContainer

    location_fields = ['code']
    factory_name = 'waeup.ApplicantsContainer'

    def parentsExist(self, row, site):
        """Tell whether `site` contains an ``applicants`` entry."""
        site_keys = site.keys()
        return 'applicants' in site_keys

    def entryExists(self, row, site):
        """Tell whether a container with the row's code already exists."""
        code = row['code']
        return code in site['applicants'].keys()

    def getParent(self, row, site):
        """Return the object holding all applicants containers."""
        return site['applicants']

    def getEntry(self, row, site):
        """Return the container addressed by `row` or ``None``."""
        if self.entryExists(row, site):
            holder = self.getParent(row, site)
            return holder.get(row['code'])
        return None

    def addEntry(self, obj, row, site):
        """Store `obj` in the parent under the row's code."""
        holder = self.getParent(row, site)
        holder[row['code']] = obj

    def delEntry(self, row, site):
        """Remove the container addressed by `row` from the parent."""
        holder = self.getParent(row, site)
        del holder[row['code']]
76
class ApplicantProcessor(BatchProcessor):
    """A batch processor for IApplicant objects.

    In create mode container_code is required. If application_number is given
    an applicant with this number is created in the designated container.
    If application_number is not given a random application_number is assigned.
    applicant_id is being determined by the system and can't be imported.

    In update or remove mode container_code and application_number columns
    must not exist. The applicant object is solely searched by its applicant_id
    or reg_number.
    """
    grok.implements(IBatchProcessor)
    grok.provides(IBatchProcessor)
    grok.context(Interface)
    util_name = 'applicantprocessor'
    grok.name(util_name)
    name = u'Applicant Processor'
    iface = IApplicant
    iface_byregnumber = IApplicantUpdateByRegNo
    location_fields = ['']
    factory_name = 'waeup.Applicant'

    mode = None

    @property
    def available_fields(self):
        """Sorted, duplicate-free list of importable column names.

        Consists of the field names of `iface` plus the columns handled
        specially by this processor.
        """
        special = ['application_number', 'container_code', 'state',
                   'password']
        return sorted(set(special) | set(getFields(self.iface).keys()))

    def checkHeaders(self, headerfields, mode='create'):
        """Check that the CSV header columns fit the import `mode`.

        Returns ``True`` if the headers are acceptable and raises
        `FatalCSVError` otherwise.
        """
        has_container_code = 'container_code' in headerfields
        has_application_number = 'application_number' in headerfields
        has_applicant_id = 'applicant_id' in headerfields
        has_reg_number = 'reg_number' in headerfields
        if mode == 'create':
            if not has_container_code:
                raise FatalCSVError(
                    "Need at least container_code column!")
            if has_applicant_id:
                raise FatalCSVError(
                    "applicant_id can't be imported in create mode!")
            for field in self.required_fields:
                if field not in headerfields:
                    raise FatalCSVError(
                        "Need at least columns %s for import!" %
                        ', '.join(["'%s'" % x for x in self.required_fields]))
        if mode in ('update', 'remove'):
            if not has_applicant_id and not has_reg_number:
                raise FatalCSVError(
                    "Need at least column reg_number or applicant_id!")
            if has_container_code or has_application_number:
                raise FatalCSVError(
                    "container_code or application_number can't be imported " +
                    "in update or remove mode!")
        # Columns starting with '--' are ignored; all others must be unique.
        not_ignored_fields = [x for x in headerfields
                              if not x.startswith('--')]
        if len(set(not_ignored_fields)) < len(not_ignored_fields):
            raise FatalCSVError(
                "Double headers: each column name may only appear once.")
        return True

    def getLocator(self, row):
        """Return the name of the column used to locate the applicant.

        The first of ``container_code``, ``applicant_id`` and
        ``reg_number`` holding a real value (neither missing nor the
        ignore marker) wins. Returns ``None`` if none of them is usable.
        """
        for column in ('container_code', 'applicant_id', 'reg_number'):
            if row.get(column, None) not in (IGNORE_MARKER, None):
                return column
        return None

    def _searchCatalog(self, field, value):
        """Return the list of ``applicants_catalog`` results whose
        `field` index matches exactly `value`.
        """
        cat = queryUtility(ICatalog, name='applicants_catalog')
        return list(cat.searchResults(**{field: (value, value)}))

    def getParent(self, row, site):
        """Return the container of the applicant addressed by `row`.

        Returns ``None`` if no container can be determined.
        """
        locator = self.getLocator(row)
        if locator == 'container_code':
            return site['applicants'].get(row['container_code'], None)
        if locator in ('applicant_id', 'reg_number'):
            results = self._searchCatalog(locator, row[locator])
            if results:
                return results[0].__parent__
        return None

    def parentsExist(self, row, site):
        """Tell whether a container can be found for `row`."""
        return self.getParent(row, site) is not None

    def getEntry(self, row, site):
        """Return the applicant addressed by `row` or ``None``."""
        locator = self.getLocator(row)
        if locator == 'container_code':
            if row.get('application_number', None) in (IGNORE_MARKER, None):
                # Without an application number nothing can be looked up.
                return None
            # Resolve the parent only once to save lookups.
            parent = self.getParent(row, site)
            if parent is None:
                return None
            return parent.get(row['application_number'])
        if locator in ('applicant_id', 'reg_number'):
            results = self._searchCatalog(locator, row[locator])
            if results:
                return results[0]
        return None

    def entryExists(self, row, site):
        """Tell whether an applicant can be found for `row`."""
        return self.getEntry(row, site) is not None

    def addEntry(self, obj, row, site):
        """Add applicant `obj` to its container and note the import in
        the object's history.
        """
        parent = self.getParent(row, site)
        parent.addApplicant(obj)
        history = IObjectHistory(obj)
        history.addMessage(_('Application record imported'))
        return

    def delEntry(self, row, site):
        """Remove the applicant addressed by `row`, if there is one."""
        applicant = self.getEntry(row, site)
        if applicant is not None:
            parent = applicant.__parent__
            del parent[applicant.application_number]
        return

    def updateEntry(self, obj, row, site):
        """Update obj to the values given in row.

        The columns ``application_number``, ``password`` and ``state``
        are handled here and removed from `row` (which is modified in
        place); all remaining values are applied by the base class.

        Returns a string listing the changed items, which is also
        written to the log.
        """
        items_changed = ''
        # Remove application_number from row if empty
        if 'application_number' in row and row['application_number'] in (
            None, IGNORE_MARKER):
            row.pop('application_number')

        # Update applicant_id from application_number and container code
        # if application_number is given
        if 'application_number' in row:
            obj.applicant_id = u'%s_%s' % (
                row['container_code'], row['application_number'])
            items_changed += ('%s=%s, ' % ('applicant_id', obj.applicant_id))
            row.pop('application_number')

        # Update password
        if 'password' in row:
            passwd = row.get('password', IGNORE_MARKER)
            if passwd not in ('', IGNORE_MARKER):
                if passwd.startswith('{SSHA}'):
                    # already encrypted password
                    obj.password = passwd
                else:
                    # not yet encrypted password
                    IUserAccount(obj).setPassword(passwd)
                # NOTE(review): the password value as given in the row
                # becomes part of the log message below -- confirm that
                # this is intended.
                items_changed += ('%s=%s, ' % ('password', passwd))
            row.pop('password')

        # Update registration state
        if 'state' in row:
            state = row.get('state', IGNORE_MARKER)
            if state not in (IGNORE_MARKER, ''):
                IWorkflowState(obj).setState(state)
                msg = _("State '${a}' set", mapping = {'a':state})
                history = IObjectHistory(obj)
                history.addMessage(msg)
                items_changed += ('%s=%s, ' % ('state', state))
            row.pop('state')

        # apply other values...
        items_changed += super(ApplicantProcessor, self).updateEntry(
            obj, row, site)

        # Log actions...
        parent = self.getParent(row, site)
        if self.getLocator(row) == 'container_code':
            # Create mode: rows are located by container_code only when
            # creating applicants (see checkHeaders).
            parent.__parent__.logger.info(
                'Applicant imported: %s' % items_changed)
        else:
            # Update mode: the applicant was located by applicant_id or
            # reg_number.
            parent.__parent__.logger.info(
                'Applicant updated: %s' % items_changed)
        return items_changed

    def getMapping(self, path, headerfields, mode):
        """Get a mapping from CSV file headerfields to actually used fieldnames.

        In remove mode only the ``applicant_id`` and ``reg_number``
        columns are mapped.
        """
        result = dict()
        # Only the header line is needed; make sure the file gets closed
        # again. Binary mode is what the Python 2 csv module expects.
        with open(path, 'rb') as csvfile:
            raw_header = next(csv.reader(csvfile))
        for num, field in enumerate(headerfields):
            if field not in ('applicant_id', 'reg_number') and mode == 'remove':
                continue
            if field == u'--IGNORE--':
                # Skip ignored columns in failed and finished data files.
                continue
            result[raw_header[num]] = field
        return result

    def checkConversion(self, row, mode='create'):
        """Validates all values in row.

        Returns a tuple ``(errs, inv_errs, conv_dict)`` as delivered by
        the object converter, extended by applicant specific checks.
        """
        iface = self.iface
        if self.getLocator(row) == 'reg_number' or mode == 'remove':
            iface = self.iface_byregnumber
        converter = IObjectConverter(iface)
        errs, inv_errs, conv_dict = converter.fromStringDict(
            row, self.factory_name, mode=mode)
        cert = conv_dict.get('course1', None)
        if cert is not None and (mode in ('create', 'update')):
            # course1 application category must match container's.
            # `self.site` is normally set by doImport(); fall back to a
            # regular site lookup if this method is called on its own.
            site = getattr(self, 'site', None)
            if site is None:
                site = grok.getSite()
            parent = self.getParent(row, site)
            if parent is None:
                # Report a row error instead of failing with an
                # AttributeError on a missing container.
                errs.append(('container', 'not found'))
            elif cert.application_category != parent.application_category:
                errs.append(('course1', 'wrong application category'))
        if 'state' in row and row['state'] not in IMPORTABLE_STATES:
            if row['state'] not in (IGNORE_MARKER, ''):
                errs.append(('state','not allowed'))
            else:
                # state is an attribute of Applicant and must not
                # be changed if empty
                conv_dict['state'] = IGNORE_MARKER
        application_number = row.get('application_number', None)
        if application_number in (IGNORE_MARKER, ''):
            conv_dict['application_number'] = IGNORE_MARKER
        return errs, inv_errs, conv_dict

    def checkUpdateRequirements(self, obj, row, site):
        """Checks requirements the object must fulfill when being updated.

        This method is not used in case of deleting or adding objects.

        Returns error messages as strings in case of requirement
        problems.
        """
        if obj.state == CREATED:
            return 'Applicant is blocked.'
        return None

    def doImport(self, *args, **kw):
        """Remember the current site and run the regular import."""
        # XXX: Not thread-safe.  Parallel applicant imports into
        # different sites could mean a mess.  Luckily this is not a
        # typical use-case. On the other hand it spares thousands of
        # site lookups during large imports.
        # XXX: Maybe this should go into Importer base.
        self.site = grok.getSite() # needed by checkConversion()
        return super(ApplicantProcessor, self).doImport(*args, **kw)
Note: See TracBrowser for help on using the repository browser.