Ignore:
Timestamp:
15 Jan 2010, 23:35:40 (15 years ago)
Author:
uli
Message:
  • Catch exceptions when converter fails.
  • Output log of operation.
  • Add converter for courses.
  • Clean up.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • waeup/branches/ulif-importers/src/waeup/utils/batching.py

    r4811 r4821  
    66import grok
    77import csv
     8import os
     9import sys
     10import time
    811from zope.component import createObject
    912from zope.interface import Interface
    1013from zope.schema import getFields
    1114from zope.schema.interfaces import IText
    12 from waeup.interfaces import (IImporter, IFacultyContainer, IFaculty,
     15from waeup.interfaces import (IImporter, IFacultyContainer, IFaculty, ICourse,
    1316                              IDepartment, ISchemaTypeConverter, FatalCSVError)
    1417
     
    118121        if '--IGNORE--' in row.keys():
    119122            del row['--IGNORE--']
     123        warnings = []
    120124        for key, value in row.items():
    121125            converter = converter_dict.get(key, None)
    122126            if converter:
    123                 row.update({key:converter_dict[key].fromString(value)})
    124         return row
     127                try:
     128                    row.update({key:converter_dict[key].fromString(value)})
     129                except:
     130                    msg = "conversion error: field %s: %r" % (
     131                        key, sys.exc_info()[1])
     132                    warnings.append(msg)
     133        return (row, warnings)
    125134
    126135    def callFactory(self, *args, **kw):
     
    156165        """
    157166        raise NotImplementedError('method not implemented')
    158    
    159     def doImport(self, path, headerfields, mode='create'):
     167
    def createLogfile(self, path, fail_path, num, warnings, mode, user):
        """Write a log file summarizing an import run.

        The log is written to ``<path>.<mode>.msg``. It starts with a
        header (source path, mode, current time, user, path of the
        file holding failed datasets and a processed/successful/failed
        summary), followed by a separator line and one line per entry
        of `warnings`.

        `path` is the path of the imported source file.

        `fail_path` is the path reported as location of failed datasets.

        `num` is the overall number of processed lines.

        `warnings` is a mapping whose keys are sorted to order the
        output and whose values are the message texts. Its length is
        reported as the number of failed lines.

        `mode` and `user` are written to the header verbatim.
        """
        failmsg_path = "%s.%s.msg" % (path, mode)
        fd = open(failmsg_path, 'wb')
        try:
            # Make sure the file handle is released even if one of the
            # writes below fails (disk full, bad warning text, ...).
            fd.write("Source: %s\n" % path)
            fd.write("Mode: %s\n" % mode)
            fd.write("Date: %s\n" % time.ctime())
            fd.write("User: %s\n" % user)
            fd.write("Failed datasets: %s\n" % fail_path)
            fd.write("Processed: %s lines (%s successful/ %s failed)\n" % (
                    num, num - len(warnings), len(warnings)
                    ))
            fd.write("-" * 70 + '\n')
            # `failnum` is a running counter over the failed entries,
            # `linenum` the key under which the warning was stored.
            failnum = 1
            for linenum, text in sorted(warnings.items()):
                fd.write("line %s (%s): %s\n" % (failnum, linenum, text))
                failnum += 1
        finally:
            fd.close()
        return
     188   
     189    def doImport(self, path, headerfields, mode='create', user='Unknown'):
    160190        """Perform actual import.
    161191        """
     
    164194        converters = self.getFieldConverters(headerfields)
    165195        reader = csv.DictReader(open(path, 'rb'))
     196       
     197        failed_path = "%s.%s.pending" % (path, mode, )
     198        failed_headers = [x for x in mapping.values()
     199               if x != '--IGNORE--']
     200        failed_writer = csv.DictWriter(open(failed_path, 'wb'),
     201                                       failed_headers)
     202        failed_writer.writerow(dict([(x,x) for x in failed_headers]))
     203       
    166204        warnings = dict()
    167205        num =0
     
    170208            num += 1
    171209            string_row = self.applyMapping(raw_row, mapping)
    172             row = self.convertToTypes(string_row, converters)
     210            row, conv_warnings = self.convertToTypes(string_row, converters)
     211            if len(conv_warnings):
     212                warnings[num] = ', '.join(conv_warnings)
     213                failed_writer.writerow(row)
     214                continue
     215               
    173216            if mode == 'create':
    174217                if not self.parentsExist(row, site):
    175218                    warnings[num] = "Not all parents do exist yet. Skipping"
     219                    failed_writer.writerow(row)
    176220                    continue
    177221                if self.entryExists(row, site):
    178222                    warnings[num] = 'This object already exists. Skipping.'
     223                    failed_writer.writerow(row)
    179224                    continue
    180225                obj = self.callFactory()
     
    185230                if not self.entryExists(row, site):
    186231                    warnings[num] = "Cannot remove: no such entry."
     232                    failed_writer.writerow(row)
    187233                    continue
    188234                self.delEntry(row, site)
     
    191237                if obj is None:
    192238                    warnings[num] = "Cannot update: no such entry."
     239                    failed_writer.writerow(row)
    193240                    continue
    194241                self.updateEntry(obj, row, site)
    195                
    196         #for linenum, text in warnings.items():
    197         #    print "Line %s: %s" % (linenum, text)
     242
     243        if path.endswith('.pending'):
     244            os.unlink(path)
     245            os.rename(failed_path, path)
     246            failed_path = path
     247        self.createLogfile(path, failed_path, num, warnings, mode, user)
     248        if len(warnings) == 0:
     249            del failed_writer
     250            os.unlink(failed_path)
    198251        return (num, warnings)
    199252
     
    228281        parent = self.getParent(row, site)
    229282        parent.addFaculty(obj)
    230         print "ADD: ", obj
    231283        return
    232284
     
    234286        parent = self.getParent(row, site)
    235287        del parent[row['code']]
    236         print "REMOVE: ", row['code']
    237288        pass
    238289
     
    240291        for key, value in row.items():
    241292            setattr(obj, key, value)
    242         print "UPDATE: ", obj
    243293        pass
    244294
     
    278328        parent = self.getParent(row, site)
    279329        parent.addDepartment(obj)
    280         print "ADD: ", obj
    281330        return
    282331
     
    284333        parent = self.getParent(row, site)
    285334        del parent[row['code']]
    286         print "REMOVE: ", row['code']
    287         pass
     335        return
    288336
    289337    def updateEntry(self, obj, row, site):
    290338        for key, value in row.items():
    291339            setattr(obj, key, value)
    292         print "UPDATE: ", obj
    293         pass
     340        return
     341
     342class CourseImporter(BatchProcessor):
     343    grok.provides(IImporter)
     344    grok.context(Interface)
     345    util_name = 'courseimporter'
     346    grok.name(util_name)
     347
     348    name = u'Course importer'
     349    iface = ICourse
     350
     351    location_fields = ['code', 'faculty_code', 'department_code']
     352    factory_name = 'waeup.Course'
     353
     354    def parentsExist(self, row, site):
     355        if not 'faculties' in site.keys():
     356            return False
     357        if not row['faculty_code'] in site['faculties'].keys():
     358            return False
     359        faculty = site['faculties'][row['faculty_code']]
     360        return row['department_code'] in faculty.keys()
     361
     362    def entryExists(self, row, site):
     363        if not self.parentsExist(row, site):
     364            return False
     365        parent = self.getParent(row, site)
     366        return row['code'] in parent.keys()
     367
     368    def getParent(self, row, site):
     369        dept = site['faculties'][row['faculty_code']][row['department_code']]
     370        return dept.courses
     371
     372    def getEntry(self, row, site):
     373        if not self.entryExists(row, site):
     374            return None
     375        parent = self.getParent(row, site)
     376        return parent.get(row['code'])
     377   
     378    def addEntry(self, obj, row, site):
     379        parent = self.getParent(row, site)
     380        parent.addCourse(obj)
     381        return
     382
     383    def delEntry(self, row, site):
     384        parent = self.getParent(row, site)
     385        del parent[row['code']]
     386        return
     387
     388    def updateEntry(self, obj, row, site):
     389        for key, value in row.items():
     390            setattr(obj, key, value)
     391        return
     392
Note: See TracChangeset for help on using the changeset viewer.