##
## jambtables.py
## Login :
## Started on Tue Jun 22 06:31:42 2010 Uli Fouquet
## $Id$
##
## Copyright (C) 2010 Uli Fouquet
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##
"""WAeUP SIRP support for JAMB tables.

JAMB tables are datasets delivered by JAMB.
"""
import csv
import os
import shutil
import tempfile
import grok

from datetime import datetime
from BTrees.OOBTree import OOBTree
from BTrees.Length import Length

from waeup.sirp.jambtables.interfaces import IJAMBDataTable

#: The header fields required for a valid JAMB table CSV file.
JAMB_DATA_HEADERS = [
    'firstname', 'lastname', 'middlenames', 'screening_type',
    'screening_venue', 'reg_no', 'sex', 'course1', 'jamb_state',
    'screening_date', 'jamb_lga', 'fst_sit_fname', 'date_of_birth']


def filter_data(datadict):
    """Drop ignorable fields from a CSV row and normalize the rest.

    Every field whose name contains ``ignore`` is removed.

    The values of all fields listed in :data:`JAMB_DATA_HEADERS` are
    converted to unicode, except ``date_of_birth``, which is parsed
    from ``DD/MM/YYYY`` notation and turned into a `datetime.date`
    object.

    `datadict` is modified in place and returned as well.

    Raises `KeyError` if one of the required headers is missing and
    `ValueError` if ``date_of_birth`` is not in ``DD/MM/YYYY`` format.
    """
    # Loop over a snapshot of the keys, as entries are deleted on the way.
    for key in list(datadict.keys()):
        if 'ignore' in key:
            del datadict[key]
    for name in JAMB_DATA_HEADERS:
        datadict[name] = unicode(datadict[name])
    datadict['date_of_birth'] = datetime.strptime(
        datadict['date_of_birth'], '%d/%m/%Y').date()
    return datadict


class JAMBDataTable(grok.Model):
    """A data table that contains JAMB data.

    JAMB data tables are plain but fast as they store nearly no data
    inside the ZODB. All data is held on-disk in CSV tables.

    As a consequence these tables are read-only.
    """
    #: Implement :class:`IJAMBDataTable`
    grok.implements(IJAMBDataTable)

    #: The datetime when data was imported.
    import_datetime = None

    #: The username of the person that initiated the last import.
    importer_username = None

    def __init__(self):
        super(JAMBDataTable, self).__init__()
        # Path of the CSV copy that backs this table (None: no data).
        self._datafile_path = None
        self._data_len = 0
        # True while the CSV copy lives in a throw-away temporary dir.
        self._temporary = False
        return

    def __del__(self):
        # Get rid of temporary data files when the table goes away.
        self.clear()

    def __iter__(self):
        """Iterate over all datasets stored in the table.

        Each dataset is a dict as returned by :func:`filter_data`. If
        no data was imported yet, nothing is yielded.
        """
        if self._datafile_path is None:
            return
        datafile = open(self._datafile_path, 'rb')
        # try/finally makes sure the file is closed, even if the
        # consumer stops iterating early or a row fails to convert.
        try:
            for line in csv.DictReader(datafile):
                yield filter_data(line)
        finally:
            datafile.close()

    def keys(self):
        """Get iterator over all registration numbers stored in table.
        """
        for item in self:
            yield item['reg_no']

    def items(self):
        """Get tuples of registration number and datasets for each
        entry in data table.
        """
        for item in self:
            yield (item['reg_no'], item)

    def clear(self):
        """Remove all existing entries.

        Unsets also the :attr:`import_datetime` and
        :attr:`importer_username` attributes.

        If the data file is stored in a temporary directory, that
        directory is removed from disk. Data files stored in a regular
        storage are left untouched; the table only forgets about them.
        """
        self.import_datetime = None
        self.importer_username = None
        if self._datafile_path is None:
            return
        if self._temporary:
            if os.path.exists(self._datafile_path):
                shutil.rmtree(os.path.dirname(self._datafile_path))
            # The temporary dir is gone (or was already). Reset the
            # flag so that a later clear() can never remove the
            # directory of a data file stored somewhere else.
            self._temporary = False
        # Always forget the path, also if the file already vanished.
        self._datafile_path = None

    def importFromCSV(self, filepath, username=None):
        """Importing data from a CSV file means to copy the source to
        a safe location.

        Data imported before is cleared. If the username is set, it
        will be stored as well in :attr:`importer_username`.
        """
        self.clear()
        self.import_datetime = datetime.now()
        if username is not None:
            self.importer_username = unicode(username)
        self._copyDataFile(filepath)

    def _copyDataFile(self, path):
        """Copy file in path to the JAMBData storage.

        The copy keeps the basename of `path` and becomes the data
        file of this table.

        See :meth:`_getJAMBTableStorage`.
        """
        storage = self._getJAMBTableStorage()
        self._datafile_path = os.path.join(
            storage, os.path.basename(path))
        shutil.copy2(path, self._datafile_path)
        return

    def _getJAMBTableStorage(self):
        """Get a path to store copies of datatables.

        We normally store data in a ``jambdata`` subdir of datacenter,
        but if no site is available, we create a temporary dir and set
        `_temporary` to ``True``.

        Any not existent directory is created.

        Note, that temporary dirs will be deleted when the
        JAMBDataTable object is destroyed.

        Returns absolute path to the JAMB data storage.
        """
        site = grok.getSite()
        if site is None:
            jambtable_storage = tempfile.mkdtemp()
            self._temporary = True
        else:
            datacenter = site['datacenter']
            jambtable_storage = os.path.join(datacenter.storage, 'jambdata')
            # Persistent storage must never be treated as throw-away.
            self._temporary = False
        if not os.path.isdir(jambtable_storage):
            # makedirs() also creates missing parent directories.
            os.makedirs(jambtable_storage)
        return os.path.abspath(jambtable_storage)