##
## jambtables.py
## Login :
## Started on Tue Jun 22 06:31:42 2010 Uli Fouquet
## $Id$
##
## Copyright (C) 2010 Uli Fouquet
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##
import csv
import os
import shutil
import tempfile
import grok

from datetime import datetime
from BTrees.OOBTree import OOBTree
from BTrees.Length import Length

from waeup.sirp.jambtables.interfaces import IJAMBDataTable

#: The header fields required for a valid JAMB table CSV file.
JAMB_DATA_HEADERS = [
    'firstname', 'lastname', 'middlenames', 'screening_type',
    'screening_venue', 'reg_no', 'sex', 'course1', 'jamb_state',
    'screening_date', 'jamb_lga', 'fst_sit_fname', 'date_of_birth']


def filter_data(datadict):
    """Filter and convert the fields of a single JAMB data row.

    All items whose key contains the string ``ignore`` are removed.
    Every field listed in :data:`JAMB_DATA_HEADERS` is converted to
    unicode, except ``date_of_birth``, which is parsed from
    ``DD/MM/YYYY`` notation and turned into a `datetime.date` object.

    `datadict` is modified in place and also returned.

    Raises `KeyError` if a field from :data:`JAMB_DATA_HEADERS` is
    missing and `ValueError` if ``date_of_birth`` cannot be parsed.
    """
    # Iterate over a snapshot of the keys: we delete entries while
    # looping, which must not happen on a live view of the dict.
    for key in list(datadict):
        if 'ignore' in key:
            del datadict[key]
    for name in JAMB_DATA_HEADERS:
        datadict[name] = unicode(datadict[name])
    datadict['date_of_birth'] = datetime.strptime(
        datadict['date_of_birth'], '%d/%m/%Y').date()
    return datadict


class JAMBDataTable(grok.Model):
    """A data table that contains JAMB data.

    JAMB data tables are plain but fast as they store nearly no data
    inside the ZODB. All data is held on-disk in CSV tables. As a
    consequence these tables are read-only.
    """
    grok.implements(IJAMBDataTable)

    #: The datetime when data was imported.
    import_datetime = None

    #: The username of the person that initiated the last import.
    importer_username = None

    def __init__(self):
        super(JAMBDataTable, self).__init__()
        # Path of the local copy of the imported CSV file (if any).
        self._datafile_path = None
        self._data_len = 0
        # True only while the data file lives in a temporary directory
        # created (and therefore owned) by this object.
        self._temporary = False

    def __del__(self):
        self.clear()

    def __iter__(self):
        """Iterate over all datasets stored in the table.

        Yields one dict per CSV row, processed by :func:`filter_data`.
        If no data was imported yet, nothing is yielded.

        The data file is closed as soon as the iteration finishes or
        the generator is closed.
        """
        if self._datafile_path is None:
            # No data available: end the generator with a plain
            # ``return`` instead of raising `StopIteration` manually.
            return
        csv_file = open(self._datafile_path, 'rb')
        try:
            for line in csv.DictReader(csv_file):
                yield filter_data(line)
        finally:
            # Make sure we do not leak file handles, even if the
            # consumer stops iterating early.
            csv_file.close()

    def keys(self):
        """Get iterator over all registration numbers stored in table.
        """
        for item in self:
            yield item['reg_no']

    def items(self):
        """Get tuples of registration number and datasets for each
        entry in data table.
        """
        for item in self:
            yield (item['reg_no'], item)

    def clear(self):
        """Remove all existing entries.

        Unsets also the :attr:`import_datetime` and
        :attr:`importer_username` attributes.

        If the data file was stored in a temporary directory, that
        directory is removed from disk. Data files stored elsewhere
        are left untouched; only the reference to them is dropped.
        """
        self.import_datetime = None
        self.importer_username = None
        if self._datafile_path is None:
            return
        if self._temporary:
            temp_dir = os.path.dirname(self._datafile_path)
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
            # The temporary dir is gone; never treat a storage set
            # later on as disposable by accident.
            self._temporary = False
        # Always drop the reference, also if the file vanished
        # already, so that we do not keep pointing to a dead path.
        self._datafile_path = None

    def importFromCSV(self, filepath, username=None):
        """Importing data from a CSV file means to copy the source to
        a safe location.

        Any data imported before is cleared first.

        If the username is set, it will be stored as well in
        :attr:`importer_username`.
        """
        self.clear()
        self.import_datetime = datetime.now()
        self.importer_username = None
        if username is not None:
            self.importer_username = unicode(username)
        self._copyDataFile(filepath)

    def _copyDataFile(self, path):
        """Copy file in path to the JAMBData storage.

        The copy keeps the basename of `path`; its location is
        remembered as the current data file.

        See :meth:`_getJAMBTableStorage`.
        """
        storage = self._getJAMBTableStorage()
        self._datafile_path = os.path.join(
            storage, os.path.basename(path))
        shutil.copy2(path, self._datafile_path)

    def _getJAMBTableStorage(self):
        """Get a path to store copies of datatables.

        We normally store data in a ``jambdata`` subdir of datacenter,
        but if no site is available, we create a temporary dir and
        set `_temporary` to ``True``.

        The ``jambdata`` directory is created if it does not exist
        yet.

        Note, that temporary dirs will be deleted when the
        JAMBDataTable object is cleared or destroyed.

        Returns absolute path to the JAMB data storage.
        """
        site = grok.getSite()
        if site is None:
            jambtable_storage = tempfile.mkdtemp()
            self._temporary = True
        else:
            datacenter = site['datacenter']
            jambtable_storage = os.path.join(datacenter.storage, 'jambdata')
            # Persistent storage must never be removed by `clear`.
            self._temporary = False
        if not os.path.isdir(jambtable_storage):
            os.mkdir(jambtable_storage)
        return os.path.abspath(jambtable_storage)