source: main/waeup.sirp/trunk/src/waeup/sirp/jambtables/jambtables.py @ 5400

Last change on this file since 5400 was 5328, checked in by uli, 14 years ago

Merge changes from ulif-fasttables back into trunk.

File size: 5.7 KB
Line 
1##
2## jambtables.py
3## Login : <uli@pu.smp.net>
4## Started on  Tue Jun 22 06:31:42 2010 Uli Fouquet
5## $Id$
6##
7## Copyright (C) 2010 Uli Fouquet
8## This program is free software; you can redistribute it and/or modify
9## it under the terms of the GNU General Public License as published by
10## the Free Software Foundation; either version 2 of the License, or
11## (at your option) any later version.
12##
13## This program is distributed in the hope that it will be useful,
14## but WITHOUT ANY WARRANTY; without even the implied warranty of
15## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16## GNU General Public License for more details.
17##
18## You should have received a copy of the GNU General Public License
19## along with this program; if not, write to the Free Software
20## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21##
22"""WAeUP SIRP support for JAMB tables.
23
24JAMB tables are datasets delivered by JAMB.
25"""
26import csv
27import os
28import shutil
29import tempfile
30import grok
31
32from datetime import datetime
33from BTrees.OOBTree import OOBTree
34from BTrees.Length import Length
35
36from waeup.sirp.jambtables.interfaces import IJAMBDataTable
37
#: Names of the columns every valid JAMB table CSV file must provide.
JAMB_DATA_HEADERS = [
    'firstname',
    'lastname',
    'middlenames',
    'screening_type',
    'screening_venue',
    'reg_no',
    'sex',
    'course1',
    'jamb_state',
    'screening_date',
    'jamb_lga',
    'fst_sit_fname',
    'date_of_birth',
    ]
43
def filter_data(datadict):
    """Clean up one raw row of a JAMB table.

    All fields whose name contains ``ignore`` are removed. The values
    of all fields listed in ``JAMB_DATA_HEADERS`` are converted to
    unicode, except ``date_of_birth``, which is parsed from
    ``DD/MM/YYYY`` notation and turned into a `datetime.date` object.

    `datadict` is modified in place and returned as well.

    Raises `KeyError` if one of the required header fields is missing
    and `ValueError` if ``date_of_birth`` does not match the expected
    date format.
    """
    # Loop over a snapshot of the keys: entries are deleted from the
    # dict inside the loop, which is only safe when we do not iterate
    # over the dict (or a live view of it) itself.
    for key in list(datadict):
        if 'ignore' in key:
            del datadict[key]
    for name in JAMB_DATA_HEADERS:
        datadict[name] = unicode(datadict[name])
    # The date is parsed from the already converted unicode value.
    datadict['date_of_birth'] = datetime.strptime(
        datadict['date_of_birth'],
        '%d/%m/%Y'
        ).date()
    return datadict
64
65
class JAMBDataTable(grok.Model):
    """A data table that contains JAMB data.

    JAMB data tables are plain but fast as they store nearly no data
    inside the ZODB. All data is held on-disk in CSV tables.

    As a consequence these tables are read-only.
    """
    #: Implement :class:`IJAMBDataTable`
    grok.implements(IJAMBDataTable)

    #: The datetime when data was imported.
    import_datetime = None

    #: The username of the person that initiated the last import.
    importer_username = None

    def __init__(self):
        super(JAMBDataTable, self).__init__()
        # Absolute path of the local copy of the imported CSV file or
        # ``None`` if no data was imported yet.
        self._datafile_path = None
        self._data_len = 0
        # ``True`` if the data file lives in a temporary directory that
        # has to be removed by us.
        self._temporary = False
        return

    def __del__(self):
        # Make sure temporary storages do not survive the table.
        self.clear()

    def __iter__(self):
        """Iterate over all datasets of the imported CSV file.

        Each dataset is a dict as returned by :func:`filter_data`. If
        no data was imported, nothing is yielded.
        """
        if self._datafile_path is None:
            # A plain ``return`` ends a generator; raising
            # ``StopIteration`` by hand is an error with PEP 479.
            return
        datafile = open(self._datafile_path, 'rb')
        try:
            for line in csv.DictReader(datafile):
                yield filter_data(line)
        finally:
            # Also executed when the generator is closed before being
            # exhausted, so the file handle is never leaked.
            datafile.close()

    def keys(self):
        """Get iterator over all registration numbers stored in table.
        """
        for item in self:
            yield item['reg_no']

    def items(self):
        """Get tuples of registration number and datasets for each entry in
           data table.
        """
        for item in self:
            yield (item['reg_no'], item)

    def clear(self):
        """Remove all existing entries.

        Unsets also the :attr:`import_datetime` and
        :attr:`importer_username` attributes.

        Data files stored in a temporary directory are removed
        together with that directory. Data files stored elsewhere are
        left on disk; the table only forgets about them.
        """
        self.import_datetime = None
        self.importer_username = None
        if self._datafile_path is None:
            return
        if self._temporary:
            temp_dir = os.path.dirname(self._datafile_path)
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
            # Never carry the flag over to the next import: a stale
            # ``True`` would make a later clear() remove a regular,
            # non-temporary storage directory.
            self._temporary = False
        self._datafile_path = None

    def importFromCSV(self, filepath, username=None):
        """Importing data from a CSV file means to copy the source to a safe
           location.

           If the username is set, it will be stored as well in
           :attr:`importer_username`.
        """
        # clear() also resets `import_datetime` and `importer_username`.
        self.clear()
        self.import_datetime = datetime.now()
        if username is not None:
            self.importer_username = unicode(username)
        self._copyDataFile(filepath)

    def _copyDataFile(self, path):
        """Copy file in path to the JAMBData storage.

        The copy keeps the basename of `path`; its location is stored
        in `_datafile_path`.

        See :meth:`_getJAMBTableStorage`.
        """
        storage = self._getJAMBTableStorage()
        self._datafile_path = os.path.join(
            storage, os.path.basename(path)
            )
        shutil.copy2(path, self._datafile_path)
        return

    def _getJAMBTableStorage(self):
        """Get a path to store copies of datatables.

        We normally store data in a ``jambdata`` subdir of datacenter,
        but if no site is available, we create a temporary dir and set
        `_temporary` to ``True``.

        Any not existent directory is created.

        Note, that temporary dirs will be deleted when the
        JAMBDataTable object is destroyed.

        Returns absolute path to the JAMB data storage.
        """
        site = grok.getSite()
        if site is None:
            jambtable_storage = tempfile.mkdtemp()
            self._temporary = True
        else:
            datacenter = site['datacenter']
            jambtable_storage = os.path.join(datacenter.storage, 'jambdata')
            # Set explicitly, so the flag always describes the storage
            # returned by this very call.
            self._temporary = False
        if not os.path.isdir(jambtable_storage):
            # makedirs() also creates missing parent directories.
            os.makedirs(jambtable_storage)
        return os.path.abspath(jambtable_storage)
Note: See TracBrowser for help on using the repository browser.