source: main/waeup.sirp/branches/ulif-fasttables/src/waeup/sirp/jambtables/jambtables.py @ 5248

Last change on this file since 5248 was 5248, checked in by uli, 14 years ago

Mark JAMB data tables with appropriate interfaces.

File size: 5.0 KB
Line 
1##
2## jambtables.py
3## Login : <uli@pu.smp.net>
4## Started on  Tue Jun 22 06:31:42 2010 Uli Fouquet
5## $Id$
6##
7## Copyright (C) 2010 Uli Fouquet
8## This program is free software; you can redistribute it and/or modify
9## it under the terms of the GNU General Public License as published by
10## the Free Software Foundation; either version 2 of the License, or
11## (at your option) any later version.
12##
13## This program is distributed in the hope that it will be useful,
14## but WITHOUT ANY WARRANTY; without even the implied warranty of
15## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16## GNU General Public License for more details.
17##
18## You should have received a copy of the GNU General Public License
19## along with this program; if not, write to the Free Software
20## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21##
22import csv
23import os
24import shutil
25import tempfile
26import grok
27
28from datetime import datetime
29from BTrees.OOBTree import OOBTree
30from BTrees.Length import Length
31
32from waeup.sirp.jambtables.interfaces import IJAMBDataTable
33
#: Column names every valid JAMB table CSV file has to provide.
JAMB_DATA_HEADERS = [
    'firstname',
    'lastname',
    'middlenames',
    'screening_type',
    'screening_venue',
    'reg_no',
    'sex',
    'course1',
    'jamb_state',
    'screening_date',
    'jamb_lga',
    'fst_sit_fname',
    'date_of_birth',
]
39
def filter_data(datadict):
    """Clean up one raw JAMB data row in place and return it.

    Every field whose name *contains* the substring ``ignore`` is
    removed. Afterwards the value of each field listed in
    ``JAMB_DATA_HEADERS`` is converted to unicode, and
    ``date_of_birth`` is parsed from ``DD/MM/YYYY`` notation into a
    `datetime.date` object.

    `datadict` is modified in place; the very same dict is returned.

    Raises `KeyError` if a field named in ``JAMB_DATA_HEADERS`` is
    missing and `ValueError` if ``date_of_birth`` does not match the
    format ``%d/%m/%Y``.
    """
    # Walk over a snapshot of the keys: entries are deleted inside the
    # loop, which must not happen while iterating the live dict.
    for key in list(datadict):
        if 'ignore' in key:
            del datadict[key]
    for name in JAMB_DATA_HEADERS:
        datadict[name] = unicode(datadict[name])
    datadict['date_of_birth'] = datetime.strptime(
        datadict['date_of_birth'],
        '%d/%m/%Y'
        ).date()
    return datadict
60
61
class JAMBDataTable(grok.Model):
    """A data table that contains JAMB data.

    JAMB data tables are plain but fast as they store nearly no data
    inside the ZODB. All data is held on-disk in CSV tables.

    As a consequence these tables are read-only.
    """
    grok.implements(IJAMBDataTable)

    #: Time of the last successful CSV import, ``None`` if the table
    #: is empty.
    import_datetime = None

    def __init__(self):
        super(JAMBDataTable, self).__init__()
        # Path of the CSV copy this table reads from (None: no data).
        self._datafile_path = None
        self._data_len = 0
        # True while the CSV copy lives in a private temporary dir
        # that has to be removed by `clear`.
        self._temporary = False

    def __del__(self):
        """Drop the data (and any temporary dir) on destruction.
        """
        self.clear()

    def __iter__(self):
        """Iterate over all rows of the stored CSV file.

        Each row is yielded as a dict that was processed by
        :func:`filter_data`. Nothing is yielded if no data was
        imported.
        """
        if self._datafile_path is None:
            # A plain return ends a generator; raising StopIteration
            # by hand is turned into a RuntimeError by PEP 479.
            return
        datafile = open(self._datafile_path, 'rb')
        # try/finally (instead of `with`) keeps this usable on old
        # interpreters and still closes the file if the consumer
        # stops iterating early.
        try:
            for row in csv.DictReader(datafile):
                yield filter_data(row)
        finally:
            datafile.close()

    def keys(self):
        """Get iterator over all registration numbers stored in table.
        """
        for item in self:
            yield item['reg_no']

    def items(self):
        """Get tuples of registration number and datasets for each entry in
           data table.
        """
        for item in self:
            yield (item['reg_no'], item)

    def clear(self):
        """Remove all existing entries.

        Resets `import_datetime` and forgets the stored CSV file. If
        the file lives in a temporary directory, that directory is
        removed from disk. Files in a permanent storage are left
        untouched.
        """
        self.import_datetime = None
        if self._datafile_path is None:
            return
        if self._temporary:
            tmp_dir = os.path.dirname(self._datafile_path)
            if os.path.isdir(tmp_dir):
                shutil.rmtree(tmp_dir)
            # The temporary dir is gone; a stale flag would make a
            # later clear() wipe a permanent storage directory.
            self._temporary = False
        self._datafile_path = None

    def importFromCSV(self, filepath):
        """Importing data from a CSV file means to copy the source to a safe
           location.

        Any data imported before is dropped first. `import_datetime`
        is only set after the copy succeeded. If copying fails, the
        table is left empty and the error is re-raised.
        """
        self.clear()
        try:
            self._copyDataFile(filepath)
        except EnvironmentError:
            # Do not keep a path to a file that was never written.
            self.clear()
            raise
        self.import_datetime = datetime.now()

    def _copyDataFile(self, path):
        """Copy file in path to the JAMBData storage.

        The copy keeps the base name of `path`; its location is
        remembered in `_datafile_path`.

        See :meth:`_getJAMBTableStorage`.
        """
        storage = self._getJAMBTableStorage()
        self._datafile_path = os.path.join(
            storage, os.path.basename(path)
            )
        shutil.copy2(path, self._datafile_path)

    def _getJAMBTableStorage(self):
        """Get a path to store copies of datatables.

        We normally store data in a ``jambdata`` subdir of the
        datacenter storage of the current site. If no site is
        available, we create a temporary dir and set `_temporary` to
        ``True``.

        Any not existent directory is created.

        Note, that temporary dirs will be deleted when the
        JAMBDataTable object is cleared or destroyed.

        Returns absolute path to the JAMB data storage.
        """
        site = grok.getSite()
        if site is None:
            jambtable_storage = tempfile.mkdtemp()
            self._temporary = True
        else:
            datacenter = site['datacenter']
            jambtable_storage = os.path.join(datacenter.storage, 'jambdata')
            # Permanent storage must never be treated as disposable.
            self._temporary = False
        if not os.path.isdir(jambtable_storage):
            # makedirs also creates missing parent directories.
            os.makedirs(jambtable_storage)
        return os.path.abspath(jambtable_storage)
Note: See TracBrowser for help on using the repository browser.