source: main/waeup.sirp/branches/ulif-fasttables/src/waeup/sirp/jambtables/jambtables.py @ 5246

Last change on this file since 5246 was 5246, checked in by uli, 15 years ago

Remove unused add() method and better explain what we're doing
when filtering JAMB data.

File size: 4.9 KB
Line 
1##
2## jambtables.py
3## Login : <uli@pu.smp.net>
4## Started on  Tue Jun 22 06:31:42 2010 Uli Fouquet
5## $Id$
6##
7## Copyright (C) 2010 Uli Fouquet
8## This program is free software; you can redistribute it and/or modify
9## it under the terms of the GNU General Public License as published by
10## the Free Software Foundation; either version 2 of the License, or
11## (at your option) any later version.
12##
13## This program is distributed in the hope that it will be useful,
14## but WITHOUT ANY WARRANTY; without even the implied warranty of
15## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16## GNU General Public License for more details.
17##
18## You should have received a copy of the GNU General Public License
19## along with this program; if not, write to the Free Software
20## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21##
22import csv
23import os
24import shutil
25import tempfile
26import grok
27
28from datetime import datetime
29from BTrees.OOBTree import OOBTree
30from BTrees.Length import Length
31
#: Column names that the header row of a valid JAMB table CSV file
#: must provide.
JAMB_DATA_HEADERS = [
    'firstname',
    'lastname',
    'middlenames',
    'screening_type',
    'screening_venue',
    'reg_no',
    'sex',
    'course1',
    'jamb_state',
    'screening_date',
    'jamb_lga',
    'fst_sit_fname',
    'date_of_birth',
]
37
def filter_data(datadict):
    """Drop ignorable fields from a data row and normalize its values.

    Every field whose name contains ``ignore`` is removed. The values
    of all fields listed in `JAMB_DATA_HEADERS` are converted to
    unicode, except ``date_of_birth``, which is parsed from
    ``DD/MM/YYYY`` notation and turned into a `datetime.date` object.

    `datadict` is modified in place and also returned.

    Raises `KeyError` if one of the `JAMB_DATA_HEADERS` fields is
    missing and `ValueError` if ``date_of_birth`` can not be parsed.
    """
    # Loop over a snapshot of the keys: entries are deleted while
    # looping, which is not allowed on a live view of the keys.
    for key in list(datadict):
        # `csv.DictReader` collects surplus columns of overlong rows
        # under the key ``None``, which can not be tested with ``in``.
        # Such an entry is left untouched.
        if key is None or 'ignore' not in key:
            continue
        del datadict[key]
    for name in JAMB_DATA_HEADERS:
        datadict[name] = unicode(datadict[name])
    datadict['date_of_birth'] = datetime.strptime(
        datadict['date_of_birth'],
        '%d/%m/%Y'
        ).date()
    return datadict
58
59
class JAMBDataTable(grok.Model):
    """A data table that contains JAMB data.

    JAMB data tables are plain but fast as they store nearly no data
    inside the ZODB. All data is held on-disk in CSV tables.

    As a consequence these tables are read-only.
    """

    #: Set to the current time by :meth:`importFromCSV` and reset to
    #: ``None`` by :meth:`clear`.
    import_datetime = None

    def __init__(self):
        super(JAMBDataTable, self).__init__()
        # Path of the local copy of the imported CSV file (``None`` as
        # long as nothing was imported).
        self._datafile_path = None
        # NOTE(review): not updated anywhere in this class.
        self._data_len = 0
        # ``True`` only while the data file lives in a temporary
        # directory created by this object.
        self._temporary = False
        return

    def __del__(self):
        # Make sure temporary directories do not outlive the table.
        self.clear()

    def __iter__(self):
        """Iterate over all rows of the stored CSV file.

        Each row is passed through :func:`filter_data` and yielded as
        a dict. Nothing is yielded if no data file is set.
        """
        if self._datafile_path is None:
            # A plain ``return`` ends a generator. Raising
            # ``StopIteration`` by hand is turned into a
            # ``RuntimeError`` by newer Python versions (PEP 479).
            return
        datafile = open(self._datafile_path, 'rb')
        try:
            for line in csv.DictReader(datafile):
                yield filter_data(line)
        finally:
            # Release the file handle when iteration ends or the
            # generator is closed.
            datafile.close()

    def keys(self):
        """Get iterator over all registration numbers stored in table.
        """
        for item in self:
            yield item['reg_no']

    def items(self):
        """Get tuples of registration number and datasets for each entry in
           data table.
        """
        for item in self:
            yield (item['reg_no'], item)

    def clear(self):
        """Remove all existing entries.

        The table forgets its data file. If the file lives in a
        temporary directory, this directory is removed from disk.
        Files in a non-temporary storage are not deleted.
        """
        self.import_datetime = None
        if self._datafile_path is None:
            return
        if self._temporary:
            if os.path.exists(self._datafile_path):
                shutil.rmtree(os.path.dirname(self._datafile_path))
            # The temporary storage is gone (or was already missing);
            # do not treat a later storage as temporary by accident.
            self._temporary = False
        # Always drop the path, so that no stale reference to a
        # vanished file is kept.
        self._datafile_path = None

    def importFromCSV(self, filepath):
        """Importing data from a CSV file means to copy the source to a safe
           location.

        Any data imported before is dropped first, see :meth:`clear`.
        """
        self.clear()
        self.import_datetime = datetime.now()
        self._copyDataFile(filepath)

    def _copyDataFile(self, path):
        """Copy file in path to the JAMBData storage.

        The copy keeps the basename of `path` and becomes the data
        file of this table.

        See :meth:`_getJAMBTableStorage`.
        """
        storage = self._getJAMBTableStorage()
        self._datafile_path = os.path.join(
            storage, os.path.basename(path)
            )
        shutil.copy2(path, self._datafile_path)
        return

    def _getJAMBTableStorage(self):
        """Get a path to store copies of datatables.

        We normally store data in a ``jambdata`` subdir of the
        datacenter storage, but if `grok.getSite()` returns ``None``,
        we create a temporary dir and set `_temporary` to ``True``.

        A missing ``jambdata`` directory is created. Its parent
        directory must already exist.

        Note, that temporary dirs will be deleted when the
        JAMBDataTable object is destroyed.

        Returns absolute path to the JAMB data storage.
        """
        site = grok.getSite()
        if site is None:
            jambtable_storage = tempfile.mkdtemp()
            self._temporary = True
        else:
            datacenter = site['datacenter']
            jambtable_storage = os.path.join(datacenter.storage, 'jambdata')
            # A flag left over from an earlier temporary import would
            # make `clear` remove this whole directory.
            self._temporary = False
        if not os.path.isdir(jambtable_storage):
            os.mkdir(jambtable_storage)
        return os.path.abspath(jambtable_storage)
Note: See TracBrowser for help on using the repository browser.