1 | ## |
---|
2 | ## jambtables.py |
---|
3 | ## Login : <uli@pu.smp.net> |
---|
4 | ## Started on Tue Jun 22 06:31:42 2010 Uli Fouquet |
---|
5 | ## $Id$ |
---|
6 | ## |
---|
7 | ## Copyright (C) 2010 Uli Fouquet |
---|
8 | ## This program is free software; you can redistribute it and/or modify |
---|
9 | ## it under the terms of the GNU General Public License as published by |
---|
10 | ## the Free Software Foundation; either version 2 of the License, or |
---|
11 | ## (at your option) any later version. |
---|
12 | ## |
---|
13 | ## This program is distributed in the hope that it will be useful, |
---|
14 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
15 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
16 | ## GNU General Public License for more details. |
---|
17 | ## |
---|
18 | ## You should have received a copy of the GNU General Public License |
---|
19 | ## along with this program; if not, write to the Free Software |
---|
20 | ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
---|
21 | ## |
---|
22 | import csv |
---|
23 | import os |
---|
24 | import shutil |
---|
25 | import tempfile |
---|
26 | import grok |
---|
27 | |
---|
28 | from datetime import datetime |
---|
29 | from BTrees.OOBTree import OOBTree |
---|
30 | from BTrees.Length import Length |
---|
31 | |
---|
#: Names of the header fields a CSV file has to provide in order to be
#: a valid JAMB table.
JAMB_DATA_HEADERS = [
    'firstname',
    'lastname',
    'middlenames',
    'screening_type',
    'screening_venue',
    'reg_no',
    'sex',
    'course1',
    'jamb_state',
    'screening_date',
    'jamb_lga',
    'fst_sit_fname',
    'date_of_birth',
]
---|
37 | |
---|
def filter_data(datadict):
    """Clean up a raw row dict read from a JAMB CSV file.

    Entries whose key contains ``'ignore'`` are dropped, the values of
    all fields named in :data:`JAMB_DATA_HEADERS` are turned into
    unicode strings and ``date_of_birth`` is parsed (expected format:
    ``DD/MM/YYYY``) into a :class:`datetime.date`.

    The passed-in dict is modified in place and returned as well.

    Raises `KeyError` if one of the required fields is missing and
    `ValueError` if the date of birth does not match the format.
    """
    # Work on a snapshot of the keys, as entries are deleted on the way.
    for column in list(datadict.keys()):
        if 'ignore' in column:
            del datadict[column]

    for name in JAMB_DATA_HEADERS:
        datadict[name] = unicode(datadict[name])

    birth = datetime.strptime(datadict['date_of_birth'], '%d/%m/%Y')
    datadict['date_of_birth'] = birth.date()
    return datadict
---|
54 | |
---|
55 | |
---|
class JAMBDataTable(grok.Model):
    """A data table that contains JAMB data.

    JAMB data tables are plain but fast as they store nearly no data
    inside the ZODB. All data is held on-disk in CSV tables.

    As a consequence these tables are read-only.
    """

    #: Time of the most recent successful :meth:`importFromCSV` call or
    #: ``None`` if nothing was imported or the table was cleared.
    import_datetime = None

    def __init__(self):
        super(JAMBDataTable, self).__init__()
        # Path of the table's own copy of the imported CSV file, if any.
        self._datafile_path = None
        # Never updated by the methods of this class; kept for backward
        # compatibility.
        self._data_len = 0
        # True while the data file lives in a temporary directory
        # created by `_getJAMBTableStorage`.
        self._temporary = False
        return

    def __del__(self):
        """Clear the table (removing temporary storage) on destruction.
        """
        self.clear()

    def __iter__(self):
        """Iterate over all rows of the stored CSV file.

        Each row is yielded as a dict processed by :func:`filter_data`.
        If no data file was imported, nothing is yielded.
        """
        if self._datafile_path is None:
            # A plain ``return`` ends the generator. Raising
            # ``StopIteration`` explicitly inside a generator is turned
            # into a ``RuntimeError`` by newer Python versions (PEP 479).
            return
        # Binary mode is what the Python 2 ``csv`` module expects. The
        # ``with`` block closes the file as soon as the iteration ends
        # or the generator is discarded.
        with open(self._datafile_path, 'rb') as datafile:
            for line in csv.DictReader(datafile):
                yield filter_data(line)

    def keys(self):
        """Get iterator over all registration numbers stored in table.
        """
        for item in self:
            yield item['reg_no']

    def items(self):
        """Get tuples of registration number and datasets for each entry in
        data table.
        """
        for item in self:
            yield (item['reg_no'], item)

    def clear(self):
        """Remove all existing entries.

        Resets the import timestamp and forgets the stored data file.
        If the file lives in a temporary directory, that directory is
        removed from disk; a file in the regular storage is left
        untouched.
        """
        self.import_datetime = None
        datafile_path = self._datafile_path
        if datafile_path is None:
            return
        if self._temporary and os.path.exists(datafile_path):
            shutil.rmtree(os.path.dirname(datafile_path))
        # Always reset the state, even if the temporary file vanished
        # already: a stale path would make iteration fail and a stale
        # flag could make a later ``clear()`` remove the regular storage
        # directory.
        self._datafile_path = None
        self._temporary = False

    def importFromCSV(self, filepath):
        """Importing data from a CSV file means to copy the source to a safe
        location.

        Any data imported before is cleared first. Errors raised while
        copying the file are passed through; in that case the table
        stays empty.
        """
        self.clear()
        self._copyDataFile(filepath)
        # Stamp the import only after the copy succeeded.
        self.import_datetime = datetime.now()

    def _copyDataFile(self, path):
        """Copy file in path to the JAMBData storage.

        The location of the copy is remembered as data file of this
        table once the copy exists.

        See :meth:`_getJAMBTableStorage`.
        """
        storage = self._getJAMBTableStorage()
        target = os.path.join(storage, os.path.basename(path))
        try:
            shutil.copy2(path, target)
        except Exception:
            if self._temporary:
                # Do not leave the freshly created temporary directory
                # behind if the copy failed.
                shutil.rmtree(storage, ignore_errors=True)
                self._temporary = False
            raise
        self._datafile_path = target
        return

    def _getJAMBTableStorage(self):
        """Get a path to store copies of datatables.

        We normally store data in a ``jambdata`` subdir of the
        datacenter storage, but if no site is available, we create a
        temporary dir and set `_temporary` to ``True``.

        The ``jambdata`` directory is created if it does not exist yet.

        Note, that temporary dirs will be deleted when the
        JAMBDataTable object is cleared or destroyed.

        Returns absolute path to the JAMB data storage.
        """
        site = grok.getSite()
        if site is None:
            jambtable_storage = tempfile.mkdtemp()
            self._temporary = True
        else:
            datacenter = site['datacenter']
            jambtable_storage = os.path.join(datacenter.storage, 'jambdata')
            # A flag left over from an earlier temporary import must
            # never mark the regular storage for removal.
            self._temporary = False
            if not os.path.isdir(jambtable_storage):
                os.mkdir(jambtable_storage)
        return os.path.abspath(jambtable_storage)
---|