Context navigation

source: main/waeup.sirp/branches/ulif-fasttables/src/waeup/sirp/jambtables/jambtables.py @ 5246

Last change on this file since 5246 was 5246, checked in by uli, 15 years ago
Remove unused add() method and explain better, what we're doing when filtering JAMB data.
File size: 4.9 KB

Line
1	##
2	## jambtables.py
3	## Login : <uli@pu.smp.net>
4	## Started on Tue Jun 22 06:31:42 2010 Uli Fouquet
5	## $Id$
6	##
7	## Copyright (C) 2010 Uli Fouquet
8	## This program is free software; you can redistribute it and/or modify
9	## it under the terms of the GNU General Public License as published by
10	## the Free Software Foundation; either version 2 of the License, or
11	## (at your option) any later version.
12	##
13	## This program is distributed in the hope that it will be useful,
14	## but WITHOUT ANY WARRANTY; without even the implied warranty of
15	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16	## GNU General Public License for more details.
17	##
18	## You should have received a copy of the GNU General Public License
19	## along with this program; if not, write to the Free Software
20	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21	##
22	import csv
23	import os
24	import shutil
25	import tempfile
26	import grok
27
28	from datetime import datetime
29	from BTrees.OOBTree import OOBTree
30	from BTrees.Length import Length
31
#: Names of the columns a JAMB table CSV file must provide in its
#: header row to be considered valid.
JAMB_DATA_HEADERS = [
    'firstname',
    'lastname',
    'middlenames',
    'screening_type',
    'screening_venue',
    'reg_no',
    'sex',
    'course1',
    'jamb_state',
    'screening_date',
    'jamb_lga',
    'fst_sit_fname',
    'date_of_birth',
]
37
def filter_data(datadict):
    """Clean up one row of JAMB data in place and return it.

    Every field whose name contains ``ignore`` is removed. The values
    of all fields listed in :data:`JAMB_DATA_HEADERS` are converted to
    unicode, except ``date_of_birth``, which is parsed from
    ``DD/MM/YYYY`` notation into a `datetime.date` object.

    `datadict` is modified in place and the very same dict is
    returned.

    Raises `KeyError` if a field listed in :data:`JAMB_DATA_HEADERS`
    is missing from `datadict` and `ValueError` if ``date_of_birth``
    does not match the expected date format.
    """
    # Iterate over a snapshot of the keys: entries are deleted inside
    # the loop, which must not happen while iterating the dict itself.
    for key in list(datadict):
        if 'ignore' in key:
            del datadict[key]
    for name in JAMB_DATA_HEADERS:
        # NOTE(review): for byte strings ``unicode()`` decodes with the
        # interpreter default codec -- confirm the CSV data is ASCII.
        datadict[name] = unicode(datadict[name])
    # ``date_of_birth`` was turned into unicode above; replace it by a
    # real date object.
    datadict['date_of_birth'] = datetime.strptime(
        datadict['date_of_birth'],
        '%d/%m/%Y'
        ).date()

    return datadict
58
59
class JAMBDataTable(grok.Model):
    """A data table that contains JAMB data.

    JAMB data tables are plain but fast as they store nearly no data
    inside the ZODB. All data is held on-disk in CSV tables.

    As a consequence these tables are read-only.
    """

    #: Time of the last import; ``None`` as long as nothing was
    #: imported or after :meth:`clear` was called.
    import_datetime = None

    def __init__(self):
        super(JAMBDataTable, self).__init__()
        # Path of the local copy of the imported CSV file (or ``None``).
        self._datafile_path = None
        self._data_len = 0
        # ``True`` while the local copy lives in a temporary directory
        # created by :meth:`_getJAMBTableStorage`.
        self._temporary = False
        return

    def __del__(self):
        # Make sure temporary directories do not outlive the object.
        self.clear()

    def __iter__(self):
        """Iterate over all rows of the stored CSV file.

        Each row is yielded as a dict that was passed through
        :func:`filter_data`. If no data was imported, nothing is
        yielded.
        """
        if self._datafile_path is None:
            # A plain ``return`` ends a generator; raising
            # ``StopIteration`` manually is an error under PEP 479.
            return
        datafile = open(self._datafile_path, 'rb')
        try:
            for line in csv.DictReader(datafile):
                yield filter_data(line)
        finally:
            # Close the file also when the consumer stops early or a
            # row could not be filtered.
            datafile.close()

    def keys(self):
        """Get iterator over all registration numbers stored in table.
        """
        for item in self:
            yield item['reg_no']

    def items(self):
        """Get tuples of registration number and datasets for each entry in
        data table.
        """
        for item in self:
            yield (item['reg_no'], item)

    def clear(self):
        """Remove all existing entries.

        Resets the import timestamp and forgets the stored data file.
        If the data file lives in a temporary directory, that directory
        is removed from disk. Data files stored elsewhere are left
        untouched.
        """
        self.import_datetime = None
        if self._datafile_path is None:
            return
        if self._temporary:
            temp_dir = os.path.dirname(self._datafile_path)
            # Remove the directory even if the data file itself has
            # vanished, so no temporary directory is left behind.
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
            # The temporary storage is gone; a stale flag would make a
            # later call remove a non-temporary storage directory.
            self._temporary = False
        self._datafile_path = None

    def importFromCSV(self, filepath):
        """Importing data from a CSV file means to copy the source to a safe
        location.

        Any data imported before is dropped first, see :meth:`clear`.
        """
        self.clear()
        self._copyDataFile(filepath)
        # Set the timestamp only after the copy succeeded, so a failed
        # import does not look like a successful one.
        self.import_datetime = datetime.now()

    def _copyDataFile(self, path):
        """Copy file in path to the JAMBData storage.

        The copy keeps the basename of `path`; its location is
        remembered in ``_datafile_path``.

        See :meth:`_getJAMBTableStorage`.
        """
        storage = self._getJAMBTableStorage()
        self._datafile_path = os.path.join(
            storage, os.path.basename(path)
            )
        shutil.copy2(path, self._datafile_path)
        return

    def _getJAMBTableStorage(self):
        """Get a path to store copies of datatables.

        We normally store data in a ``jambdata`` subdir of the
        datacenter storage of the current site. If there is no current
        site, we create a temporary dir instead and set `_temporary`
        to ``True``.

        A not yet existing ``jambdata`` directory is created.

        Note, that temporary dirs will be deleted when the
        JAMBDataTable object is cleared or destroyed.

        Returns absolute path to the JAMB data storage.
        """
        site = grok.getSite()
        if site is None:
            jambtable_storage = tempfile.mkdtemp()
            self._temporary = True
        else:
            datacenter = site['datacenter']
            jambtable_storage = os.path.join(datacenter.storage, 'jambdata')
            # Never treat the shared datacenter storage as temporary,
            # whatever was used for a previous import.
            self._temporary = False
        if not os.path.isdir(jambtable_storage):
            os.mkdir(jambtable_storage)
        return os.path.abspath(jambtable_storage)

Note: See TracBrowser for help on using the repository browser.

Download in other formats: