Context navigation

source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 8938

Last change on this file since 8938 was 8938, checked in by Henrik Bettermann, 13 years ago
More cell conversions and column title editing.
Property svn:keywords set to `Id`
File size: 11.5 KB

Line
1	## $Id: fix_import_file.py 8938 2012-07-07 22:33:03Z henrik $
2	##
3	## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4	## This program is free software; you can redistribute it and/or modify
5	## it under the terms of the GNU General Public License as published by
6	## the Free Software Foundation; either version 2 of the License, or
7	## (at your option) any later version.
8	##
9	## This program is distributed in the hope that it will be useful,
10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	## GNU General Public License for more details.
13	##
14	## You should have received a copy of the GNU General Public License
15	## along with this program; if not, write to the Free Software
16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17	##
18	"""
19	Fix exports from old SRP portal and other data sources to make
20	them importable by current portal.
21
22	Usage:
23
24	Change into this directory, set the options below (files are assumed
25	to be in the same directory) and then run
26
27	python fix_import_file.py <filename>
28
29	Errors/warnings will be displayed on the shell, the output will be put
30	into the specified output file.
31
32
33	The lgas.py module must be copied into the same folder where this script
34	is started.
35	"""
36	import csv
37	import datetime
38	import re
39	import sys
40
41	try:
42	from lgas import LGAS
43	except:
44	print 'ERROR: lgas.py is missing.'
45	sys.exit(1)
46
47	def strip(string):
48	string = string.replace('_', '')
49	string = string.replace('/', '')
50	string = string.replace('-', '')
51	string = string.replace(' ', '')
52	string = string.lower()
53	return string
54
55	LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56	LGAS_dict = dict(LGAS)
57
58	##
59	## CONFIGURATION SECTION
60	##
61	# keys are fieldnames in input file, values are methods of class
62	# Converter (see below)
63	OPTIONS = {
64	'student_id': 'student_id',
65	'sex': 'gender',
66	'birthday': 'date',
67	'request_date': 'datetime',
68	'marit_stat': 'marit_stat',
69	'session': 'session',
70	'entry_session': 'session',
71	'current_session': 'session',
72	'session_id': 'session',
73	'entry_mode': 'mode',
74	'reg_state': 'reg_state',
75	'password': 'password',
76	'phone': 'phone',
77	'nationality': 'nationality',
78	'level': 'level',
79	'start_level': 'level',
80	'end_level': 'level',
81	'level_id': 'level',
82	'current_level': 'level',
83	'semester': 'semester',
84	'application_category': 'application_category',
85	'lga': 'lga',
86	'order_id': 'no_int',
87	'uniben': 'former',
88	'nysc_year': 'year',
89	'alr_date': 'date',
90	'fst_sit_date': 'date',
91	'scd_sit_date': 'date',
92	'emp_start': 'date',
93	'emp_end': 'date',
94	'emp_start2': 'date',
95	'emp_end2': 'date',
96	'fst_sit_results': 'result',
97	'scd_sit_results': 'result',
98	'alr_results': 'result',
99	}
100
101	# Mapping input file colnames --> output file colnames
102	COLNAME_MAPPING = {
103	# base data
104	'reg_state': 'state',
105	'jamb_reg_no': 'reg_number',
106	'matric_no': 'matric_number',
107	'birthday': 'date_of_birth',
108	'clr_ac_pin': 'clr_code',
109	# study course
110	'study_course': 'certificate',
111	# study level
112	'session': 'level_session',
113	'verdict': 'level_verdict',
114	# course ticket
115	'level_id': 'level',
116	'core_or_elective': 'mandatory',
117	# payment ticket
118	'order_id': 'p_id',
119	'status': 'p_state',
120	'category': 'p_category',
121	'resp_pay_reference': 'r_pay_reference',
122	'resp_desc': 'r_desc',
123	'resp_approved_amount': 'r_amount_approved',
124	'item': 'p_item',
125	'amount': 'amount_auth',
126	'resp_card_num': 'r_card_num',
127	'resp_code': 'r_code',
128	'date': 'creation_date',
129	'surcharge': 'surcharge_1',
130	'session_id': 'p_session',
131	'hq_grade': 'hq_degree',
132	'uniben': 'former_matric',
133	'hq_type2': 'hq2_type',
134	'hq_grade2': 'hq2_degree',
135	'hq_school2': 'hq2_school',
136	'hq_matric_no2': 'hq2_matric_no',
137	'hq_session2': 'hq2_session',
138	'hq_disc2': 'hq2_disc',
139	'emp': 'employer',
140	'emp2': 'employer2',
141	'emp_position2': 'emp2_position',
142	'emp_start2': 'emp2_start',
143	'emp_end2': 'emp2_end',
144	'emp_reason2': 'emp2_reason',
145	}
146
147	# Mapping input state --> output state
148	REGSTATE_MAPPING = {
149	'student_created': 'created',
150	'admitted': 'admitted',
151	'objection_raised': 'clearance started',
152	'clearance_pin_entered': 'clearance started',
153	'clearance_requested': 'clearance requested',
154	'cleared_and_validated': 'cleared',
155	'school_fee_paid': 'school fee paid',
156	'returning': 'returning',
157	'courses_registered': 'courses registered',
158	'courses_validated': 'courses validated',
159	}
160
161	##
162	## END OF CONFIG
163	##
164
165	# Look for the first sequence of numbers
166	RE_PHONE = re.compile('[^\d](\d)[^\d]*')
167
168	def convert_fieldnames(fieldnames):
169	"""Replace input fieldnames by fieldnames of COLNAME_MAPPING.
170	"""
171	header = dict([(name, name) for name in fieldnames])
172	for in_name, out_name in COLNAME_MAPPING.items():
173	if in_name not in header:
174	continue
175	header[in_name] = out_name
176	return header
177
178	class Converters():
179	"""Converters to turn old-style values into new ones.
180	"""
181
182	@classmethod
183	def student_id(self, value):
184	""" 'A123456' --> 'EA123456'
185	"""
186	if len(value) == 7:
187	return 'E' + value
188	return value
189
190	@classmethod
191	def reg_state(self, value):
192	""" 'courses_validated' --> 'courses validated'
193	"""
194	return REGSTATE_MAPPING.get(value,value)
195
196	@classmethod
197	def level(self, value):
198	""" '000' --> '10'
199	"""
200	try:
201	number = int(value)
202	except ValueError:
203	return 9999
204	if number == 0:
205	return 10
206	return number
207
208	@classmethod
209	def semester(self, value):
210	""" '0' --> '9'
211	"""
212	try:
213	number = int(value)
214	except ValueError:
215	return 9999
216	if number == 0:
217	return 9
218	return number
219
220	@classmethod
221	def application_category(self, value):
222	""" '' --> 'no'
223	"""
224	if value == '':
225	return 'no'
226	return value
227
228	@classmethod
229	def lga(self, value):
230	""" Remove apostrophe
231	"""
232	if value == 'akwa_ibom_uru_offong_oruko':
233	return 'akwa_ibom_urue-offong-oruko'
234	try:
235	value = value.replace("'","")
236	except:
237	return ''
238	lower = value.lower()
239	if lower in LGAS_dict.keys():
240	return lower
241	# If real names are given, let's see if a similar value
242	# in LGAS exist.
243	value = LGAS_inverted_stripped.get(strip(lower), value)
244	return value
245
246
247	@classmethod
248	def session(self, value):
249	""" '08' --> '2008'
250	'2008/2009' --> '2008'
251	"""
252	if '/' in value:
253	numbers = value.split('/')
254	number = int(numbers[0])
255	if number in range(2000,2015):
256	return number
257	else:
258	return 9999
259	try:
260	number = int(value)
261	except ValueError:
262	#import pdb; pdb.set_trace()
263	return 9999
264	if number < 14:
265	return number + 2000
266	elif number in range(2000,2015):
267	return number
268	else:
269	return 9999
270
271	@classmethod
272	def former(self, value):
273	""" True --> yes
274	'2008/2009' --> '2008'
275	"""
276	if value == 'True':
277	return 'yes'
278	return
279
280	@classmethod
281	def year(self, value):
282	""" '0' --> ''
283	"""
284	if value == '0':
285	return
286	if value == 'None':
287	return
288	return value
289
290
291	@classmethod
292	def marit_stat(self, value):
293	""" 'True'/'False' --> 'married'/'unmarried'
294	"""
295	if value in ('True','married'):
296	value = 'married'
297	elif value in ('False','unmarried'):
298	value = 'unmarried'
299	else:
300	value = ''
301	return value
302
303	@classmethod
304	def gender(self, value):
305	""" 'True'/'False' --> 'f'/'m'
306	"""
307	if value.strip() in ('F', 'True','f'):
308	value = 'f'
309	elif value.strip() in ('M', 'False','m'):
310	value = 'm'
311	else:
312	value = ''
313	return value
314
315	@classmethod
316	def date(self, value):
317	""" 'yyyy/mm/dd' --> 'yyyy-mm-dd'
318	"""
319	if value == "None":
320	value = ""
321	elif value == "":
322	value = ""
323	else:
324	value = value.replace('/', '-')
325	# We add the hash symbol to avoid automatic date transformation
326	# in Excel and Calc for further processing
327	value += '#'
328	return value
329
330	@classmethod
331	def no_int(self, value):
332	""" Add hash.
333	"""
334	# We add the hash symbol to avoid automatic number transformation
335	# in Excel and Calc for further processing
336	value += '#'
337	return value
338
339
340	@classmethod
341	def datetime(self, value):
342	""" 'yyyy/mm/dd' --> 'yyyy-mm-dd'
343	"""
344	#print "IN: ", value
345	if value == "None":
346	value = ""
347	elif value == "":
348	value = ""
349	else:
350	#value = datetime.datetime.strptime(value, '%Y/%m/%d')
351	#value = datetime.datetime.strftime(value, '%Y-%m-%d')
352	pass
353	#print "OUT: ", value
354	return value
355
356	@classmethod
357	def mode(self, value):
358	if value == "transfer_fulltime":
359	return "transfer_ft"
360	if value == "ume_ft":
361	return "utme_ft"
362	return value
363
364	@classmethod
365	def password(self, value):
366	if value == "not set":
367	return ""
368	return value
369
370	@classmethod
371	def nationality(self, value):
372	if value == "nigeria":
373	return "NG"
374	return value
375
376	@classmethod
377	def phone(self, value):
378	""" '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
379
380	Dashes and slashes are removed before looking for sequences
381	of numbers.
382	"""
383	if not value:
384	return
385	value = value.replace('-', '')
386	value = value.replace('/', '')
387	match = RE_PHONE.match(value)
388	phone = match.groups()[0]
389	value = '-%s' % phone
390	return value + '#'
391
392	@classmethod
393	def result(self, value):
394	try:
395	liste = eval(value)
396	except:
397	return
398	if isinstance(liste,list):
399	return [(i[0].lower(),i[1]) for i in liste]
400	return
401
402	def main():
403	input_file = '%s' % sys.argv[1]
404	output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
405	reader = csv.DictReader(open(input_file, 'rb'))
406	writer = None
407
408	for num, row in enumerate(reader):
409	if num == 0:
410	writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
411	print "FIELDS: "
412	for x, y in enumerate(reader.fieldnames):
413	print x, y
414	header = convert_fieldnames(reader.fieldnames)
415	writer.writerow(header)
416	for key, value in row.items():
417	if not key in OPTIONS.keys():
418	continue
419	conv_name = OPTIONS[key]
420	converter = getattr(Converters, conv_name, None)
421	if converter is None:
422	print "WARNING: cannot find converter %s" % conv_name
423	continue
424	row[key] = converter(row[key])
425	try:
426	writer.writerow(row)
427	except:
428	print row['student_id']
429
430	print "Output written to %s" % output_file
431
432
433	if __name__ == '__main__':
434	if len(sys.argv) != 2:
435	print 'Usage: %s <filename>' % __file__
436	sys.exit(1)
437	main()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: