Context navigation

source: main/waeup.kofa/backup-trunk/tools/fix_import_file.py @ 9736

Last change on this file since 9736 was 9194, checked in by Henrik Bettermann, 12 years ago
Add converter for boolean.
Property svn:keywords set to `Id`
File size: 14.1 KB

Line
1	## $Id: fix_import_file.py 9194 2012-09-17 08:06:47Z henrik $
2	##
3	## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4	## This program is free software; you can redistribute it and/or modify
5	## it under the terms of the GNU General Public License as published by
6	## the Free Software Foundation; either version 2 of the License, or
7	## (at your option) any later version.
8	##
9	## This program is distributed in the hope that it will be useful,
10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	## GNU General Public License for more details.
13	##
14	## You should have received a copy of the GNU General Public License
15	## along with this program; if not, write to the Free Software
16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17	##
18	"""
19	Fix exports from old SRP portal and other data sources to make
20	them importable by current portal.
21
22	Usage:
23
24	Change into this directory, set the options below (files are assumed
25	to be in the same directory) and then run
26
27	python fix_import_file.py <filename>
28
29	Errors/warnings will be displayed on the shell, the output will be put
30	into the specified output file.
31
32
33	The lgas.py module must be copied into the same folder where this script
34	is started.
35	"""
36	import csv
37	import datetime
38	import re
39	import sys
40
41	try:
42	from lgas import LGAS
43	except:
44	print 'ERROR: lgas.py is missing.'
45	sys.exit(1)
46
47	def strip(string):
48	string = string.replace('_', '')
49	string = string.replace('/', '')
50	string = string.replace('-', '')
51	string = string.replace(' ', '')
52	string = string.lower()
53	return string
54
55	LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56	LGAS_dict = dict(LGAS)
57
58	##
59	## CONFIGURATION SECTION
60	##
61	# keys are fieldnames in input file, values are methods of class
62	# Converter (see below)
63	OPTIONS = {
64	'student_id': 'student_id',
65	'sex': 'gender',
66	'birthday': 'date',
67	'marit_stat': 'marit_stat',
68	'session': 'session',
69	'entry_session': 'session',
70	'current_session': 'session',
71	'session_id': 'session',
72	'entry_mode': 'mode',
73	'reg_state': 'reg_state',
74	'password': 'password',
75	'phone': 'phone',
76	'nationality': 'nationality',
77	'level': 'level',
78	'start_level': 'level',
79	'end_level': 'level',
80	'level_id': 'level',
81	'current_level': 'level',
82	'semester': 'semester',
83	'application_category': 'application_category',
84	'lga': 'lga',
85	'order_id': 'no_int',
86	'uniben': 'former',
87	'nysc_year': 'year',
88	'alr_date': 'date',
89	'fst_sit_date': 'date',
90	'scd_sit_date': 'date',
91	'emp_start': 'date',
92	'emp_end': 'date',
93	'emp_start2': 'date',
94	'emp_end2': 'date',
95	'fst_sit_results': 'result',
96	'scd_sit_results': 'result',
97	'alr_results': 'result',
98	'email': 'email',
99	'fst_sit_type': 'sittype',
100	'scd_sit_type': 'sittype',
101	'resp_pay_reference': 'no_int',
102	'type': 'company',
103	'date': 'date',
104	'core_or_elective': 'bool',
105	'category': 'p_category',
106	'reg_transition': 'reg_state', # we completely change this column,
107	# since reg_state import is usually intended
108	'transition': 'reg_transition',
109	'payment_date': 'date',
110	'validation_date': 'date',
111	}
112
113	# Mapping input file colnames --> output file colnames
114	COLNAME_MAPPING = {
115	# base data
116	'id': 'student_id',
117	'reg_state': 'state',
118	'reg_transition': 'state',
119	'jamb_reg_no': 'reg_number',
120	'matric_no': 'matric_number',
121	'birthday': 'date_of_birth',
122	'clr_ac_pin': 'clr_code',
123	# clearance
124	'hq_grade': 'hq_degree',
125	'uniben': 'former_matric',
126	'hq_type2': 'hq2_type',
127	'hq_grade2': 'hq2_degree',
128	'hq_school2': 'hq2_school',
129	'hq_matric_no2': 'hq2_matric_no',
130	'hq_session2': 'hq2_session',
131	'hq_disc2': 'hq2_disc',
132	'emp': 'employer',
133	'emp2': 'employer2',
134	'emp_position2': 'emp2_position',
135	'emp_start2': 'emp2_start',
136	'emp_end2': 'emp2_end',
137	'emp_reason2': 'emp2_reason',
138	# study course
139	'study_course': 'certificate',
140	# study level
141	'session': 'level_session',
142	'verdict': 'level_verdict',
143	# course ticket
144	'level_id': 'level',
145	'core_or_elective': 'mandatory',
146	# payment ticket
147	'order_id': 'p_id',
148	'status': 'p_state',
149	'category': 'p_category',
150	'resp_pay_reference': 'r_pay_reference',
151	'resp_desc': 'r_desc',
152	'resp_approved_amount': 'r_amount_approved',
153	'item': 'p_item',
154	'amount': 'amount_auth',
155	'resp_card_num': 'r_card_num',
156	'resp_code': 'r_code',
157	'date': 'creation_date',
158	'surcharge': 'surcharge_1',
159	'session_id': 'p_session',
160	'type': 'r_company',
161	}
162
163	# Mapping input state --> output state
164	REGSTATE_MAPPING = {
165	'student_created': 'created',
166	'admitted': 'admitted',
167	'objection_raised': 'clearance started',
168	'clearance_pin_entered': 'clearance started',
169	'clearance_requested': 'clearance requested',
170	'cleared_and_validated': 'cleared',
171	'school_fee_paid': 'school fee paid',
172	'returning': 'returning',
173	'courses_registered': 'courses registered',
174	'courses_validated': 'courses validated',
175	'admit': 'admitted',
176	'return': 'returning'
177	}
178
179	##
180	## END OF CONFIG
181	##
182
183	# Look for the first sequence of numbers
184	RE_PHONE = re.compile('[^\d](\d)[^\d]*')
185
186	def convert_fieldnames(fieldnames):
187	"""Replace input fieldnames by fieldnames of COLNAME_MAPPING.
188	"""
189	# Remove whitespaces
190	header = dict([(name, name.strip()) for name in fieldnames])
191	for in_name, out_name in COLNAME_MAPPING.items():
192	if in_name not in header.values():
193	continue
194	# Inverse dictionary lookup
195	key = [key for key,value in header.items() if value==in_name][0]
196	header[key] = out_name
197	return header
198
199	class Converters():
200	"""Converters to turn old-style values into new ones.
201	"""
202
203	@classmethod
204	def student_id(self, value, row):
205	""" 'A123456' --> 'EA123456'
206	"""
207	if len(value) == 7:
208	return 'K' + value
209	return value
210
211	@classmethod
212	def reg_state(self, value, row):
213	""" 'courses_validated' --> 'courses validated'
214	"""
215	return REGSTATE_MAPPING.get(value,value)
216
217	@classmethod
218	def reg_transition(self, value, row):
219	if value == "admitted":
220	return "admit"
221	if value == "returning":
222	return "return"
223	return value
224
225	@classmethod
226	def level(self, value, row):
227	""" '000' --> '10'
228	'800' --> '999' if pg student
229	"""
230	try:
231	number = int(value)
232	except ValueError:
233	return 9999
234	if number == 0:
235	return 10
236	if row.get('entry_mode') and row.get('entry_mode').startswith('pg'):
237	return 999
238	return number
239
240	@classmethod
241	def semester(self, value, row):
242	""" '0' --> '9'
243	"""
244	try:
245	number = int(value)
246	except ValueError:
247	return 9999
248	if number == 0:
249	return 9
250	return number
251
252	@classmethod
253	def application_category(self, value, row):
254	""" '' --> 'no'
255	"""
256	if value == '':
257	return 'no'
258	return value
259
260	@classmethod
261	def lga(self, value, row):
262	""" Remove apostrophe
263	"""
264	if value == 'akwa_ibom_uru_offong_oruko':
265	return 'akwa_ibom_urue-offong-oruko'
266	if value == 'edo_ohionmwon':
267	return 'edo_orhionmwon'
268
269	if value == 'nassarawa_nassarawa':
270	return 'nassarawa_nassawara'
271
272	if value == 'kogi_mopa-muro-mopi':
273	return 'kogi_mopa-muro'
274
275	if value == 'delta_osimili-north':
276	return 'delta_oshielli-north'
277
278	if value == 'delta_osimili':
279	return 'delta_oshimili'
280
281	if value == 'delta_osimili-south':
282	return 'delta_oshimili-south'
283	try:
284	value = value.replace("'","")
285	except:
286	return ''
287	lower = value.lower()
288	if lower in LGAS_dict.keys():
289	return lower
290	# If real names are given, let's see if a similar value
291	# in LGAS exist.
292	value = LGAS_inverted_stripped.get(strip(lower), value)
293	return value
294
295
296	@classmethod
297	def session(self, value, row):
298	""" '08' --> '2008'
299	'2008/2009' --> '2008'
300	"""
301	if '/' in value:
302	numbers = value.split('/')
303	number = int(numbers[0])
304	if number in range(2000,2015):
305	return number
306	else:
307	return 9999
308	try:
309	number = int(value)
310	except ValueError:
311	#import pdb; pdb.set_trace()
312	return 9999
313	if number < 14:
314	return number + 2000
315	elif number in range(2000,2015):
316	return number
317	else:
318	return 9999
319
320	@classmethod
321	def former(self, value, row):
322	""" True --> yes
323	'2008/2009' --> '2008'
324	"""
325	if value == 'True':
326	return 'yes'
327	return
328
329	@classmethod
330	def bool(self, value, row):
331	""" True --> 1
332	"""
333	if value in ('TRUE', 'True'):
334	return '1'
335	elif value in ('FALSE', 'False'):
336	return '0'
337	return
338
339	@classmethod
340	def year(self, value, row):
341	""" '0' --> ''
342	"""
343	if value == '0':
344	return
345	if value == 'None':
346	return
347	return value
348
349
350	@classmethod
351	def marit_stat(self, value, row):
352	""" 'True'/'False' --> 'married'/'unmarried'
353	"""
354	if value in ('True','married'):
355	value = 'married'
356	elif value in ('False','unmarried'):
357	value = 'unmarried'
358	else:
359	value = ''
360	return value
361
362	@classmethod
363	def gender(self, value, row):
364	""" 'True'/'False' --> 'f'/'m'
365	"""
366	if value.strip() in ('F', 'True','f'):
367	value = 'f'
368	elif value.strip() in ('M', 'False','m'):
369	value = 'm'
370	else:
371	value = ''
372	return value
373
374	@classmethod
375	def date(self, value, row):
376	""" 'yyyy/mm/dd' --> 'yyyy-mm-dd'
377	"""
378	if value == "None":
379	value = ""
380	elif value == "":
381	value = ""
382	else:
383	value = value.replace('/', '-')
384	# We add the hash symbol to avoid automatic date transformation
385	# in Excel and Calc for further processing
386	value += '#'
387	return value
388
389	@classmethod
390	def no_int(self, value, row):
391	""" Add hash and skip numbers starting with 999999
392	"""
393	# We add the hash symbol to avoid automatic number transformation
394	# in Excel and Calc for further processing
395	try:
396	intvalue = int(value)
397	value += '#'
398	except:
399	pass
400	if value.startswith('999999'):
401	return
402	return value
403
404	@classmethod
405	def mode(self, value, row):
406	if value == "transfer_fulltime":
407	return "transfer_ft"
408	if value == "ume_ft":
409	return "utme_ft"
410	return value
411
412	@classmethod
413	def password(self, value, row):
414	if value == "not set":
415	return ""
416	return value
417
418	@classmethod
419	def nationality(self, value, row):
420	if value == "nigeria":
421	return "NG"
422	if value == "niger":
423	return "NE"
424	return value
425
426	@classmethod
427	def sittype(self, value, row):
428	if value == "nabtec":
429	return "nabteb"
430	return value
431
432	@classmethod
433	def company(self, value, row):
434	if value == "online":
435	return "interswitch"
436	return value
437
438	@classmethod
439	def p_category(self, value, row):
440	if value == "acceptance":
441	return "clearance"
442	return value
443
444	@classmethod
445	def email(self, value, row):
446	return value.strip()
447
448	@classmethod
449	def phone(self, value, row):
450	""" '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
451
452	Dashes and slashes are removed before looking for sequences
453	of numbers.
454	"""
455	if not value:
456	return
457	value = value.strip('#')
458	value = value.replace('-', '')
459	value = value.replace('/', '')
460	match = RE_PHONE.match(value)
461	phone = match.groups()[0]
462	if value.startswith('234'):
463	value = '+' + value[:3] + '-' + value[3:]
464	else:
465	value = '-%s' % phone
466	return value + '#'
467
468	@classmethod
469	def result(self, value, row):
470	try:
471	liste = eval(value)
472	except:
473	return
474	if isinstance(liste,list):
475	return [(i[0].lower(),i[1]) for i in liste]
476	return
477
478
479	def main():
480	input_file = '%s' % sys.argv[1]
481	output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
482	reader = csv.DictReader(open(input_file, 'rb'))
483	writer = None
484
485	for num, row in enumerate(reader):
486	if num == 0:
487	writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
488	print "FIELDS: "
489	for x, y in enumerate(reader.fieldnames):
490	print x, y
491	header = convert_fieldnames(reader.fieldnames)
492	writer.writerow(header)
493	if row.get('reg_state') == 'student_created':
494	# We do not reimport student records which have never been accessed.
495	continue
496	if row.get('status') == 'started':
497	# We do not reimport started payments.
498	continue
499	for key, value in row.items():
500	# Remove unwanted whitespaces.
501	row[key] = row[key].strip()
502	if not key in OPTIONS.keys():
503	continue
504	conv_name = OPTIONS[key]
505	converter = getattr(Converters, conv_name, None)
506	if converter is None:
507	print "WARNING: cannot find converter %s" % conv_name
508	continue
509	row[key] = converter(row[key], row)
510	try:
511	writer.writerow(row)
512	except:
513	print row['student_id']
514
515	print "Output written to %s" % output_file
516
517
518	if __name__ == '__main__':
519	if len(sys.argv) != 2:
520	print 'Usage: %s <filename>' % __file__
521	sys.exit(1)
522	main()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: