Context navigation

source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 9158

Last change on this file since 9158 was 9158, checked in by Henrik Bettermann, 13 years ago
Fix if statement.
Property svn:keywords set to `Id`
File size: 13.8 KB

Line
1	## $Id: fix_import_file.py 9158 2012-09-05 08:22:01Z henrik $
2	##
3	## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4	## This program is free software; you can redistribute it and/or modify
5	## it under the terms of the GNU General Public License as published by
6	## the Free Software Foundation; either version 2 of the License, or
7	## (at your option) any later version.
8	##
9	## This program is distributed in the hope that it will be useful,
10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	## GNU General Public License for more details.
13	##
14	## You should have received a copy of the GNU General Public License
15	## along with this program; if not, write to the Free Software
16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17	##
18	"""
19	Fix exports from old SRP portal and other data sources to make
20	them importable by current portal.
21
22	Usage:
23
24	Change into this directory, set the options below (files are assumed
25	to be in the same directory) and then run
26
27	python fix_import_file.py <filename>
28
29	Errors/warnings will be displayed on the shell, the output will be put
30	into the specified output file.
31
32
33	The lgas.py module must be copied into the same folder where this script
34	is started.
35	"""
36	import csv
37	import datetime
38	import re
39	import sys
40
41	try:
42	from lgas import LGAS
43	except:
44	print 'ERROR: lgas.py is missing.'
45	sys.exit(1)
46
47	def strip(string):
48	string = string.replace('_', '')
49	string = string.replace('/', '')
50	string = string.replace('-', '')
51	string = string.replace(' ', '')
52	string = string.lower()
53	return string
54
55	LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56	LGAS_dict = dict(LGAS)
57
58	##
59	## CONFIGURATION SECTION
60	##
61	# keys are fieldnames in input file, values are methods of class
62	# Converter (see below)
63	OPTIONS = {
64	'student_id': 'student_id',
65	'sex': 'gender',
66	'birthday': 'date',
67	'marit_stat': 'marit_stat',
68	'session': 'session',
69	'entry_session': 'session',
70	'current_session': 'session',
71	'session_id': 'session',
72	'entry_mode': 'mode',
73	'reg_state': 'reg_state',
74	'password': 'password',
75	'phone': 'phone',
76	'nationality': 'nationality',
77	'level': 'level',
78	'start_level': 'level',
79	'end_level': 'level',
80	'level_id': 'level',
81	'current_level': 'level',
82	'semester': 'semester',
83	'application_category': 'application_category',
84	'lga': 'lga',
85	'order_id': 'no_int',
86	'uniben': 'former',
87	'nysc_year': 'year',
88	'alr_date': 'date',
89	'fst_sit_date': 'date',
90	'scd_sit_date': 'date',
91	'emp_start': 'date',
92	'emp_end': 'date',
93	'emp_start2': 'date',
94	'emp_end2': 'date',
95	'fst_sit_results': 'result',
96	'scd_sit_results': 'result',
97	'alr_results': 'result',
98	'email': 'email',
99	'fst_sit_type': 'sittype',
100	'scd_sit_type': 'sittype',
101	'resp_pay_reference': 'no_int',
102	'type': 'company',
103	'date': 'date',
104	'category': 'p_category',
105	'reg_transition': 'reg_state', # we completely change this column,
106	# since reg_state import is usually intended
107	'transition': 'reg_transition',
108	'payment_date': 'date',
109	}
110
111	# Mapping input file colnames --> output file colnames
112	COLNAME_MAPPING = {
113	# base data
114	'id': 'student_id',
115	'reg_state': 'state',
116	'reg_transition': 'state',
117	'jamb_reg_no': 'reg_number',
118	'matric_no': 'matric_number',
119	'birthday': 'date_of_birth',
120	'clr_ac_pin': 'clr_code',
121	# clearance
122	'hq_grade': 'hq_degree',
123	'uniben': 'former_matric',
124	'hq_type2': 'hq2_type',
125	'hq_grade2': 'hq2_degree',
126	'hq_school2': 'hq2_school',
127	'hq_matric_no2': 'hq2_matric_no',
128	'hq_session2': 'hq2_session',
129	'hq_disc2': 'hq2_disc',
130	'emp': 'employer',
131	'emp2': 'employer2',
132	'emp_position2': 'emp2_position',
133	'emp_start2': 'emp2_start',
134	'emp_end2': 'emp2_end',
135	'emp_reason2': 'emp2_reason',
136	# study course
137	'study_course': 'certificate',
138	# study level
139	'session': 'level_session',
140	'verdict': 'level_verdict',
141	# course ticket
142	'level_id': 'level',
143	'core_or_elective': 'mandatory',
144	# payment ticket
145	'order_id': 'p_id',
146	'status': 'p_state',
147	'category': 'p_category',
148	'resp_pay_reference': 'r_pay_reference',
149	'resp_desc': 'r_desc',
150	'resp_approved_amount': 'r_amount_approved',
151	'item': 'p_item',
152	'amount': 'amount_auth',
153	'resp_card_num': 'r_card_num',
154	'resp_code': 'r_code',
155	'date': 'creation_date',
156	'surcharge': 'surcharge_1',
157	'session_id': 'p_session',
158	'type': 'r_company',
159	}
160
161	# Mapping input state --> output state
162	REGSTATE_MAPPING = {
163	'student_created': 'created',
164	'admitted': 'admitted',
165	'objection_raised': 'clearance started',
166	'clearance_pin_entered': 'clearance started',
167	'clearance_requested': 'clearance requested',
168	'cleared_and_validated': 'cleared',
169	'school_fee_paid': 'school fee paid',
170	'returning': 'returning',
171	'courses_registered': 'courses registered',
172	'courses_validated': 'courses validated',
173	'admit': 'admitted',
174	'return': 'returning'
175	}
176
177	##
178	## END OF CONFIG
179	##
180
181	# Look for the first sequence of numbers
182	RE_PHONE = re.compile('[^\d](\d)[^\d]*')
183
184	def convert_fieldnames(fieldnames):
185	"""Replace input fieldnames by fieldnames of COLNAME_MAPPING.
186	"""
187	# Remove whitespaces
188	header = dict([(name, name.strip()) for name in fieldnames])
189	for in_name, out_name in COLNAME_MAPPING.items():
190	if in_name not in header.values():
191	continue
192	# Inverse dictionary lookup
193	key = [key for key,value in header.items() if value==in_name][0]
194	header[key] = out_name
195	return header
196
197	class Converters():
198	"""Converters to turn old-style values into new ones.
199	"""
200
201	@classmethod
202	def student_id(self, value, row):
203	""" 'A123456' --> 'EA123456'
204	"""
205	if len(value) == 7:
206	return 'B' + value
207	return value
208
209	@classmethod
210	def reg_state(self, value, row):
211	""" 'courses_validated' --> 'courses validated'
212	"""
213	return REGSTATE_MAPPING.get(value,value)
214
215	@classmethod
216	def reg_transition(self, value, row):
217	if value == "admitted":
218	return "admit"
219	if value == "returning":
220	return "return"
221	return value
222
223	@classmethod
224	def level(self, value, row):
225	""" '000' --> '10'
226	'800' --> '999' if pg student
227	"""
228	try:
229	number = int(value)
230	except ValueError:
231	return 9999
232	if number == 0:
233	return 10
234	if row.get('entry_mode') and row.get('entry_mode').startswith('pg'):
235	return 999
236	return number
237
238	@classmethod
239	def semester(self, value, row):
240	""" '0' --> '9'
241	"""
242	try:
243	number = int(value)
244	except ValueError:
245	return 9999
246	if number == 0:
247	return 9
248	return number
249
250	@classmethod
251	def application_category(self, value, row):
252	""" '' --> 'no'
253	"""
254	if value == '':
255	return 'no'
256	return value
257
258	@classmethod
259	def lga(self, value, row):
260	""" Remove apostrophe
261	"""
262	if value == 'akwa_ibom_uru_offong_oruko':
263	return 'akwa_ibom_urue-offong-oruko'
264	if value == 'edo_ohionmwon':
265	return 'edo_orhionmwon'
266
267	if value == 'nassarawa_nassarawa':
268	return 'nassarawa_nassawara'
269
270	if value == 'kogi_mopa-muro-mopi':
271	return 'kogi_mopa-muro'
272
273	if value == 'delta_osimili-north':
274	return 'delta_oshielli-north'
275
276	if value == 'delta_osimili':
277	return 'delta_oshimili'
278
279	if value == 'delta_osimili-south':
280	return 'delta_oshimili-south'
281	try:
282	value = value.replace("'","")
283	except:
284	return ''
285	lower = value.lower()
286	if lower in LGAS_dict.keys():
287	return lower
288	# If real names are given, let's see if a similar value
289	# in LGAS exist.
290	value = LGAS_inverted_stripped.get(strip(lower), value)
291	return value
292
293
294	@classmethod
295	def session(self, value, row):
296	""" '08' --> '2008'
297	'2008/2009' --> '2008'
298	"""
299	if '/' in value:
300	numbers = value.split('/')
301	number = int(numbers[0])
302	if number in range(2000,2015):
303	return number
304	else:
305	return 9999
306	try:
307	number = int(value)
308	except ValueError:
309	#import pdb; pdb.set_trace()
310	return 9999
311	if number < 14:
312	return number + 2000
313	elif number in range(2000,2015):
314	return number
315	else:
316	return 9999
317
318	@classmethod
319	def former(self, value, row):
320	""" True --> yes
321	'2008/2009' --> '2008'
322	"""
323	if value == 'True':
324	return 'yes'
325	return
326
327	@classmethod
328	def year(self, value, row):
329	""" '0' --> ''
330	"""
331	if value == '0':
332	return
333	if value == 'None':
334	return
335	return value
336
337
338	@classmethod
339	def marit_stat(self, value, row):
340	""" 'True'/'False' --> 'married'/'unmarried'
341	"""
342	if value in ('True','married'):
343	value = 'married'
344	elif value in ('False','unmarried'):
345	value = 'unmarried'
346	else:
347	value = ''
348	return value
349
350	@classmethod
351	def gender(self, value, row):
352	""" 'True'/'False' --> 'f'/'m'
353	"""
354	if value.strip() in ('F', 'True','f'):
355	value = 'f'
356	elif value.strip() in ('M', 'False','m'):
357	value = 'm'
358	else:
359	value = ''
360	return value
361
362	@classmethod
363	def date(self, value, row):
364	""" 'yyyy/mm/dd' --> 'yyyy-mm-dd'
365	"""
366	if value == "None":
367	value = ""
368	elif value == "":
369	value = ""
370	else:
371	value = value.replace('/', '-')
372	# We add the hash symbol to avoid automatic date transformation
373	# in Excel and Calc for further processing
374	value += '#'
375	return value
376
377	@classmethod
378	def no_int(self, value, row):
379	""" Add hash and skip numbers starting with 999999
380	"""
381	# We add the hash symbol to avoid automatic number transformation
382	# in Excel and Calc for further processing
383	try:
384	intvalue = int(value)
385	value += '#'
386	except:
387	pass
388	if value.startswith('999999'):
389	return
390	return value
391
392	@classmethod
393	def mode(self, value, row):
394	if value == "transfer_fulltime":
395	return "transfer_ft"
396	if value == "ume_ft":
397	return "utme_ft"
398	return value
399
400	@classmethod
401	def password(self, value, row):
402	if value == "not set":
403	return ""
404	return value
405
406	@classmethod
407	def nationality(self, value, row):
408	if value == "nigeria":
409	return "NG"
410	if value == "niger":
411	return "NE"
412	return value
413
414	@classmethod
415	def sittype(self, value, row):
416	if value == "nabtec":
417	return "nabteb"
418	return value
419
420	@classmethod
421	def company(self, value, row):
422	if value == "online":
423	return "interswitch"
424	return value
425
426	@classmethod
427	def p_category(self, value, row):
428	if value == "acceptance":
429	return "clearance"
430	return value
431
432	@classmethod
433	def email(self, value, row):
434	return value.strip()
435
436	@classmethod
437	def phone(self, value, row):
438	""" '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
439
440	Dashes and slashes are removed before looking for sequences
441	of numbers.
442	"""
443	if not value:
444	return
445	value = value.strip('#')
446	value = value.replace('-', '')
447	value = value.replace('/', '')
448	match = RE_PHONE.match(value)
449	phone = match.groups()[0]
450	if value.startswith('234'):
451	value = '+' + value[:3] + '-' + value[3:]
452	else:
453	value = '-%s' % phone
454	return value + '#'
455
456	@classmethod
457	def result(self, value, row):
458	try:
459	liste = eval(value)
460	except:
461	return
462	if isinstance(liste,list):
463	return [(i[0].lower(),i[1]) for i in liste]
464	return
465
466
467	def main():
468	input_file = '%s' % sys.argv[1]
469	output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
470	reader = csv.DictReader(open(input_file, 'rb'))
471	writer = None
472
473	for num, row in enumerate(reader):
474	if num == 0:
475	writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
476	print "FIELDS: "
477	for x, y in enumerate(reader.fieldnames):
478	print x, y
479	header = convert_fieldnames(reader.fieldnames)
480	writer.writerow(header)
481	if row.get('reg_state') == 'student_created':
482	# We do not reimport student records which have never been accessed.
483	continue
484	if row.get('status') == 'started':
485	# We do not reimport started payments.
486	continue
487	for key, value in row.items():
488	# Remove unwanted whitespaces.
489	row[key] = row[key].strip()
490	if not key in OPTIONS.keys():
491	continue
492	conv_name = OPTIONS[key]
493	converter = getattr(Converters, conv_name, None)
494	if converter is None:
495	print "WARNING: cannot find converter %s" % conv_name
496	continue
497	row[key] = converter(row[key], row)
498	try:
499	writer.writerow(row)
500	except:
501	print row['student_id']
502
503	print "Output written to %s" % output_file
504
505
506	if __name__ == '__main__':
507	if len(sys.argv) != 2:
508	print 'Usage: %s <filename>' % __file__
509	sys.exit(1)
510	main()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: