Context navigation

source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 9007

Last change on this file since 9007 was 9007, checked in by Henrik Bettermann, 13 years ago

Extend import file repair script.

Add shell script for downloading export files.

Property svn:keywords set to Id

File size: 13.2 KB

Line
1	## $Id: fix_import_file.py 9007 2012-07-17 04:36:27Z henrik $
2	##
3	## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4	## This program is free software; you can redistribute it and/or modify
5	## it under the terms of the GNU General Public License as published by
6	## the Free Software Foundation; either version 2 of the License, or
7	## (at your option) any later version.
8	##
9	## This program is distributed in the hope that it will be useful,
10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	## GNU General Public License for more details.
13	##
14	## You should have received a copy of the GNU General Public License
15	## along with this program; if not, write to the Free Software
16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17	##
18	"""
19	Fix exports from old SRP portal and other data sources to make
20	them importable by current portal.
21
22	Usage:
23
24	Change into this directory, set the options below (files are assumed
25	to be in the same directory) and then run
26
27	python fix_import_file.py <filename>
28
29	Errors/warnings will be displayed on the shell, the output will be put
30	into the specified output file.
31
32
33	The lgas.py module must be copied into the same folder where this script
34	is started.
35	"""
36	import csv
37	import datetime
38	import re
39	import sys
40
41	try:
42	from lgas import LGAS
43	except:
44	print 'ERROR: lgas.py is missing.'
45	sys.exit(1)
46
47	def strip(string):
48	string = string.replace('_', '')
49	string = string.replace('/', '')
50	string = string.replace('-', '')
51	string = string.replace(' ', '')
52	string = string.lower()
53	return string
54
55	LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56	LGAS_dict = dict(LGAS)
57
58	##
59	## CONFIGURATION SECTION
60	##
61	# keys are fieldnames in input file, values are methods of class
62	# Converter (see below)
63	OPTIONS = {
64	'student_id': 'student_id',
65	'sex': 'gender',
66	'birthday': 'date',
67	'marit_stat': 'marit_stat',
68	'session': 'session',
69	'entry_session': 'session',
70	'current_session': 'session',
71	'session_id': 'session',
72	'entry_mode': 'mode',
73	'reg_state': 'reg_state',
74	'password': 'password',
75	'phone': 'phone',
76	'nationality': 'nationality',
77	'level': 'level',
78	'start_level': 'level',
79	'end_level': 'level',
80	'level_id': 'level',
81	'current_level': 'level',
82	'semester': 'semester',
83	'application_category': 'application_category',
84	'lga': 'lga',
85	'order_id': 'no_int',
86	'uniben': 'former',
87	'nysc_year': 'year',
88	'alr_date': 'date',
89	'fst_sit_date': 'date',
90	'scd_sit_date': 'date',
91	'emp_start': 'date',
92	'emp_end': 'date',
93	'emp_start2': 'date',
94	'emp_end2': 'date',
95	'fst_sit_results': 'result',
96	'scd_sit_results': 'result',
97	'alr_results': 'result',
98	'email': 'email',
99	'fst_sit_type': 'sittype',
100	'scd_sit_type': 'sittype',
101	'resp_pay_reference': 'no_int',
102	'type': 'company',
103	'date': 'date',
104	'category': 'p_category',
105	'reg_transition': 'reg_state', # we completely change this column,
106	# since reg_state import is usually intended
107	'transition': 'reg_transition',
108	}
109
110	# Mapping input file colnames --> output file colnames
111	COLNAME_MAPPING = {
112	# base data
113	'reg_state': 'state',
114	'reg_transition': 'state',
115	'jamb_reg_no': 'reg_number',
116	'matric_no': 'matric_number',
117	'birthday': 'date_of_birth',
118	'clr_ac_pin': 'clr_code',
119	# clearance
120	'hq_grade': 'hq_degree',
121	'uniben': 'former_matric',
122	'hq_type2': 'hq2_type',
123	'hq_grade2': 'hq2_degree',
124	'hq_school2': 'hq2_school',
125	'hq_matric_no2': 'hq2_matric_no',
126	'hq_session2': 'hq2_session',
127	'hq_disc2': 'hq2_disc',
128	'emp': 'employer',
129	'emp2': 'employer2',
130	'emp_position2': 'emp2_position',
131	'emp_start2': 'emp2_start',
132	'emp_end2': 'emp2_end',
133	'emp_reason2': 'emp2_reason',
134	# study course
135	'study_course': 'certificate',
136	# study level
137	'session': 'level_session',
138	'verdict': 'level_verdict',
139	# course ticket
140	'level_id': 'level',
141	'core_or_elective': 'mandatory',
142	# payment ticket
143	'order_id': 'p_id',
144	'status': 'p_state',
145	'category': 'p_category',
146	'resp_pay_reference': 'r_pay_reference',
147	'resp_desc': 'r_desc',
148	'resp_approved_amount': 'r_amount_approved',
149	'item': 'p_item',
150	'amount': 'amount_auth',
151	'resp_card_num': 'r_card_num',
152	'resp_code': 'r_code',
153	'date': 'creation_date',
154	'surcharge': 'surcharge_1',
155	'session_id': 'p_session',
156	'type': 'r_company',
157	}
158
159	# Mapping input state --> output state
160	REGSTATE_MAPPING = {
161	'student_created': 'created',
162	'admitted': 'admitted',
163	'objection_raised': 'clearance started',
164	'clearance_pin_entered': 'clearance started',
165	'clearance_requested': 'clearance requested',
166	'cleared_and_validated': 'cleared',
167	'school_fee_paid': 'school fee paid',
168	'returning': 'returning',
169	'courses_registered': 'courses registered',
170	'courses_validated': 'courses validated',
171	'admit': 'admitted',
172	'return': 'returning'
173	}
174
175	##
176	## END OF CONFIG
177	##
178
179	# Look for the first sequence of numbers
180	RE_PHONE = re.compile('[^\d](\d)[^\d]*')
181
182	def convert_fieldnames(fieldnames):
183	"""Replace input fieldnames by fieldnames of COLNAME_MAPPING.
184	"""
185	header = dict([(name, name) for name in fieldnames])
186	for in_name, out_name in COLNAME_MAPPING.items():
187	if in_name not in header:
188	continue
189	header[in_name] = out_name
190	return header
191
192	class Converters():
193	"""Converters to turn old-style values into new ones.
194	"""
195
196	@classmethod
197	def student_id(self, value):
198	""" 'A123456' --> 'EA123456'
199	"""
200	if len(value) == 7:
201	return 'B' + value
202	return value
203
204	@classmethod
205	def reg_state(self, value):
206	""" 'courses_validated' --> 'courses validated'
207	"""
208	return REGSTATE_MAPPING.get(value,value)
209
210	@classmethod
211	def reg_transition(self, value):
212	if value == "admitted":
213	return "admit"
214	if value == "returning":
215	return "return"
216	return value
217
218	@classmethod
219	def level(self, value):
220	""" '000' --> '10'
221	"""
222	try:
223	number = int(value)
224	except ValueError:
225	return 9999
226	if number == 0:
227	return 10
228	return number
229
230	@classmethod
231	def semester(self, value):
232	""" '0' --> '9'
233	"""
234	try:
235	number = int(value)
236	except ValueError:
237	return 9999
238	if number == 0:
239	return 9
240	return number
241
242	@classmethod
243	def application_category(self, value):
244	""" '' --> 'no'
245	"""
246	if value == '':
247	return 'no'
248	return value
249
250	@classmethod
251	def lga(self, value):
252	""" Remove apostrophe
253	"""
254	if value == 'akwa_ibom_uru_offong_oruko':
255	return 'akwa_ibom_urue-offong-oruko'
256	if value == 'edo_ohionmwon':
257	return 'edo_orhionmwon'
258
259	if value == 'nassarawa_nassarawa':
260	return 'nassarawa_nassawara'
261
262	if value == 'kogi_mopa-muro-mopi':
263	return 'kogi_mopa-muro'
264
265	if value == 'delta_osimili-north':
266	return 'delta_oshielli-north'
267
268	if value == 'delta_osimili':
269	return 'delta_oshimili'
270
271	if value == 'delta_osimili-south':
272	return 'delta_oshimili-south'
273	try:
274	value = value.replace("'","")
275	except:
276	return ''
277	lower = value.lower()
278	if lower in LGAS_dict.keys():
279	return lower
280	# If real names are given, let's see if a similar value
281	# in LGAS exist.
282	value = LGAS_inverted_stripped.get(strip(lower), value)
283	return value
284
285
286	@classmethod
287	def session(self, value):
288	""" '08' --> '2008'
289	'2008/2009' --> '2008'
290	"""
291	if '/' in value:
292	numbers = value.split('/')
293	number = int(numbers[0])
294	if number in range(2000,2015):
295	return number
296	else:
297	return 9999
298	try:
299	number = int(value)
300	except ValueError:
301	#import pdb; pdb.set_trace()
302	return 9999
303	if number < 14:
304	return number + 2000
305	elif number in range(2000,2015):
306	return number
307	else:
308	return 9999
309
310	@classmethod
311	def former(self, value):
312	""" True --> yes
313	'2008/2009' --> '2008'
314	"""
315	if value == 'True':
316	return 'yes'
317	return
318
319	@classmethod
320	def year(self, value):
321	""" '0' --> ''
322	"""
323	if value == '0':
324	return
325	if value == 'None':
326	return
327	return value
328
329
330	@classmethod
331	def marit_stat(self, value):
332	""" 'True'/'False' --> 'married'/'unmarried'
333	"""
334	if value in ('True','married'):
335	value = 'married'
336	elif value in ('False','unmarried'):
337	value = 'unmarried'
338	else:
339	value = ''
340	return value
341
342	@classmethod
343	def gender(self, value):
344	""" 'True'/'False' --> 'f'/'m'
345	"""
346	if value.strip() in ('F', 'True','f'):
347	value = 'f'
348	elif value.strip() in ('M', 'False','m'):
349	value = 'm'
350	else:
351	value = ''
352	return value
353
354	@classmethod
355	def date(self, value):
356	""" 'yyyy/mm/dd' --> 'yyyy-mm-dd'
357	"""
358	if value == "None":
359	value = ""
360	elif value == "":
361	value = ""
362	else:
363	value = value.replace('/', '-')
364	# We add the hash symbol to avoid automatic date transformation
365	# in Excel and Calc for further processing
366	value += '#'
367	return value
368
369	@classmethod
370	def no_int(self, value):
371	""" Add hash and skip numbers starting with 999999
372	"""
373	# We add the hash symbol to avoid automatic number transformation
374	# in Excel and Calc for further processing
375	try:
376	intvalue = int(value)
377	value += '#'
378	except:
379	pass
380	if value.startswith('999999'):
381	return
382	return value
383
384	@classmethod
385	def mode(self, value):
386	if value == "transfer_fulltime":
387	return "transfer_ft"
388	if value == "ume_ft":
389	return "utme_ft"
390	return value
391
392	@classmethod
393	def password(self, value):
394	if value == "not set":
395	return ""
396	return value
397
398	@classmethod
399	def nationality(self, value):
400	if value == "nigeria":
401	return "NG"
402	if value == "niger":
403	return "NE"
404	return value
405
406	@classmethod
407	def sittype(self, value):
408	if value == "nabtec":
409	return "nabteb"
410	return value
411
412	@classmethod
413	def company(self, value):
414	if value == "online":
415	return "interswitch"
416	return value
417
418	@classmethod
419	def p_category(self, value):
420	if value == "acceptance":
421	return "clearance"
422	return value
423
424	@classmethod
425	def email(self, value):
426	return value.strip()
427
428	@classmethod
429	def phone(self, value):
430	""" '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
431
432	Dashes and slashes are removed before looking for sequences
433	of numbers.
434	"""
435	if not value:
436	return
437	value = value.strip('#')
438	value = value.replace('-', '')
439	value = value.replace('/', '')
440	match = RE_PHONE.match(value)
441	phone = match.groups()[0]
442	if value.startswith('234'):
443	value = '+' + value[:3] + '-' + value[3:]
444	else:
445	value = '-%s' % phone
446	return value + '#'
447
448	@classmethod
449	def result(self, value):
450	try:
451	liste = eval(value)
452	except:
453	return
454	if isinstance(liste,list):
455	return [(i[0].lower(),i[1]) for i in liste]
456	return
457
458
459	def main():
460	input_file = '%s' % sys.argv[1]
461	output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
462	reader = csv.DictReader(open(input_file, 'rb'))
463	writer = None
464
465	for num, row in enumerate(reader):
466	if num == 0:
467	writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
468	print "FIELDS: "
469	for x, y in enumerate(reader.fieldnames):
470	print x, y
471	header = convert_fieldnames(reader.fieldnames)
472	writer.writerow(header)
473	if row.get('reg_state') == 'student_created':
474	continue
475	if row.get('status') == 'started':
476	# We do not use started payments
477	continue
478	for key, value in row.items():
479	if not key in OPTIONS.keys():
480	continue
481	conv_name = OPTIONS[key]
482	converter = getattr(Converters, conv_name, None)
483	if converter is None:
484	print "WARNING: cannot find converter %s" % conv_name
485	continue
486	row[key] = converter(row[key])
487	try:
488	writer.writerow(row)
489	except:
490	print row['student_id']
491
492	print "Output written to %s" % output_file
493
494
495	if __name__ == '__main__':
496	if len(sys.argv) != 2:
497	print 'Usage: %s <filename>' % __file__
498	sys.exit(1)
499	main()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: