Context navigation

source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 9090

Last change on this file since 9090 was 9019, checked in by Henrik Bettermann, 13 years ago
Avoid that script has to be run twice.
Property svn:keywords set to `Id`
File size: 13.8 KB

Line
1	## $Id: fix_import_file.py 9019 2012-07-18 14:16:21Z henrik $
2	##
3	## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4	## This program is free software; you can redistribute it and/or modify
5	## it under the terms of the GNU General Public License as published by
6	## the Free Software Foundation; either version 2 of the License, or
7	## (at your option) any later version.
8	##
9	## This program is distributed in the hope that it will be useful,
10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	## GNU General Public License for more details.
13	##
14	## You should have received a copy of the GNU General Public License
15	## along with this program; if not, write to the Free Software
16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17	##
18	"""
19	Fix exports from old SRP portal and other data sources to make
20	them importable by current portal.
21
22	Usage:
23
24	Change into this directory, set the options below (files are assumed
25	to be in the same directory) and then run
26
27	python fix_import_file.py <filename>
28
29	Errors/warnings will be displayed on the shell, the output will be put
30	into a new file named like the input file, with '_edited.csv' appended
30	to the part of the name before the extension.
31
32
33	The lgas.py module must be copied into the same folder where this script
34	is started.
35	"""
36	import csv
37	import datetime
38	import re
39	import sys
40
# The LGA list lives in a separate module which has to be copied next to
# this script (see module docstring).
try:
    from lgas import LGAS
except ImportError:
    # Only a failed import is reported as "missing"; any other error raised
    # while loading lgas.py is allowed to surface with its own traceback.
    print('ERROR: lgas.py is missing.')
    sys.exit(1)
46
def strip(string):
    """Return `string` lowercased with '_', '/', '-' and blanks removed.

    Used to compare names loosely, ignoring separators and case.
    """
    for unwanted in ('_', '/', '-', ' '):
        string = string.replace(unwanted, '')
    return string.lower()
54
# Lookup tables derived from LGAS (a sequence of 2-item entries):
#  - LGAS_inverted_stripped: strip()-normalised second item --> first item
#  - LGAS_dict: first item --> second item
# NOTE(review): presumably the first item is the LGA key and the second its
# human readable name -- confirm against lgas.py.
LGAS_inverted_stripped = dict((strip(second), first) for first, second in LGAS)
LGAS_dict = dict(LGAS)
57
##
## CONFIGURATION SECTION
##
# Keys are fieldnames in the input file, values are names of methods of
# class Converters (see below). Entries are grouped by converter.
OPTIONS = {
    # session converter
    'session': 'session',
    'entry_session': 'session',
    'current_session': 'session',
    'session_id': 'session',
    # level converter
    'level': 'level',
    'start_level': 'level',
    'end_level': 'level',
    'level_id': 'level',
    'current_level': 'level',
    # date converter
    'birthday': 'date',
    'alr_date': 'date',
    'fst_sit_date': 'date',
    'scd_sit_date': 'date',
    'emp_start': 'date',
    'emp_end': 'date',
    'emp_start2': 'date',
    'emp_end2': 'date',
    'date': 'date',
    # result converter
    'fst_sit_results': 'result',
    'scd_sit_results': 'result',
    'alr_results': 'result',
    # sittype converter
    'fst_sit_type': 'sittype',
    'scd_sit_type': 'sittype',
    # no_int converter
    'order_id': 'no_int',
    'resp_pay_reference': 'no_int',
    # reg_state converter
    'reg_state': 'reg_state',
    # we completely change the reg_transition column,
    # since reg_state import is usually intended
    'reg_transition': 'reg_state',
    # converters used by a single column
    'student_id': 'student_id',
    'sex': 'gender',
    'marit_stat': 'marit_stat',
    'entry_mode': 'mode',
    'password': 'password',
    'phone': 'phone',
    'nationality': 'nationality',
    'semester': 'semester',
    'application_category': 'application_category',
    'lga': 'lga',
    'uniben': 'former',
    'nysc_year': 'year',
    'email': 'email',
    'type': 'company',
    'category': 'p_category',
    'transition': 'reg_transition',
    }
109
# Column renaming table: name of a column in the input file --> name the
# same column gets in the output file. Columns not listed keep their name.
COLNAME_MAPPING = {
    # base data
    'id': 'student_id',
    'reg_state': 'state',
    'reg_transition': 'state',
    'jamb_reg_no': 'reg_number',
    'matric_no': 'matric_number',
    'birthday': 'date_of_birth',
    'clr_ac_pin': 'clr_code',

    # clearance
    'hq_grade': 'hq_degree',
    'uniben': 'former_matric',
    'hq_type2': 'hq2_type',
    'hq_grade2': 'hq2_degree',
    'hq_school2': 'hq2_school',
    'hq_matric_no2': 'hq2_matric_no',
    'hq_session2': 'hq2_session',
    'hq_disc2': 'hq2_disc',
    'emp': 'employer',
    'emp2': 'employer2',
    'emp_position2': 'emp2_position',
    'emp_start2': 'emp2_start',
    'emp_end2': 'emp2_end',
    'emp_reason2': 'emp2_reason',

    # study course
    'study_course': 'certificate',

    # study level
    'session': 'level_session',
    'verdict': 'level_verdict',

    # course ticket
    'level_id': 'level',
    'core_or_elective': 'mandatory',

    # payment ticket
    'order_id': 'p_id',
    'status': 'p_state',
    'category': 'p_category',
    'resp_pay_reference': 'r_pay_reference',
    'resp_desc': 'r_desc',
    'resp_approved_amount': 'r_amount_approved',
    'item': 'p_item',
    'amount': 'amount_auth',
    'resp_card_num': 'r_card_num',
    'resp_code': 'r_code',
    'date': 'creation_date',
    'surcharge': 'surcharge_1',
    'session_id': 'p_session',
    'type': 'r_company',
    }
159
# Registration state translation: state (or transition) name found in the
# input file --> state name written to the output file. Values without an
# entry are passed through unchanged by Converters.reg_state.
REGSTATE_MAPPING = {
    # state names
    'student_created': 'created',
    'admitted': 'admitted',
    'objection_raised': 'clearance started',
    'clearance_pin_entered': 'clearance started',
    'clearance_requested': 'clearance requested',
    'cleared_and_validated': 'cleared',
    'school_fee_paid': 'school fee paid',
    'returning': 'returning',
    'courses_registered': 'courses registered',
    'courses_validated': 'courses validated',
    # transition names
    'admit': 'admitted',
    'return': 'returning',
    }

##
## END OF CONFIG
##
179
# Look for the first sequence of digits: skip any leading non-digits,
# capture the (possibly empty) run of digits that follows and swallow
# trailing non-digits. With these quantifiers match() always succeeds, so
# callers can rely on groups()[0] being a string.
RE_PHONE = re.compile(r'[^\d]*(\d*)[^\d]*')
182
def convert_fieldnames(fieldnames, mapping=None):
    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.

    `fieldnames` is an iterable of column names as found in the input
    file. `mapping` is an optional dict of input name --> output name; it
    defaults to COLNAME_MAPPING.

    Returns a dict which maps each original (unstripped) fieldname to the
    name to use in the output file: the mapped name if the
    whitespace-stripped fieldname has an entry in `mapping`, the stripped
    fieldname otherwise.
    """
    if mapping is None:
        mapping = COLNAME_MAPPING
    header = {}
    for name in fieldnames:
        # Remove whitespaces before looking the name up. Each name is
        # translated exactly once, so the result does not depend on the
        # iteration order of the mapping.
        stripped = name.strip()
        header[name] = mapping.get(stripped, stripped)
    return header
195
class Converters(object):
    """Converters to turn old-style values into new ones.

    Each converter is a classmethod that takes the cell `value` and the
    complete `row` (a dict of all cells of the current line) and returns
    the value to store in the cell instead. Converters are looked up by
    name via the OPTIONS table.
    """

    # Known misspelled LGA keys --> key to use instead.
    # NOTE(review): 'delta_oshielli-north' differs from the 'oshimili'
    # spelling of the two neighbouring entries -- confirm against lgas.py.
    _LGA_CORRECTIONS = {
        'akwa_ibom_uru_offong_oruko': 'akwa_ibom_urue-offong-oruko',
        'edo_ohionmwon': 'edo_orhionmwon',
        'nassarawa_nassarawa': 'nassarawa_nassawara',
        'kogi_mopa-muro-mopi': 'kogi_mopa-muro',
        'delta_osimili-north': 'delta_oshielli-north',
        'delta_osimili': 'delta_oshimili',
        'delta_osimili-south': 'delta_oshimili-south',
        }

    @classmethod
    def student_id(cls, value, row):
        """ 'A123456' --> 'BA123456'

        Only values of exactly seven characters get the 'B' prefix.
        """
        if len(value) == 7:
            return 'B' + value
        return value

    @classmethod
    def reg_state(cls, value, row):
        """ 'courses_validated' --> 'courses validated'

        Values without an entry in REGSTATE_MAPPING are returned as is.
        """
        return REGSTATE_MAPPING.get(value, value)

    @classmethod
    def reg_transition(cls, value, row):
        """ 'admitted' --> 'admit', 'returning' --> 'return'
        """
        if value == "admitted":
            return "admit"
        if value == "returning":
            return "return"
        return value

    @classmethod
    def level(cls, value, row):
        """ '000' --> 10
        '800' --> 999 if pg student (entry_mode starts with 'pg')
        non-numeric --> 9999
        """
        try:
            number = int(value)
        except ValueError:
            return 9999
        if number == 0:
            return 10
        # The row may lack an entry_mode column (or hold None for it);
        # treat that like a non-pg student instead of failing.
        if (row.get('entry_mode') or '').startswith('pg'):
            return 999
        return number

    @classmethod
    def semester(cls, value, row):
        """ '0' --> 9, non-numeric --> 9999
        """
        try:
            number = int(value)
        except ValueError:
            return 9999
        if number == 0:
            return 9
        return number

    @classmethod
    def application_category(cls, value, row):
        """ '' --> 'no'
        """
        if value == '':
            return 'no'
        return value

    @classmethod
    def lga(cls, value, row):
        """ Fix known misspellings, remove apostrophes and try to turn
        the value into a key of LGAS.

        Returns '' for values which are not strings.
        """
        corrected = cls._LGA_CORRECTIONS.get(value)
        if corrected is not None:
            return corrected
        try:
            value = value.replace("'", "")
        except AttributeError:
            # not a string (e.g. None)
            return ''
        lower = value.lower()
        if lower in LGAS_dict:
            return lower
        # If real names are given, let's see if a similar value
        # in LGAS exist.
        return LGAS_inverted_stripped.get(strip(lower), value)

    @classmethod
    def session(cls, value, row):
        """ '08' --> 2008
        '2008/2009' --> 2008

        Everything which can not be turned into a year from 2000 to 2014
        gives 9999.
        """
        if '/' in value:
            try:
                number = int(value.split('/')[0])
            except ValueError:
                # e.g. 'n/a': not a session at all
                return 9999
            if 2000 <= number < 2015:
                return number
            return 9999
        try:
            number = int(value)
        except ValueError:
            return 9999
        if number < 14:
            return number + 2000
        if 2000 <= number < 2015:
            return number
        return 9999

    @classmethod
    def former(cls, value, row):
        """ 'True' --> 'yes', anything else --> None
        """
        if value == 'True':
            return 'yes'
        return None

    @classmethod
    def year(cls, value, row):
        """ '0' and 'None' --> None
        """
        if value in ('0', 'None'):
            return None
        return value

    @classmethod
    def marit_stat(cls, value, row):
        """ 'True'/'False' --> 'married'/'unmarried'

        Unknown values give ''.
        """
        if value in ('True', 'married'):
            return 'married'
        if value in ('False', 'unmarried'):
            return 'unmarried'
        return ''

    @classmethod
    def gender(cls, value, row):
        """ 'True'/'False' --> 'f'/'m'

        'F'/'f' and 'M'/'m' are accepted as well, unknown values give ''.
        """
        stripped = value.strip()
        if stripped in ('F', 'True', 'f'):
            return 'f'
        if stripped in ('M', 'False', 'm'):
            return 'm'
        return ''

    @classmethod
    def date(cls, value, row):
        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd#'

        'None' and '' give ''.
        """
        if value in ("None", ""):
            return ""
        # We add the hash symbol to avoid automatic date transformation
        # in Excel and Calc for further processing
        return value.replace('/', '-') + '#'

    @classmethod
    def no_int(cls, value, row):
        """ Add hash and skip numbers starting with 999999
        """
        # We add the hash symbol to avoid automatic number transformation
        # in Excel and Calc for further processing
        try:
            int(value)
        except (TypeError, ValueError):
            pass
        else:
            value += '#'
        if value.startswith('999999'):
            return None
        return value

    @classmethod
    def mode(cls, value, row):
        """ 'transfer_fulltime' --> 'transfer_ft', 'ume_ft' --> 'utme_ft'
        """
        if value == "transfer_fulltime":
            return "transfer_ft"
        if value == "ume_ft":
            return "utme_ft"
        return value

    @classmethod
    def password(cls, value, row):
        """ 'not set' --> ''
        """
        if value == "not set":
            return ""
        return value

    @classmethod
    def nationality(cls, value, row):
        """ 'nigeria' --> 'NG', 'niger' --> 'NE'
        """
        if value == "nigeria":
            return "NG"
        if value == "niger":
            return "NE"
        return value

    @classmethod
    def sittype(cls, value, row):
        """ 'nabtec' --> 'nabteb'
        """
        if value == "nabtec":
            return "nabteb"
        return value

    @classmethod
    def company(cls, value, row):
        """ 'online' --> 'interswitch'
        """
        if value == "online":
            return "interswitch"
        return value

    @classmethod
    def p_category(cls, value, row):
        """ 'acceptance' --> 'clearance'
        """
        if value == "acceptance":
            return "clearance"
        return value

    @classmethod
    def email(cls, value, row):
        """ Remove surrounding whitespace.
        """
        return value.strip()

    @classmethod
    def phone(cls, value, row):
        """ '234<digits>' -> '+234-<digits>#'
        '<num-seq1>-<num-seq2> asd' -> '-<num-seq1><num-seq2>#'

        Dashes and slashes are removed before looking for sequences
        of numbers. Empty values give None.
        """
        if not value:
            return None
        value = value.strip('#')
        value = value.replace('-', '')
        value = value.replace('/', '')
        if value.startswith('234'):
            return '+' + value[:3] + '-' + value[3:] + '#'
        match = RE_PHONE.match(value)
        # Should RE_PHONE not match at all, keep the cleaned value
        # rather than failing on a missing match object.
        phone = match.groups()[0] if match is not None else value
        return '-%s' % phone + '#'

    @classmethod
    def result(cls, value, row):
        """ Turn the string representation of a list of 2-item
        sequences into a list of tuples with lowercased first item.

        Returns None if the value can not be evaluated or is no list.
        """
        try:
            # NOTE(review): eval() executes arbitrary expressions found
            # in the input file. Only run this script on trusted exports;
            # consider ast.literal_eval if the data are plain literals.
            liste = eval(value)
        except Exception:
            return None
        if isinstance(liste, list):
            return [(i[0].lower(), i[1]) for i in liste]
        return None
464
465
def main():
    """Convert the CSV file named by sys.argv[1].

    The output goes to a file in the same directory as the input, named
    like the input up to the first dot of its basename plus
    '_edited.csv'. The header row is rewritten via convert_fieldnames().
    Rows with reg_state 'student_created' or status 'started' are
    dropped. All other cells are stripped and, if their column is listed
    in OPTIONS, passed through the named converter of class Converters.

    Progress and problems are printed to the shell. The script exits
    with status 1 if the input file has no header row.
    """
    import os  # local import: only needed to derive the output file name

    input_file = '%s' % sys.argv[1]
    # Cut the extension from the basename only, so that dots in directory
    # names (e.g. './students.csv') do not truncate the output path.
    directory, basename = os.path.split(input_file)
    output_file = os.path.join(
        directory, '%s_edited.csv' % basename.split('.')[0])

    # Binary modes are what the csv module of Python 2 expects.
    with open(input_file, 'rb') as infile:
        reader = csv.DictReader(infile)
        fieldnames = reader.fieldnames
        if not fieldnames:
            print('ERROR: no header row found in %s' % input_file)
            sys.exit(1)
        print("FIELDS: ")
        for x, y in enumerate(fieldnames):
            print('%s %s' % (x, y))
        with open(output_file, 'wb') as outfile:
            writer = csv.DictWriter(outfile, fieldnames)
            # Written before any data row, so that even an input file
            # without data rows yields an output file with a header.
            writer.writerow(convert_fieldnames(fieldnames))
            for row in reader:
                if row.get('reg_state') == 'student_created':
                    # We do not reimport student records which have
                    # never been accessed.
                    continue
                if row.get('status') == 'started':
                    # We do not reimport started payments.
                    continue
                for key in list(row):
                    value = row[key]
                    if value is None:
                        # Short row: DictReader fills missing cells
                        # with None.
                        value = row[key] = ''
                    elif hasattr(value, 'strip'):
                        # Remove unwanted whitespaces.
                        value = row[key] = value.strip()
                    if key not in OPTIONS:
                        continue
                    conv_name = OPTIONS[key]
                    converter = getattr(Converters, conv_name, None)
                    if converter is None:
                        print("WARNING: cannot find converter %s" % conv_name)
                        continue
                    row[key] = converter(value, row)
                try:
                    writer.writerow(row)
                except Exception:
                    # Best effort: name the record which could not be
                    # written and carry on with the next one. get() is
                    # used as the input may lack a student_id column.
                    print(row.get('student_id'))

    print("Output written to %s" % output_file)
503
504
# Script entry point: exactly one argument (the input file) is required.
if __name__ == '__main__':
    if len(sys.argv) == 2:
        main()
    else:
        print('Usage: %s <filename>' % __file__)
        sys.exit(1)

Note: See TracBrowser for help on using the repository browser.

Download in other formats: