Context navigation

source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 9270

Last change on this file since 9270 was 9270, checked in by Henrik Bettermann, 12 years ago
Update fix_import_file.py.
Property svn:keywords set to `Id`
File size: 14.1 KB

Line
1	## $Id: fix_import_file.py 9270 2012-10-02 08:24:05Z henrik $
2	##
3	## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4	## This program is free software; you can redistribute it and/or modify
5	## it under the terms of the GNU General Public License as published by
6	## the Free Software Foundation; either version 2 of the License, or
7	## (at your option) any later version.
8	##
9	## This program is distributed in the hope that it will be useful,
10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	## GNU General Public License for more details.
13	##
14	## You should have received a copy of the GNU General Public License
15	## along with this program; if not, write to the Free Software
16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17	##
18	"""
19	Fix exports from old SRP portal and other data sources to make
20	them importable by current portal.
21
22	Usage:
23
24	Change into this directory, set the options below (files are assumed
25	to be in the same directory) and then run
26
27	python fix_import_file.py <filename>
28
29	Errors/warnings will be displayed on the shell, the output will be put
30	into the specified output file.
31
32
33	The lgas.py module must be copied into the same folder where this script
34	is started.
35	"""
36	import csv
37	import datetime
38	import re
39	import sys
40
41	try:
42	from lgas import LGAS
43	except:
44	print 'ERROR: lgas.py is missing.'
45	sys.exit(1)
46
47	def strip(string):
48	string = string.replace('_', '')
49	string = string.replace('/', '')
50	string = string.replace('-', '')
51	string = string.replace(' ', '')
52	string = string.lower()
53	return string
54
55	LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56	LGAS_dict = dict(LGAS)
57
58	##
59	## CONFIGURATION SECTION
60	##
61	# keys are fieldnames in input file, values are methods of class
62	# Converter (see below)
63	OPTIONS = {
64	'student_id': 'student_id',
65	'sex': 'gender',
66	'birthday': 'date',
67	'marit_stat': 'marit_stat',
68	'session': 'session',
69	'entry_session': 'session',
70	'current_session': 'session',
71	'session_id': 'session',
72	'entry_mode': 'mode',
73	'reg_state': 'reg_state',
74	'password': 'password',
75	'phone': 'phone',
76	'nationality': 'nationality',
77	'level': 'level',
78	'start_level': 'level',
79	'end_level': 'level',
80	'level_id': 'level',
81	'current_level': 'level',
82	'semester': 'semester',
83	'application_category': 'application_category',
84	'lga': 'lga',
85	'order_id': 'no_int',
86	'uniben': 'former',
87	'nysc_year': 'year',
88	'alr_date': 'date',
89	'fst_sit_date': 'date',
90	'scd_sit_date': 'date',
91	'emp_start': 'date',
92	'emp_end': 'date',
93	'emp_start2': 'date',
94	'emp_end2': 'date',
95	'fst_sit_results': 'result',
96	'scd_sit_results': 'result',
97	'alr_results': 'result',
98	'email': 'email',
99	'fst_sit_type': 'sittype',
100	'scd_sit_type': 'sittype',
101	'resp_pay_reference': 'no_int',
102	'type': 'company',
103	'date': 'date',
104	'core_or_elective': 'bool',
105	'category': 'p_category',
106	'reg_transition': 'reg_state', # we completely change this column,
107	# since reg_state import is usually intended
108	'transition': 'reg_transition',
109	'payment_date': 'date',
110	'validation_date': 'date',
111	}
112
113	# Mapping input file colnames --> output file colnames
114	COLNAME_MAPPING = {
115	# base data
116	'id': 'student_id',
117	'reg_state': 'state',
118	'reg_transition': 'state',
119	'jamb_reg_no': 'reg_number',
120	'matric_no': 'matric_number',
121	'birthday': 'date_of_birth',
122	'clr_ac_pin': 'clr_code',
123	# clearance
124	'hq_grade': 'hq_degree',
125	'uniben': 'former_matric',
126	'hq_type2': 'hq2_type',
127	'hq_grade2': 'hq2_degree',
128	'hq_school2': 'hq2_school',
129	'hq_matric_no2': 'hq2_matric_no',
130	'hq_session2': 'hq2_session',
131	'hq_disc2': 'hq2_disc',
132	'emp': 'employer',
133	'emp2': 'employer2',
134	'emp_position2': 'emp2_position',
135	'emp_start2': 'emp2_start',
136	'emp_end2': 'emp2_end',
137	'emp_reason2': 'emp2_reason',
138	# study course
139	'study_course': 'certificate',
140	# study level
141	'session': 'level_session',
142	'verdict': 'level_verdict',
143	# course ticket
144	'level_id': 'level',
145	'core_or_elective': 'mandatory',
146	# payment ticket
147	'order_id': 'p_id',
148	'status': 'p_state',
149	'category': 'p_category',
150	'resp_pay_reference': 'r_pay_reference',
151	'resp_desc': 'r_desc',
152	'resp_approved_amount': 'r_amount_approved',
153	'item': 'p_item',
154	'amount': 'amount_auth',
155	'resp_card_num': 'r_card_num',
156	'resp_code': 'r_code',
157	'date': 'creation_date',
158	'surcharge': 'surcharge_1',
159	'session_id': 'p_session',
160	'type': 'r_company',
161	}
162
163	# Mapping input state --> output state
164	REGSTATE_MAPPING = {
165	'student_created': 'created',
166	'admitted': 'admitted',
167	'objection_raised': 'clearance started',
168	'clearance_pin_entered': 'clearance started',
169	'clearance_requested': 'clearance requested',
170	'cleared_and_validated': 'cleared',
171	'school_fee_paid': 'school fee paid',
172	'returning': 'returning',
173	'courses_registered': 'courses registered',
174	'courses_validated': 'courses validated',
175	'admit': 'admitted',
176	'return': 'returning'
177	}
178
179	##
180	## END OF CONFIG
181	##
182
183	# Look for the first sequence of numbers
184	RE_PHONE = re.compile('[^\d](\d)[^\d]*')
185
186	def convert_fieldnames(fieldnames):
187	"""Replace input fieldnames by fieldnames of COLNAME_MAPPING.
188	"""
189	# Remove whitespaces
190	header = dict([(name, name.strip()) for name in fieldnames])
191	for in_name, out_name in COLNAME_MAPPING.items():
192	if in_name not in header.values():
193	continue
194	# Inverse dictionary lookup
195	key = [key for key,value in header.items() if value==in_name][0]
196	header[key] = out_name
197	return header
198
199	class Converters():
200	"""Converters to turn old-style values into new ones.
201	"""
202
203	@classmethod
204	def student_id(self, value, row):
205	""" 'A123456' --> 'EA123456'
206	"""
207	if len(value) == 7:
208	return 'M' + value
209	return value
210
211	@classmethod
212	def reg_state(self, value, row):
213	""" 'courses_validated' --> 'courses validated'
214	"""
215	return REGSTATE_MAPPING.get(value,value)
216
217	@classmethod
218	def reg_transition(self, value, row):
219	if value == "admitted":
220	return "admit"
221	if value == "returning":
222	return "return"
223	return value
224
225	@classmethod
226	def level(self, value, row):
227	""" '000' --> '10'
228	'800' --> '999' if pg student
229	"""
230	try:
231	number = int(value)
232	except ValueError:
233	return 9999
234	if number == 0:
235	return 10
236	if row.get('entry_mode') and row.get('entry_mode').startswith('pg'):
237	return 999
238	return number
239
240	@classmethod
241	def semester(self, value, row):
242	""" '0' --> '9'
243	"""
244	try:
245	number = int(value)
246	except ValueError:
247	return 9999
248	if number == 0:
249	return 9
250	return number
251
252	@classmethod
253	def application_category(self, value, row):
254	""" '' --> 'no'
255	"""
256	if value == '':
257	return 'no'
258	return value
259
260	@classmethod
261	def lga(self, value, row):
262	""" Remove apostrophe
263	"""
264	if value == 'akwa_ibom_uru_offong_oruko':
265	return 'akwa_ibom_urue-offong-oruko'
266	if value == 'edo_ohionmwon':
267	return 'edo_orhionmwon'
268
269	if value == 'nassarawa_nassarawa':
270	return 'nassarawa_nassawara'
271
272	if value == 'kogi_mopa-muro-mopi':
273	return 'kogi_mopa-muro'
274
275	if value == 'delta_osimili-north':
276	return 'delta_oshielli-north'
277
278	if value == 'delta_osimili':
279	return 'delta_oshimili'
280
281	if value == 'delta_osimili-south':
282	return 'delta_oshimili-south'
283	try:
284	value = value.replace("'","")
285	except:
286	return ''
287	lower = value.lower()
288	if lower in LGAS_dict.keys():
289	return lower
290	# If real names are given, let's see if a similar value
291	# in LGAS exist.
292	value = LGAS_inverted_stripped.get(strip(lower), value)
293	return value
294
295
296	@classmethod
297	def session(self, value, row):
298	""" '08' --> '2008'
299	'2008/2009' --> '2008'
300	"""
301	if '/' in value:
302	numbers = value.split('/')
303	number = int(numbers[0])
304	if number in range(2000,2015):
305	return number
306	else:
307	return 9999
308	try:
309	number = int(value)
310	except ValueError:
311	return 9999
312	if number < 14:
313	return number + 2000
314	elif number in range(2000,2015):
315	return number
316	else:
317	return 9999
318
319	@classmethod
320	def former(self, value, row):
321	""" True --> yes
322	'2008/2009' --> '2008'
323	"""
324	if value == 'True':
325	return 'yes'
326	return
327
328	@classmethod
329	def bool(self, value, row):
330	""" True --> 1
331	"""
332	if value in ('TRUE', 'True'):
333	return '1'
334	elif value in ('FALSE', 'False'):
335	return '0'
336	return
337
338	@classmethod
339	def year(self, value, row):
340	""" '0' --> ''
341	"""
342	if value == '0':
343	return
344	if value == 'None':
345	return
346	return value
347
348
349	@classmethod
350	def marit_stat(self, value, row):
351	""" 'True'/'False' --> 'married'/'unmarried'
352	"""
353	if value in ('True','married'):
354	value = 'married'
355	elif value in ('False','unmarried'):
356	value = 'unmarried'
357	else:
358	value = ''
359	return value
360
361	@classmethod
362	def gender(self, value, row):
363	""" 'True'/'False' --> 'f'/'m'
364	"""
365	if value.strip() in ('F', 'True','f'):
366	value = 'f'
367	elif value.strip() in ('M', 'False','m'):
368	value = 'm'
369	else:
370	value = ''
371	return value
372
373	@classmethod
374	def date(self, value, row):
375	""" 'yyyy/mm/dd' --> 'yyyy-mm-dd'
376	"""
377	if value == "None":
378	value = ""
379	elif value == "":
380	value = ""
381	else:
382	value = value.replace('/', '-')
383	# We add the hash symbol to avoid automatic date transformation
384	# in Excel and Calc for further processing
385	value += '#'
386	return value
387
388	@classmethod
389	def no_int(self, value, row):
390	""" Add hash and skip numbers starting with 999999
391	"""
392	# We add the hash symbol to avoid automatic number transformation
393	# in Excel and Calc for further processing
394	try:
395	intvalue = int(value)
396	value += '#'
397	except:
398	pass
399	if value.startswith('999999'):
400	return
401	return value
402
403	@classmethod
404	def mode(self, value, row):
405	if value == "transfer_fulltime":
406	return "transfer_ft"
407	if value == "ume_ft":
408	return "utme_ft"
409	return value
410
411	@classmethod
412	def password(self, value, row):
413	if value == "not set":
414	return ""
415	return value
416
417	@classmethod
418	def nationality(self, value, row):
419	if value in ('nigeria', 'Nigeria'):
420	return "NG"
421	if value in ('niger', 'Niger'):
422	return "NE"
423	return value
424
425	@classmethod
426	def sittype(self, value, row):
427	if value == "nabtec":
428	return "nabteb"
429	return value
430
431	@classmethod
432	def company(self, value, row):
433	if value == "online":
434	return "interswitch"
435	return value
436
437	@classmethod
438	def p_category(self, value, row):
439	if value == "acceptance":
440	return "clearance"
441	return value
442
443	@classmethod
444	def email(self, value, row):
445	return value.strip()
446
447	@classmethod
448	def phone(self, value, row):
449	""" '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
450
451	Dashes and slashes are removed before looking for sequences
452	of numbers.
453	"""
454	if not value:
455	return
456	value = value.strip('#')
457	value = value.replace('-', '')
458	value = value.replace('/', '')
459	match = RE_PHONE.match(value)
460	phone = match.groups()[0]
461	if value.startswith('234'):
462	value = '+' + value[:3] + '-' + value[3:]
463	else:
464	value = '-%s' % phone
465	return value + '#'
466
467	@classmethod
468	def result(self, value, row):
469	try:
470	liste = eval(value)
471	except:
472	return
473	if isinstance(liste,list):
474	return [(i[0].lower(),i[1]) for i in liste]
475	return
476
477
478	def main():
479	input_file = '%s' % sys.argv[1]
480	output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
481	reader = csv.DictReader(open(input_file, 'rb'))
482	writer = None
483
484	for num, row in enumerate(reader):
485	if num == 0:
486	writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
487	print "FIELDS: "
488	for x, y in enumerate(reader.fieldnames):
489	print x, y
490	header = convert_fieldnames(reader.fieldnames)
491	writer.writerow(header)
492	if row.get('reg_state') == 'student_created':
493	# We do not reimport student records which have never been accessed.
494	continue
495	if row.get('status') == 'started':
496	# We do not reimport started payments.
497	continue
498	for key, value in row.items():
499	# Remove unwanted whitespaces.
500	row[key] = row[key].strip()
501	if not key in OPTIONS.keys():
502	continue
503	conv_name = OPTIONS[key]
504	converter = getattr(Converters, conv_name, None)
505	if converter is None:
506	print "WARNING: cannot find converter %s" % conv_name
507	continue
508	row[key] = converter(row[key], row)
509	try:
510	writer.writerow(row)
511	except:
512	print row['student_id']
513
514	print "Output written to %s" % output_file
515
516
517	if __name__ == '__main__':
518	if len(sys.argv) != 2:
519	print 'Usage: %s <filename>' % __file__
520	sys.exit(1)
521	main()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: