Context navigation

source: main/waeup.kofa/branches/uli-async-update/tools/fix_import_file.py @ 9174

Last change on this file since 9174 was 9169, checked in by uli, 12 years ago
Merge changes from trunk, r8786-HEAD
Property svn:keywords set to `Id`
File size: 13.9 KB

Line
1	## $Id: fix_import_file.py 9169 2012-09-10 11:05:07Z uli $
2	##
3	## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4	## This program is free software; you can redistribute it and/or modify
5	## it under the terms of the GNU General Public License as published by
6	## the Free Software Foundation; either version 2 of the License, or
7	## (at your option) any later version.
8	##
9	## This program is distributed in the hope that it will be useful,
10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	## GNU General Public License for more details.
13	##
14	## You should have received a copy of the GNU General Public License
15	## along with this program; if not, write to the Free Software
16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17	##
18	"""
19	Fix exports from old SRP portal and other data sources to make
20	them importable by current portal.
21
22	Usage:
23
24	Change into this directory, set the options below (files are assumed
25	to be in the same directory) and then run
26
27	python fix_import_file.py <filename>
28
29	Errors/warnings will be displayed on the shell, the output will be put
30	into the specified output file.
31
32
33	The lgas.py module must be copied into the same folder where this script
34	is started.
35	"""
36	import csv
37	import datetime
38	import re
39	import sys
40
41	try:
42	from lgas import LGAS
43	except:
44	print 'ERROR: lgas.py is missing.'
45	sys.exit(1)
46
47	def strip(string):
48	string = string.replace('_', '')
49	string = string.replace('/', '')
50	string = string.replace('-', '')
51	string = string.replace(' ', '')
52	string = string.lower()
53	return string
54
55	LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56	LGAS_dict = dict(LGAS)
57
58	##
59	## CONFIGURATION SECTION
60	##
61	# keys are fieldnames in input file, values are methods of class
62	# Converter (see below)
63	OPTIONS = {
64	'student_id': 'student_id',
65	'sex': 'gender',
66	'birthday': 'date',
67	'marit_stat': 'marit_stat',
68	'session': 'session',
69	'entry_session': 'session',
70	'current_session': 'session',
71	'session_id': 'session',
72	'entry_mode': 'mode',
73	'reg_state': 'reg_state',
74	'password': 'password',
75	'phone': 'phone',
76	'nationality': 'nationality',
77	'level': 'level',
78	'start_level': 'level',
79	'end_level': 'level',
80	'level_id': 'level',
81	'current_level': 'level',
82	'semester': 'semester',
83	'application_category': 'application_category',
84	'lga': 'lga',
85	'order_id': 'no_int',
86	'uniben': 'former',
87	'nysc_year': 'year',
88	'alr_date': 'date',
89	'fst_sit_date': 'date',
90	'scd_sit_date': 'date',
91	'emp_start': 'date',
92	'emp_end': 'date',
93	'emp_start2': 'date',
94	'emp_end2': 'date',
95	'fst_sit_results': 'result',
96	'scd_sit_results': 'result',
97	'alr_results': 'result',
98	'email': 'email',
99	'fst_sit_type': 'sittype',
100	'scd_sit_type': 'sittype',
101	'resp_pay_reference': 'no_int',
102	'type': 'company',
103	'date': 'date',
104	'category': 'p_category',
105	'reg_transition': 'reg_state', # we completely change this column,
106	# since reg_state import is usually intended
107	'transition': 'reg_transition',
108	'payment_date': 'date',
109	'validation_date': 'date',
110	}
111
112	# Mapping input file colnames --> output file colnames
113	COLNAME_MAPPING = {
114	# base data
115	'id': 'student_id',
116	'reg_state': 'state',
117	'reg_transition': 'state',
118	'jamb_reg_no': 'reg_number',
119	'matric_no': 'matric_number',
120	'birthday': 'date_of_birth',
121	'clr_ac_pin': 'clr_code',
122	# clearance
123	'hq_grade': 'hq_degree',
124	'uniben': 'former_matric',
125	'hq_type2': 'hq2_type',
126	'hq_grade2': 'hq2_degree',
127	'hq_school2': 'hq2_school',
128	'hq_matric_no2': 'hq2_matric_no',
129	'hq_session2': 'hq2_session',
130	'hq_disc2': 'hq2_disc',
131	'emp': 'employer',
132	'emp2': 'employer2',
133	'emp_position2': 'emp2_position',
134	'emp_start2': 'emp2_start',
135	'emp_end2': 'emp2_end',
136	'emp_reason2': 'emp2_reason',
137	# study course
138	'study_course': 'certificate',
139	# study level
140	'session': 'level_session',
141	'verdict': 'level_verdict',
142	# course ticket
143	'level_id': 'level',
144	'core_or_elective': 'mandatory',
145	# payment ticket
146	'order_id': 'p_id',
147	'status': 'p_state',
148	'category': 'p_category',
149	'resp_pay_reference': 'r_pay_reference',
150	'resp_desc': 'r_desc',
151	'resp_approved_amount': 'r_amount_approved',
152	'item': 'p_item',
153	'amount': 'amount_auth',
154	'resp_card_num': 'r_card_num',
155	'resp_code': 'r_code',
156	'date': 'creation_date',
157	'surcharge': 'surcharge_1',
158	'session_id': 'p_session',
159	'type': 'r_company',
160	}
161
162	# Mapping input state --> output state
163	REGSTATE_MAPPING = {
164	'student_created': 'created',
165	'admitted': 'admitted',
166	'objection_raised': 'clearance started',
167	'clearance_pin_entered': 'clearance started',
168	'clearance_requested': 'clearance requested',
169	'cleared_and_validated': 'cleared',
170	'school_fee_paid': 'school fee paid',
171	'returning': 'returning',
172	'courses_registered': 'courses registered',
173	'courses_validated': 'courses validated',
174	'admit': 'admitted',
175	'return': 'returning'
176	}
177
178	##
179	## END OF CONFIG
180	##
181
182	# Look for the first sequence of numbers
183	RE_PHONE = re.compile('[^\d](\d)[^\d]*')
184
185	def convert_fieldnames(fieldnames):
186	"""Replace input fieldnames by fieldnames of COLNAME_MAPPING.
187	"""
188	# Remove whitespaces
189	header = dict([(name, name.strip()) for name in fieldnames])
190	for in_name, out_name in COLNAME_MAPPING.items():
191	if in_name not in header.values():
192	continue
193	# Inverse dictionary lookup
194	key = [key for key,value in header.items() if value==in_name][0]
195	header[key] = out_name
196	return header
197
198	class Converters():
199	"""Converters to turn old-style values into new ones.
200	"""
201
202	@classmethod
203	def student_id(self, value, row):
204	""" 'A123456' --> 'EA123456'
205	"""
206	if len(value) == 7:
207	return 'B' + value
208	return value
209
210	@classmethod
211	def reg_state(self, value, row):
212	""" 'courses_validated' --> 'courses validated'
213	"""
214	return REGSTATE_MAPPING.get(value,value)
215
216	@classmethod
217	def reg_transition(self, value, row):
218	if value == "admitted":
219	return "admit"
220	if value == "returning":
221	return "return"
222	return value
223
224	@classmethod
225	def level(self, value, row):
226	""" '000' --> '10'
227	'800' --> '999' if pg student
228	"""
229	try:
230	number = int(value)
231	except ValueError:
232	return 9999
233	if number == 0:
234	return 10
235	if row.get('entry_mode') and row.get('entry_mode').startswith('pg'):
236	return 999
237	return number
238
239	@classmethod
240	def semester(self, value, row):
241	""" '0' --> '9'
242	"""
243	try:
244	number = int(value)
245	except ValueError:
246	return 9999
247	if number == 0:
248	return 9
249	return number
250
251	@classmethod
252	def application_category(self, value, row):
253	""" '' --> 'no'
254	"""
255	if value == '':
256	return 'no'
257	return value
258
259	@classmethod
260	def lga(self, value, row):
261	""" Remove apostrophe
262	"""
263	if value == 'akwa_ibom_uru_offong_oruko':
264	return 'akwa_ibom_urue-offong-oruko'
265	if value == 'edo_ohionmwon':
266	return 'edo_orhionmwon'
267
268	if value == 'nassarawa_nassarawa':
269	return 'nassarawa_nassawara'
270
271	if value == 'kogi_mopa-muro-mopi':
272	return 'kogi_mopa-muro'
273
274	if value == 'delta_osimili-north':
275	return 'delta_oshielli-north'
276
277	if value == 'delta_osimili':
278	return 'delta_oshimili'
279
280	if value == 'delta_osimili-south':
281	return 'delta_oshimili-south'
282	try:
283	value = value.replace("'","")
284	except:
285	return ''
286	lower = value.lower()
287	if lower in LGAS_dict.keys():
288	return lower
289	# If real names are given, let's see if a similar value
290	# in LGAS exist.
291	value = LGAS_inverted_stripped.get(strip(lower), value)
292	return value
293
294
295	@classmethod
296	def session(self, value, row):
297	""" '08' --> '2008'
298	'2008/2009' --> '2008'
299	"""
300	if '/' in value:
301	numbers = value.split('/')
302	number = int(numbers[0])
303	if number in range(2000,2015):
304	return number
305	else:
306	return 9999
307	try:
308	number = int(value)
309	except ValueError:
310	#import pdb; pdb.set_trace()
311	return 9999
312	if number < 14:
313	return number + 2000
314	elif number in range(2000,2015):
315	return number
316	else:
317	return 9999
318
319	@classmethod
320	def former(self, value, row):
321	""" True --> yes
322	'2008/2009' --> '2008'
323	"""
324	if value == 'True':
325	return 'yes'
326	return
327
328	@classmethod
329	def year(self, value, row):
330	""" '0' --> ''
331	"""
332	if value == '0':
333	return
334	if value == 'None':
335	return
336	return value
337
338
339	@classmethod
340	def marit_stat(self, value, row):
341	""" 'True'/'False' --> 'married'/'unmarried'
342	"""
343	if value in ('True','married'):
344	value = 'married'
345	elif value in ('False','unmarried'):
346	value = 'unmarried'
347	else:
348	value = ''
349	return value
350
351	@classmethod
352	def gender(self, value, row):
353	""" 'True'/'False' --> 'f'/'m'
354	"""
355	if value.strip() in ('F', 'True','f'):
356	value = 'f'
357	elif value.strip() in ('M', 'False','m'):
358	value = 'm'
359	else:
360	value = ''
361	return value
362
363	@classmethod
364	def date(self, value, row):
365	""" 'yyyy/mm/dd' --> 'yyyy-mm-dd'
366	"""
367	if value == "None":
368	value = ""
369	elif value == "":
370	value = ""
371	else:
372	value = value.replace('/', '-')
373	# We add the hash symbol to avoid automatic date transformation
374	# in Excel and Calc for further processing
375	value += '#'
376	return value
377
378	@classmethod
379	def no_int(self, value, row):
380	""" Add hash and skip numbers starting with 999999
381	"""
382	# We add the hash symbol to avoid automatic number transformation
383	# in Excel and Calc for further processing
384	try:
385	intvalue = int(value)
386	value += '#'
387	except:
388	pass
389	if value.startswith('999999'):
390	return
391	return value
392
393	@classmethod
394	def mode(self, value, row):
395	if value == "transfer_fulltime":
396	return "transfer_ft"
397	if value == "ume_ft":
398	return "utme_ft"
399	return value
400
401	@classmethod
402	def password(self, value, row):
403	if value == "not set":
404	return ""
405	return value
406
407	@classmethod
408	def nationality(self, value, row):
409	if value == "nigeria":
410	return "NG"
411	if value == "niger":
412	return "NE"
413	return value
414
415	@classmethod
416	def sittype(self, value, row):
417	if value == "nabtec":
418	return "nabteb"
419	return value
420
421	@classmethod
422	def company(self, value, row):
423	if value == "online":
424	return "interswitch"
425	return value
426
427	@classmethod
428	def p_category(self, value, row):
429	if value == "acceptance":
430	return "clearance"
431	return value
432
433	@classmethod
434	def email(self, value, row):
435	return value.strip()
436
437	@classmethod
438	def phone(self, value, row):
439	""" '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
440
441	Dashes and slashes are removed before looking for sequences
442	of numbers.
443	"""
444	if not value:
445	return
446	value = value.strip('#')
447	value = value.replace('-', '')
448	value = value.replace('/', '')
449	match = RE_PHONE.match(value)
450	phone = match.groups()[0]
451	if value.startswith('234'):
452	value = '+' + value[:3] + '-' + value[3:]
453	else:
454	value = '-%s' % phone
455	return value + '#'
456
457	@classmethod
458	def result(self, value, row):
459	try:
460	liste = eval(value)
461	except:
462	return
463	if isinstance(liste,list):
464	return [(i[0].lower(),i[1]) for i in liste]
465	return
466
467
468	def main():
469	input_file = '%s' % sys.argv[1]
470	output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
471	reader = csv.DictReader(open(input_file, 'rb'))
472	writer = None
473
474	for num, row in enumerate(reader):
475	if num == 0:
476	writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
477	print "FIELDS: "
478	for x, y in enumerate(reader.fieldnames):
479	print x, y
480	header = convert_fieldnames(reader.fieldnames)
481	writer.writerow(header)
482	if row.get('reg_state') == 'student_created':
483	# We do not reimport student records which have never been accessed.
484	continue
485	if row.get('status') == 'started':
486	# We do not reimport started payments.
487	continue
488	for key, value in row.items():
489	# Remove unwanted whitespaces.
490	row[key] = row[key].strip()
491	if not key in OPTIONS.keys():
492	continue
493	conv_name = OPTIONS[key]
494	converter = getattr(Converters, conv_name, None)
495	if converter is None:
496	print "WARNING: cannot find converter %s" % conv_name
497	continue
498	row[key] = converter(row[key], row)
499	try:
500	writer.writerow(row)
501	except:
502	print row['student_id']
503
504	print "Output written to %s" % output_file
505
506
507	if __name__ == '__main__':
508	if len(sys.argv) != 2:
509	print 'Usage: %s <filename>' % __file__
510	sys.exit(1)
511	main()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: