Context navigation

fix_import_file.py @ 13599

Last change on this file since 13599 was 9478, checked in by Henrik Bettermann, 12 years ago
Add converter for amounts and other things.
Property svn:keywords set to `Id`
File size: 15.5 KB

Line
1	## $Id: fix_import_file.py 9478 2012-10-30 20:19:06Z henrik $
2	##
3	## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4	## This program is free software; you can redistribute it and/or modify
5	## it under the terms of the GNU General Public License as published by
6	## the Free Software Foundation; either version 2 of the License, or
7	## (at your option) any later version.
8	##
9	## This program is distributed in the hope that it will be useful,
10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	## GNU General Public License for more details.
13	##
14	## You should have received a copy of the GNU General Public License
15	## along with this program; if not, write to the Free Software
16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17	##
18	"""
19	Fix exports from old SRP portal and other data sources to make
20	them importable by current portal.
21
22	Usage:
23
24	Change into this directory, set the options below (files are assumed
25	to be in the same directory) and then run
26
27	python fix_import_file.py <filename>
28
29	Errors/warnings will be displayed on the shell, the output will be put
30	into the specified output file.
31
32
33	The lgas.py module must be copied into the same folder where this script
34	is started.
35	"""
36	import csv
37	import datetime
38	import os
39	import re
40	import sys
41
42	try:
43	from lgas import LGAS
44	except:
45	print 'ERROR: lgas.py is missing.'
46	sys.exit(1)
47
48	def strip(string):
49	string = string.replace('_', '')
50	string = string.replace('/', '')
51	string = string.replace('-', '')
52	string = string.replace(' ', '')
53	string = string.lower()
54	return string
55
56	LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
57	LGAS_dict = dict(LGAS)
58
59	##
60	## CONFIGURATION SECTION
61	##
62	# keys are fieldnames in input file, values are methods of class
63	# Converter (see below)
64	OPTIONS = {
65	'student_id': 'student_id',
66	'sex': 'gender',
67	'birthday': 'date',
68	'marit_stat': 'marit_stat',
69	'session': 'session',
70	'entry_session': 'session',
71	'current_session': 'session',
72	'session_id': 'session',
73	'entry_mode': 'mode',
74	'reg_state': 'reg_state',
75	'password': 'password',
76	'phone': 'phone',
77	'nationality': 'nationality',
78	'level': 'level',
79	'start_level': 'level',
80	'end_level': 'level',
81	'level_id': 'level',
82	'current_level': 'level',
83	'semester': 'semester',
84	'application_category': 'application_category',
85	'lga': 'lga',
86	'order_id': 'no_int',
87	'uniben': 'former',
88	'nysc_year': 'year',
89	'alr_date': 'date',
90	'fst_sit_date': 'date',
91	'scd_sit_date': 'date',
92	'emp_start': 'date',
93	'emp_end': 'date',
94	'emp_start2': 'date',
95	'emp_end2': 'date',
96	'fst_sit_results': 'result',
97	'scd_sit_results': 'result',
98	'alr_results': 'result',
99	'email': 'email',
100	'fst_sit_type': 'sittype',
101	'scd_sit_type': 'sittype',
102	'resp_pay_reference': 'no_int',
103	'type': 'company',
104	'date': 'date',
105	'core_or_elective': 'bool',
106	'category': 'p_category',
107	'reg_transition': 'reg_state', # we completely change this column,
108	# since reg_state import is usually intended
109	'transition': 'reg_transition',
110	'payment_date': 'date',
111	'validation_date': 'date',
112	'resp_approved_amount': 'amount',
113	'amount': 'amount',
114	'amount_auth': 'amount',
115	'surcharge': 'amount'
116	}
117
118	# Mapping input file colnames --> output file colnames
119	COLNAME_MAPPING = {
120	# base data
121	'id': 'student_id',
122	'reg_state': 'state',
123	'reg_transition': 'state',
124	'jamb_reg_no': 'reg_number',
125	'matric_no': 'matric_number',
126	'birthday': 'date_of_birth',
127	'clr_ac_pin': 'clr_code',
128	# clearance
129	'hq_grade': 'hq_degree',
130	'uniben': 'former_matric',
131	'hq_type2': 'hq2_type',
132	'hq_grade2': 'hq2_degree',
133	'hq_school2': 'hq2_school',
134	'hq_matric_no2': 'hq2_matric_no',
135	'hq_session2': 'hq2_session',
136	'hq_disc2': 'hq2_disc',
137	'emp': 'employer',
138	'emp2': 'employer2',
139	'emp_position2': 'emp2_position',
140	'emp_start2': 'emp2_start',
141	'emp_end2': 'emp2_end',
142	'emp_reason2': 'emp2_reason',
143	# study course
144	'study_course': 'certificate',
145	# study level
146	'session': 'level_session',
147	'verdict': 'level_verdict',
148	# course ticket
149	'level_id': 'level',
150	'core_or_elective': 'mandatory',
151	# payment ticket
152	'order_id': 'p_id',
153	'status': 'p_state',
154	'category': 'p_category',
155	'resp_pay_reference': 'r_pay_reference',
156	'resp_desc': 'r_desc',
157	'resp_approved_amount': 'r_amount_approved',
158	'item': 'p_item',
159	'amount': 'amount_auth',
160	'resp_card_num': 'r_card_num',
161	'resp_code': 'r_code',
162	'date': 'creation_date',
163	'surcharge': 'surcharge_1',
164	'session_id': 'p_session',
165	'type': 'r_company',
166	'old_id': 'old_id',
167	}
168
169	# Mapping input state --> output state
170	REGSTATE_MAPPING = {
171	'student_created': 'created',
172	'admitted': 'admitted',
173	'objection_raised': 'clearance started',
174	'clearance_pin_entered': 'clearance started',
175	'clearance_requested': 'clearance requested',
176	'cleared_and_validated': 'cleared',
177	'school_fee_paid': 'school fee paid',
178	'returning': 'returning',
179	'courses_registered': 'courses registered',
180	'courses_validated': 'courses validated',
181	'admit': 'admitted',
182	'return': 'returning'
183	}
184
185	# Mapping of special cases, where new id is not deductible from old id
186	# Set to `None`, if no such special cases should be considered.
187	ID_MAP_CSV = None
188	ID_MAP_CSV = "id_mapping.csv"
189
190	##
191	## END OF CONFIG
192	##
193
194	# Look for the first sequence of numbers
195	RE_PHONE = re.compile('[^\d](\d)[^\d]*')
196
197	def get_id_mapping():
198	"""Returns a dict mapping from old (SRP) ids to new ids.
199
200	The dict is read from ID_MAP_CSV file. If this var is set to
201	``None`` an empty dict is returned. The ID_MAP_CSV contains only
202	the student ids of those students, for which the standard method
203	(new_id=CHAR+old_id) does not work.
204	"""
205	if ID_MAP_CSV is None:
206	return {}
207	if not os.path.isfile(ID_MAP_CSV):
208	raise IOError(
209	"No such file for mapping old to new ids: %s" % ID_MAP_CSV)
210	result = dict()
211	reader = csv.DictReader(open(ID_MAP_CSV, 'rb'))
212	for row in reader:
213	result[row['student_id']] = row['new_id']
214	return result
215
216
217	def convert_fieldnames(fieldnames):
218	"""Replace input fieldnames by fieldnames of COLNAME_MAPPING.
219	"""
220	# Remove whitespaces
221	header = dict([(name, name.strip()) for name in fieldnames])
222	for in_name, out_name in COLNAME_MAPPING.items():
223	if in_name not in header.values():
224	continue
225	# Inverse dictionary lookup
226	key = [key for key,value in header.items() if value==in_name][0]
227	header[key] = out_name
228	return header
229
230	class Converters():
231	"""Converters to turn old-style values into new ones.
232	"""
233
234	old_new_id_map = get_id_mapping()
235
236	@classmethod
237	def student_id(cls, value, row):
238	""" 'A123456' --> 'EA123456'
239	"""
240	value = cls.old_new_id_map.get(value, value)
241	if len(value) == 7:
242	return 'B' + value
243	return value
244
245	@classmethod
246	def reg_state(self, value, row):
247	""" 'courses_validated' --> 'courses validated'
248	"""
249	return REGSTATE_MAPPING.get(value,value)
250
251	@classmethod
252	def reg_transition(self, value, row):
253	if value == "admitted":
254	return "admit"
255	if value == "returning":
256	return "return"
257	return value
258
259	@classmethod
260	def level(self, value, row):
261	""" '000' --> '10'
262	'800' --> '999' if pg student
263	"""
264	try:
265	number = int(value)
266	except ValueError:
267	return 9999
268	if number == 0:
269	return 10
270	if row.get('entry_mode') and row.get('entry_mode').startswith('pg'):
271	return 999
272	return number
273
274	@classmethod
275	def semester(self, value, row):
276	""" '0' --> '9'
277	"""
278	try:
279	number = int(value)
280	except ValueError:
281	return 9999
282	if number == 0:
283	return 9
284	return number
285
286	@classmethod
287	def application_category(self, value, row):
288	""" '' --> 'no'
289	"""
290	if value == '':
291	return 'no'
292	return value
293
294	@classmethod
295	def lga(self, value, row):
296	""" Remove apostrophe
297	"""
298	if value == 'akwa_ibom_uru_offong_oruko':
299	return 'akwa_ibom_urue-offong-oruko'
300	if value == 'edo_ohionmwon':
301	return 'edo_orhionmwon'
302
303	if value == 'nassarawa_nassarawa':
304	return 'nassarawa_nassawara'
305
306	if value == 'kogi_mopa-muro-mopi':
307	return 'kogi_mopa-muro'
308
309	if value == 'delta_osimili-north':
310	return 'delta_oshielli-north'
311
312	if value == 'delta_osimili':
313	return 'delta_oshimili'
314
315	if value == 'delta_osimili-south':
316	return 'delta_oshimili-south'
317	try:
318	value = value.replace("'","")
319	except:
320	return ''
321	lower = value.lower()
322	if lower in LGAS_dict.keys():
323	return lower
324	# If real names are given, let's see if a similar value
325	# in LGAS exist.
326	value = value.replace('_',' ')
327	value = LGAS_inverted_stripped.get(strip(lower), value)
328	return value
329
330
331	@classmethod
332	def session(self, value, row):
333	""" '08' --> '2008'
334	'2008/2009' --> '2008'
335	"""
336	if '/' in value:
337	numbers = value.split('/')
338	number = int(numbers[0])
339	if number in range(2000,2015):
340	return number
341	else:
342	return 9999
343	try:
344	number = int(value)
345	except ValueError:
346	return 9999
347	if number < 14:
348	return number + 2000
349	elif number in range(2000,2015):
350	return number
351	else:
352	return 9999
353
354	@classmethod
355	def former(self, value, row):
356	""" True --> yes
357	'2008/2009' --> '2008'
358	"""
359	if value == 'True':
360	return 'yes'
361	return
362
363	@classmethod
364	def bool(self, value, row):
365	""" True --> 1
366	"""
367	if value in ('TRUE', 'True'):
368	return '1'
369	elif value in ('FALSE', 'False'):
370	return '0'
371	return
372
373	@classmethod
374	def year(self, value, row):
375	""" '0' --> ''
376	"""
377	if value == '0':
378	return
379	if value == 'None':
380	return
381	return value
382
383
384	@classmethod
385	def marit_stat(self, value, row):
386	""" 'True'/'False' --> 'married'/'unmarried'
387	"""
388	if value in ('True','married'):
389	value = 'married'
390	elif value in ('False','unmarried'):
391	value = 'unmarried'
392	else:
393	value = ''
394	return value
395
396	@classmethod
397	def gender(self, value, row):
398	""" 'True'/'False' --> 'f'/'m'
399	"""
400	if value.strip() in ('F', 'True','f'):
401	value = 'f'
402	elif value.strip() in ('M', 'False','m'):
403	value = 'm'
404	else:
405	value = ''
406	return value
407
408	@classmethod
409	def date(self, value, row):
410	""" 'yyyy/mm/dd' --> 'yyyy-mm-dd'
411	"""
412	if value == "None":
413	value = ""
414	elif value == "":
415	value = ""
416	else:
417	value = value.replace('/', '-')
418	# We add the hash symbol to avoid automatic date transformation
419	# in Excel and Calc for further processing
420	value += '#'
421	return value
422
423	@classmethod
424	def no_int(self, value, row):
425	""" Add hash and skip numbers starting with 999999
426	"""
427	# We add the hash symbol to avoid automatic number transformation
428	# in Excel and Calc for further processing
429	try:
430	intvalue = int(value)
431	value += '#'
432	except:
433	pass
434	if value.startswith('999999'):
435	return
436	return value
437
438	@classmethod
439	def amount(self, value, row):
440	""" Amounts must be integers
441	"""
442	try:
443	return float(value)
444	except:
445	return
446
447	@classmethod
448	def mode(self, value, row):
449	if value == "transfer_fulltime":
450	return "transfer_ft"
451	if value == "ume_ft":
452	return "utme_ft"
453	return value
454
455	@classmethod
456	def password(self, value, row):
457	if value == "not set":
458	return ""
459	return value
460
461	@classmethod
462	def nationality(self, value, row):
463	if value in ('nigeria', 'Nigeria'):
464	return "NG"
465	if value in ('niger', 'Niger'):
466	return "NE"
467	return value
468
469	@classmethod
470	def sittype(self, value, row):
471	if value == "nabtec":
472	return "nabteb"
473	return value
474
475	@classmethod
476	def company(self, value, row):
477	if value == "online":
478	return "interswitch"
479	return value
480
481	@classmethod
482	def p_category(self, value, row):
483	if value == "acceptance":
484	return "clearance"
485	return value
486
487	@classmethod
488	def email(self, value, row):
489	return value.strip()
490
491	@classmethod
492	def phone(self, value, row):
493	""" '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
494
495	Dashes and slashes are removed before looking for sequences
496	of numbers.
497	"""
498	if not value:
499	return
500	if len(value) < 5:
501	return
502	value = value.strip('#')
503	value = value.replace('-', '')
504	value = value.replace('/', '')
505	match = RE_PHONE.match(value)
506	phone = match.groups()[0]
507	if value.startswith('234'):
508	value = '+' + value[:3] + '-' + value[3:]
509	elif value.startswith('+234'):
510	value = value[:4] + '-' + value[4:]
511	else:
512	value = '-%s' % phone
513	return value + '#'
514
515	@classmethod
516	def result(self, value, row):
517	try:
518	liste = eval(value)
519	except:
520	return
521	if isinstance(liste,list):
522	return [(i[0].lower(),i[1]) for i in liste]
523	return
524
525
526	def main():
527	input_file = '%s' % sys.argv[1]
528	output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
529	reader = csv.DictReader(open(input_file, 'rb'))
530	writer = None
531
532	for num, row in enumerate(reader):
533	if num == 0:
534	writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
535	print "FIELDS: "
536	for x, y in enumerate(reader.fieldnames):
537	print x, y
538	header = convert_fieldnames(reader.fieldnames)
539	writer.writerow(header)
540	#if row.get('reg_state') == 'student_created':
541	# # We do not reimport student records which have never been accessed.
542	# continue
543	if row.get('status') == 'started':
544	# We do not reimport started payments.
545	continue
546	for key, value in row.items():
547	# Remove unwanted whitespaces.
548	row[key] = row[key].strip()
549	if not key in OPTIONS.keys():
550	continue
551	conv_name = OPTIONS[key]
552	converter = getattr(Converters, conv_name, None)
553	if converter is None:
554	print "WARNING: cannot find converter %s" % conv_name
555	continue
556	row[key] = converter(row[key], row)
557	try:
558	writer.writerow(row)
559	except:
560	print row['student_id']
561
562	print "Output written to %s" % output_file
563
564
565	if __name__ == '__main__':
566	if len(sys.argv) != 2:
567	print 'Usage: %s <filename>' % __file__
568	sys.exit(1)
569	main()

Note: See TracBrowser for help on using the repository browser.

Context navigation

source: main/waeup.kofa/branches/uli-stud-utils-cleanup/tools/fix_import_file.py @ 13599

Download in other formats: