Context navigation

source: main/waeup.kofa/branches/uli-async-update/tools/fix_import_file.py @ 10687

Last change on this file since 10687 was 9208, checked in by uli, 12 years ago
Merge changes from trunk r9171:9207.
Property svn:keywords set to `Id`
File size: 14.1 KB

Rev	Line
[7512]	1	## $Id: fix_import_file.py 9208 2012-09-20 08:22:52Z uli $
	2	##
[7518]	3	## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
[7512]	4	## This program is free software; you can redistribute it and/or modify
	5	## it under the terms of the GNU General Public License as published by
	6	## the Free Software Foundation; either version 2 of the License, or
	7	## (at your option) any later version.
	8	##
	9	## This program is distributed in the hope that it will be useful,
	10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	## GNU General Public License for more details.
	13	##
	14	## You should have received a copy of the GNU General Public License
	15	## along with this program; if not, write to the Free Software
	16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	17	##
	18	"""
[8781]	19	Fix exports from old SRP portal and other data sources to make
	20	them importable by current portal.
[7512]	21
	22	Usage:
	23
	24	Change into this directory, set the options below (files are assumed
	25	to be in the same directory) and then run
	26
[7518]	27	python fix_import_file.py <filename>
[7512]	28
	29	Errors/warnings will be displayed on the shell, the output will be put
	30	into the specified output file.
[8781]	31
	32
	33	The lgas.py module must be copied into the same folder where this script
	34	is started.
[7512]	35	"""
[7573]	36	import csv
	37	import datetime
[7575]	38	import re
[7573]	39	import sys
[7512]	40
[8781]	41	try:
	42	from lgas import LGAS
	43	except:
	44	print 'ERROR: lgas.py is missing.'
	45	sys.exit(1)
	46
	47	def strip(string):
	48	string = string.replace('_', '')
	49	string = string.replace('/', '')
	50	string = string.replace('-', '')
	51	string = string.replace(' ', '')
	52	string = string.lower()
	53	return string
	54
	55	LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
	56	LGAS_dict = dict(LGAS)
	57
[7512]	58	##
	59	## CONFIGURATION SECTION
	60	##
	61	# keys are fieldnames in input file, values are methods of class
	62	# Converter (see below)
	63	OPTIONS = {
[8478]	64	'student_id': 'student_id',
[7512]	65	'sex': 'gender',
	66	'birthday': 'date',
[7514]	67	'marit_stat': 'marit_stat',
[7537]	68	'session': 'session',
[7514]	69	'entry_session': 'session',
	70	'current_session': 'session',
[7602]	71	'session_id': 'session',
[7632]	72	'entry_mode': 'mode',
[7526]	73	'reg_state': 'reg_state',
	74	'password': 'password',
[7575]	75	'phone': 'phone',
[8478]	76	'nationality': 'nationality',
[7602]	77	'level': 'level',
	78	'start_level': 'level',
	79	'end_level': 'level',
	80	'level_id': 'level',
	81	'current_level': 'level',
	82	'semester': 'semester',
	83	'application_category': 'application_category',
[7610]	84	'lga': 'lga',
[7628]	85	'order_id': 'no_int',
[9169]	86	'uniben': 'former',
	87	'nysc_year': 'year',
	88	'alr_date': 'date',
	89	'fst_sit_date': 'date',
	90	'scd_sit_date': 'date',
	91	'emp_start': 'date',
	92	'emp_end': 'date',
	93	'emp_start2': 'date',
	94	'emp_end2': 'date',
	95	'fst_sit_results': 'result',
	96	'scd_sit_results': 'result',
	97	'alr_results': 'result',
	98	'email': 'email',
	99	'fst_sit_type': 'sittype',
	100	'scd_sit_type': 'sittype',
	101	'resp_pay_reference': 'no_int',
	102	'type': 'company',
	103	'date': 'date',
[9208]	104	'core_or_elective': 'bool',
[9169]	105	'category': 'p_category',
	106	'reg_transition': 'reg_state', # we completely change this column,
	107	# since reg_state import is usually intended
	108	'transition': 'reg_transition',
	109	'payment_date': 'date',
	110	'validation_date': 'date',
[7512]	111	}
[7516]	112
	113	# Mapping input file colnames --> output file colnames
	114	COLNAME_MAPPING = {
[7666]	115	# base data
[9169]	116	'id': 'student_id',
[8478]	117	'reg_state': 'state',
[9169]	118	'reg_transition': 'state',
[7518]	119	'jamb_reg_no': 'reg_number',
[7645]	120	'matric_no': 'matric_number',
[7516]	121	'birthday': 'date_of_birth',
[7526]	122	'clr_ac_pin': 'clr_code',
[9169]	123	# clearance
	124	'hq_grade': 'hq_degree',
	125	'uniben': 'former_matric',
	126	'hq_type2': 'hq2_type',
	127	'hq_grade2': 'hq2_degree',
	128	'hq_school2': 'hq2_school',
	129	'hq_matric_no2': 'hq2_matric_no',
	130	'hq_session2': 'hq2_session',
	131	'hq_disc2': 'hq2_disc',
	132	'emp': 'employer',
	133	'emp2': 'employer2',
	134	'emp_position2': 'emp2_position',
	135	'emp_start2': 'emp2_start',
	136	'emp_end2': 'emp2_end',
	137	'emp_reason2': 'emp2_reason',
[7550]	138	# study course
[7530]	139	'study_course': 'certificate',
[7550]	140	# study level
[7537]	141	'session': 'level_session',
	142	'verdict': 'level_verdict',
[7550]	143	# course ticket
	144	'level_id': 'level',
[7666]	145	'core_or_elective': 'mandatory',
[7628]	146	# payment ticket
	147	'order_id': 'p_id',
	148	'status': 'p_state',
	149	'category': 'p_category',
	150	'resp_pay_reference': 'r_pay_reference',
	151	'resp_desc': 'r_desc',
	152	'resp_approved_amount': 'r_amount_approved',
	153	'item': 'p_item',
	154	'amount': 'amount_auth',
	155	'resp_card_num': 'r_card_num',
	156	'resp_code': 'r_code',
	157	'date': 'creation_date',
	158	'surcharge': 'surcharge_1',
	159	'session_id': 'p_session',
[9169]	160	'type': 'r_company',
[7516]	161	}
[7526]	162
[8478]	163	# Mapping input state --> output state
[7526]	164	REGSTATE_MAPPING = {
	165	'student_created': 'created',
	166	'admitted': 'admitted',
[7610]	167	'objection_raised': 'clearance started',
[7526]	168	'clearance_pin_entered': 'clearance started',
	169	'clearance_requested': 'clearance requested',
	170	'cleared_and_validated': 'cleared',
	171	'school_fee_paid': 'school fee paid',
	172	'returning': 'returning',
	173	'courses_registered': 'courses registered',
	174	'courses_validated': 'courses validated',
[9169]	175	'admit': 'admitted',
	176	'return': 'returning'
[7526]	177	}
	178
[7512]	179	##
	180	## END OF CONFIG
	181	##
	182
[7575]	183	# Look for the first sequence of numbers
	184	RE_PHONE = re.compile('[^\d](\d)[^\d]*')
	185
[7516]	186	def convert_fieldnames(fieldnames):
	187	"""Replace input fieldnames by fieldnames of COLNAME_MAPPING.
	188	"""
[9169]	189	# Remove whitespaces
	190	header = dict([(name, name.strip()) for name in fieldnames])
[7516]	191	for in_name, out_name in COLNAME_MAPPING.items():
[9169]	192	if in_name not in header.values():
[7516]	193	continue
[9169]	194	# Inverse dictionary lookup
	195	key = [key for key,value in header.items() if value==in_name][0]
	196	header[key] = out_name
[7514]	197	return header
	198
[7512]	199	class Converters():
	200	"""Converters to turn old-style values into new ones.
	201	"""
[8478]	202
[7512]	203	@classmethod
[9169]	204	def student_id(self, value, row):
[8478]	205	""" 'A123456' --> 'EA123456'
	206	"""
	207	if len(value) == 7:
[9208]	208	return 'K' + value
[8478]	209	return value
	210
	211	@classmethod
[9169]	212	def reg_state(self, value, row):
[7526]	213	""" 'courses_validated' --> 'courses validated'
	214	"""
	215	return REGSTATE_MAPPING.get(value,value)
	216
	217	@classmethod
[9169]	218	def reg_transition(self, value, row):
	219	if value == "admitted":
	220	return "admit"
	221	if value == "returning":
	222	return "return"
	223	return value
	224
	225	@classmethod
	226	def level(self, value, row):
[7602]	227	""" '000' --> '10'
[9169]	228	'800' --> '999' if pg student
[7602]	229	"""
	230	try:
	231	number = int(value)
	232	except ValueError:
	233	return 9999
	234	if number == 0:
	235	return 10
[9169]	236	if row.get('entry_mode') and row.get('entry_mode').startswith('pg'):
	237	return 999
[7602]	238	return number
	239
	240	@classmethod
[9169]	241	def semester(self, value, row):
[7602]	242	""" '0' --> '9'
	243	"""
	244	try:
	245	number = int(value)
	246	except ValueError:
	247	return 9999
	248	if number == 0:
	249	return 9
	250	return number
	251
	252	@classmethod
[9169]	253	def application_category(self, value, row):
[7602]	254	""" '' --> 'no'
	255	"""
	256	if value == '':
	257	return 'no'
	258	return value
	259
[7610]	260	@classmethod
[9169]	261	def lga(self, value, row):
[7610]	262	""" Remove apostrophe
	263	"""
	264	if value == 'akwa_ibom_uru_offong_oruko':
	265	return 'akwa_ibom_urue-offong-oruko'
[9169]	266	if value == 'edo_ohionmwon':
	267	return 'edo_orhionmwon'
	268
	269	if value == 'nassarawa_nassarawa':
	270	return 'nassarawa_nassawara'
	271
	272	if value == 'kogi_mopa-muro-mopi':
	273	return 'kogi_mopa-muro'
	274
	275	if value == 'delta_osimili-north':
	276	return 'delta_oshielli-north'
	277
	278	if value == 'delta_osimili':
	279	return 'delta_oshimili'
	280
	281	if value == 'delta_osimili-south':
	282	return 'delta_oshimili-south'
[7610]	283	try:
	284	value = value.replace("'","")
	285	except:
	286	return ''
[8784]	287	lower = value.lower()
	288	if lower in LGAS_dict.keys():
	289	return lower
[8781]	290	# If real names are given, let's see if a similar value
	291	# in LGAS exist.
[8784]	292	value = LGAS_inverted_stripped.get(strip(lower), value)
[7610]	293	return value
[7602]	294
[7610]	295
[7602]	296	@classmethod
[9169]	297	def session(self, value, row):
[7514]	298	""" '08' --> '2008'
[8478]	299	'2008/2009' --> '2008'
[7514]	300	"""
[8478]	301	if '/' in value:
	302	numbers = value.split('/')
	303	number = int(numbers[0])
	304	if number in range(2000,2015):
	305	return number
	306	else:
	307	return 9999
[7514]	308	try:
	309	number = int(value)
	310	except ValueError:
[7602]	311	#import pdb; pdb.set_trace()
[7514]	312	return 9999
	313	if number < 14:
	314	return number + 2000
	315	elif number in range(2000,2015):
	316	return number
	317	else:
	318	return 9999
	319
	320	@classmethod
[9169]	321	def former(self, value, row):
	322	""" True --> yes
	323	'2008/2009' --> '2008'
	324	"""
	325	if value == 'True':
	326	return 'yes'
	327	return
	328
	329	@classmethod
[9208]	330	def bool(self, value, row):
	331	""" True --> 1
	332	"""
	333	if value in ('TRUE', 'True'):
	334	return '1'
	335	elif value in ('FALSE', 'False'):
	336	return '0'
	337	return
	338
	339	@classmethod
[9169]	340	def year(self, value, row):
	341	""" '0' --> ''
	342	"""
	343	if value == '0':
	344	return
	345	if value == 'None':
	346	return
	347	return value
	348
	349
	350	@classmethod
	351	def marit_stat(self, value, row):
[7514]	352	""" 'True'/'False' --> 'married'/'unmarried'
	353	"""
[7610]	354	if value in ('True','married'):
[7514]	355	value = 'married'
[7610]	356	elif value in ('False','unmarried'):
[7514]	357	value = 'unmarried'
	358	else:
	359	value = ''
	360	return value
	361
	362	@classmethod
[9169]	363	def gender(self, value, row):
[7526]	364	""" 'True'/'False' --> 'f'/'m'
[7514]	365	"""
[8785]	366	if value.strip() in ('F', 'True','f'):
[7526]	367	value = 'f'
[8785]	368	elif value.strip() in ('M', 'False','m'):
[7526]	369	value = 'm'
[7514]	370	else:
	371	value = ''
	372	return value
	373
	374	@classmethod
[9169]	375	def date(self, value, row):
[7512]	376	""" 'yyyy/mm/dd' --> 'yyyy-mm-dd'
	377	"""
	378	if value == "None":
	379	value = ""
	380	elif value == "":
	381	value = ""
	382	else:
	383	value = value.replace('/', '-')
[7514]	384	# We add the hash symbol to avoid automatic date transformation
	385	# in Excel and Calc for further processing
	386	value += '#'
[7512]	387	return value
	388
	389	@classmethod
[9169]	390	def no_int(self, value, row):
	391	""" Add hash and skip numbers starting with 999999
[7628]	392	"""
	393	# We add the hash symbol to avoid automatic number transformation
	394	# in Excel and Calc for further processing
[9169]	395	try:
	396	intvalue = int(value)
	397	value += '#'
	398	except:
[7512]	399	pass
[9169]	400	if value.startswith('999999'):
	401	return
[7512]	402	return value
	403
[7526]	404	@classmethod
[9169]	405	def mode(self, value, row):
[7632]	406	if value == "transfer_fulltime":
	407	return "transfer_ft"
[8522]	408	if value == "ume_ft":
	409	return "utme_ft"
[7632]	410	return value
	411
	412	@classmethod
[9169]	413	def password(self, value, row):
[7526]	414	if value == "not set":
	415	return ""
	416	return value
	417
[7575]	418	@classmethod
[9169]	419	def nationality(self, value, row):
[8478]	420	if value == "nigeria":
	421	return "NG"
[9169]	422	if value == "niger":
	423	return "NE"
[8478]	424	return value
	425
	426	@classmethod
[9169]	427	def sittype(self, value, row):
	428	if value == "nabtec":
	429	return "nabteb"
	430	return value
	431
	432	@classmethod
	433	def company(self, value, row):
	434	if value == "online":
	435	return "interswitch"
	436	return value
	437
	438	@classmethod
	439	def p_category(self, value, row):
	440	if value == "acceptance":
	441	return "clearance"
	442	return value
	443
	444	@classmethod
	445	def email(self, value, row):
	446	return value.strip()
	447
	448	@classmethod
	449	def phone(self, value, row):
[7575]	450	""" '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
[7526]	451
[7575]	452	Dashes and slashes are removed before looking for sequences
	453	of numbers.
	454	"""
[9169]	455	if not value:
	456	return
	457	value = value.strip('#')
[7575]	458	value = value.replace('-', '')
	459	value = value.replace('/', '')
	460	match = RE_PHONE.match(value)
	461	phone = match.groups()[0]
[9169]	462	if value.startswith('234'):
	463	value = '+' + value[:3] + '-' + value[3:]
	464	else:
	465	value = '-%s' % phone
	466	return value + '#'
[7575]	467
[9169]	468	@classmethod
	469	def result(self, value, row):
	470	try:
	471	liste = eval(value)
	472	except:
	473	return
	474	if isinstance(liste,list):
	475	return [(i[0].lower(),i[1]) for i in liste]
	476	return
[7575]	477
[9169]	478
[7572]	479	def main():
[7573]	480	input_file = '%s' % sys.argv[1]
	481	output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
	482	reader = csv.DictReader(open(input_file, 'rb'))
[7572]	483	writer = None
[7512]	484
[7572]	485	for num, row in enumerate(reader):
	486	if num == 0:
[7573]	487	writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
[7572]	488	print "FIELDS: "
	489	for x, y in enumerate(reader.fieldnames):
	490	print x, y
	491	header = convert_fieldnames(reader.fieldnames)
	492	writer.writerow(header)
[9169]	493	if row.get('reg_state') == 'student_created':
	494	# We do not reimport student records which have never been accessed.
	495	continue
	496	if row.get('status') == 'started':
	497	# We do not reimport started payments.
	498	continue
[7572]	499	for key, value in row.items():
[9169]	500	# Remove unwanted whitespaces.
	501	row[key] = row[key].strip()
[7572]	502	if not key in OPTIONS.keys():
	503	continue
	504	conv_name = OPTIONS[key]
	505	converter = getattr(Converters, conv_name, None)
	506	if converter is None:
	507	print "WARNING: cannot find converter %s" % conv_name
	508	continue
[9169]	509	row[key] = converter(row[key], row)
[7602]	510	try:
	511	writer.writerow(row)
	512	except:
	513	print row['student_id']
[7512]	514
[7573]	515	print "Output written to %s" % output_file
[7572]	516
	517
	518	if __name__ == '__main__':
	519	if len(sys.argv) != 2:
	520	print 'Usage: %s <filename>' % __file__
	521	sys.exit(1)
	522	main()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: