Context navigation

source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 8943

Last change on this file since 8943 was 8941, checked in by Henrik Bettermann, 13 years ago

More conversions.

Skip 'started' payments.

Property svn:keywords set to Id

File size: 13.1 KB

Rev	Line
[7512]	1	## $Id: fix_import_file.py 8941 2012-07-08 08:58:45Z henrik $
	2	##
[7518]	3	## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
[7512]	4	## This program is free software; you can redistribute it and/or modify
	5	## it under the terms of the GNU General Public License as published by
	6	## the Free Software Foundation; either version 2 of the License, or
	7	## (at your option) any later version.
	8	##
	9	## This program is distributed in the hope that it will be useful,
	10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	## GNU General Public License for more details.
	13	##
	14	## You should have received a copy of the GNU General Public License
	15	## along with this program; if not, write to the Free Software
	16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	17	##
	18	"""
[8781]	19	Fix exports from old SRP portal and other data sources to make
	20	them importable by current portal.
[7512]	21
	22	Usage:
	23
	24	Change into this directory, set the options below (files are assumed
	25	to be in the same directory) and then run
	26
[7518]	27	python fix_import_file.py <filename>
[7512]	28
	29	Errors/warnings will be displayed on the shell, the output will be put
	30	into the specified output file.
[8781]	31
	32
	33	The lgas.py module must be copied into the same folder where this script
	34	is started.
[7512]	35	"""
[7573]	36	import csv
	37	import datetime
[7575]	38	import re
[7573]	39	import sys
[7512]	40
[8781]	41	try:
	42	from lgas import LGAS
	43	except:
	44	print 'ERROR: lgas.py is missing.'
	45	sys.exit(1)
	46
	47	def strip(string):
	48	string = string.replace('_', '')
	49	string = string.replace('/', '')
	50	string = string.replace('-', '')
	51	string = string.replace(' ', '')
	52	string = string.lower()
	53	return string
	54
	55	LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
	56	LGAS_dict = dict(LGAS)
	57
[7512]	58	##
	59	## CONFIGURATION SECTION
	60	##
	61	# keys are fieldnames in input file, values are methods of class
	62	# Converter (see below)
	63	OPTIONS = {
[8478]	64	'student_id': 'student_id',
[7512]	65	'sex': 'gender',
	66	'birthday': 'date',
	67	'request_date': 'datetime',
[7514]	68	'marit_stat': 'marit_stat',
[7537]	69	'session': 'session',
[7514]	70	'entry_session': 'session',
	71	'current_session': 'session',
[7602]	72	'session_id': 'session',
[7632]	73	'entry_mode': 'mode',
[7526]	74	'reg_state': 'reg_state',
	75	'password': 'password',
[7575]	76	'phone': 'phone',
[8478]	77	'nationality': 'nationality',
[7602]	78	'level': 'level',
	79	'start_level': 'level',
	80	'end_level': 'level',
	81	'level_id': 'level',
	82	'current_level': 'level',
	83	'semester': 'semester',
	84	'application_category': 'application_category',
[7610]	85	'lga': 'lga',
[7628]	86	'order_id': 'no_int',
[8938]	87	'uniben': 'former',
	88	'nysc_year': 'year',
	89	'alr_date': 'date',
	90	'fst_sit_date': 'date',
	91	'scd_sit_date': 'date',
	92	'emp_start': 'date',
	93	'emp_end': 'date',
	94	'emp_start2': 'date',
	95	'emp_end2': 'date',
	96	'fst_sit_results': 'result',
	97	'scd_sit_results': 'result',
	98	'alr_results': 'result',
[8939]	99	'email': 'email',
	100	'fst_sit_type': 'sittype',
	101	'scd_sit_type': 'sittype',
[8941]	102	'resp_pay_reference': 'no_int',
	103	'type': 'company',
	104	'date': 'date',
	105	'p_category': 'p_category',
[7512]	106	}
[7516]	107
	108	# Mapping input file colnames --> output file colnames
	109	COLNAME_MAPPING = {
[7666]	110	# base data
[8478]	111	'reg_state': 'state',
[7518]	112	'jamb_reg_no': 'reg_number',
[7645]	113	'matric_no': 'matric_number',
[7516]	114	'birthday': 'date_of_birth',
[7526]	115	'clr_ac_pin': 'clr_code',
[8941]	116	# clearance
	117	'hq_grade': 'hq_degree',
	118	'uniben': 'former_matric',
	119	'hq_type2': 'hq2_type',
	120	'hq_grade2': 'hq2_degree',
	121	'hq_school2': 'hq2_school',
	122	'hq_matric_no2': 'hq2_matric_no',
	123	'hq_session2': 'hq2_session',
	124	'hq_disc2': 'hq2_disc',
	125	'emp': 'employer',
	126	'emp2': 'employer2',
	127	'emp_position2': 'emp2_position',
	128	'emp_start2': 'emp2_start',
	129	'emp_end2': 'emp2_end',
	130	'emp_reason2': 'emp2_reason',
[7550]	131	# study course
[7530]	132	'study_course': 'certificate',
[7550]	133	# study level
[7537]	134	'session': 'level_session',
	135	'verdict': 'level_verdict',
[7550]	136	# course ticket
	137	'level_id': 'level',
[7666]	138	'core_or_elective': 'mandatory',
[7628]	139	# payment ticket
	140	'order_id': 'p_id',
	141	'status': 'p_state',
	142	'category': 'p_category',
	143	'resp_pay_reference': 'r_pay_reference',
	144	'resp_desc': 'r_desc',
	145	'resp_approved_amount': 'r_amount_approved',
	146	'item': 'p_item',
	147	'amount': 'amount_auth',
	148	'resp_card_num': 'r_card_num',
	149	'resp_code': 'r_code',
	150	'date': 'creation_date',
	151	'surcharge': 'surcharge_1',
	152	'session_id': 'p_session',
[8941]	153	'type': 'r_company',
[7516]	154	}
[7526]	155
[8478]	156	# Mapping input state --> output state
[7526]	157	REGSTATE_MAPPING = {
	158	'student_created': 'created',
	159	'admitted': 'admitted',
[7610]	160	'objection_raised': 'clearance started',
[7526]	161	'clearance_pin_entered': 'clearance started',
	162	'clearance_requested': 'clearance requested',
	163	'cleared_and_validated': 'cleared',
	164	'school_fee_paid': 'school fee paid',
	165	'returning': 'returning',
	166	'courses_registered': 'courses registered',
	167	'courses_validated': 'courses validated',
	168	}
	169
[7512]	170	##
	171	## END OF CONFIG
	172	##
	173
[7575]	174	# Look for the first sequence of numbers
	175	RE_PHONE = re.compile('[^\d](\d)[^\d]*')
	176
[7516]	177	def convert_fieldnames(fieldnames):
	178	"""Replace input fieldnames by fieldnames of COLNAME_MAPPING.
	179	"""
	180	header = dict([(name, name) for name in fieldnames])
	181	for in_name, out_name in COLNAME_MAPPING.items():
	182	if in_name not in header:
	183	continue
	184	header[in_name] = out_name
[7514]	185	return header
	186
[7512]	187	class Converters():
	188	"""Converters to turn old-style values into new ones.
	189	"""
[8478]	190
[7512]	191	@classmethod
[8478]	192	def student_id(self, value):
	193	""" 'A123456' --> 'EA123456'
	194	"""
	195	if len(value) == 7:
	196	return 'E' + value
	197	return value
	198
	199	@classmethod
[7526]	200	def reg_state(self, value):
	201	""" 'courses_validated' --> 'courses validated'
	202	"""
	203	return REGSTATE_MAPPING.get(value,value)
	204
	205	@classmethod
[7602]	206	def level(self, value):
	207	""" '000' --> '10'
	208	"""
	209	try:
	210	number = int(value)
	211	except ValueError:
	212	return 9999
	213	if number == 0:
	214	return 10
	215	return number
	216
	217	@classmethod
	218	def semester(self, value):
	219	""" '0' --> '9'
	220	"""
	221	try:
	222	number = int(value)
	223	except ValueError:
	224	return 9999
	225	if number == 0:
	226	return 9
	227	return number
	228
	229	@classmethod
	230	def application_category(self, value):
	231	""" '' --> 'no'
	232	"""
	233	if value == '':
	234	return 'no'
	235	return value
	236
[7610]	237	@classmethod
	238	def lga(self, value):
	239	""" Remove apostrophe
	240	"""
	241	if value == 'akwa_ibom_uru_offong_oruko':
	242	return 'akwa_ibom_urue-offong-oruko'
[8939]	243	if value == 'edo_ohionmwon':
	244	return 'edo_orhionmwon'
	245
	246	if value == 'nassarawa_nassarawa':
	247	return 'nassarawa_nassawara'
	248
	249	if value == 'kogi_mopa-muro-mopi':
	250	return 'kogi_mopa-muro'
	251
	252	if value == 'delta_osimili-north':
	253	return 'delta_oshielli-north'
	254
	255	if value == 'delta_osimili':
	256	return 'delta_oshimili'
	257
	258	if value == 'delta_osimili-south':
	259	return 'delta_oshimili-south'
[7610]	260	try:
	261	value = value.replace("'","")
	262	except:
	263	return ''
[8784]	264	lower = value.lower()
	265	if lower in LGAS_dict.keys():
	266	return lower
[8781]	267	# If real names are given, let's see if a similar value
	268	# in LGAS exist.
[8784]	269	value = LGAS_inverted_stripped.get(strip(lower), value)
[7610]	270	return value
[7602]	271
[7610]	272
[7602]	273	@classmethod
[7514]	274	def session(self, value):
	275	""" '08' --> '2008'
[8478]	276	'2008/2009' --> '2008'
[7514]	277	"""
[8478]	278	if '/' in value:
	279	numbers = value.split('/')
	280	number = int(numbers[0])
	281	if number in range(2000,2015):
	282	return number
	283	else:
	284	return 9999
[7514]	285	try:
	286	number = int(value)
	287	except ValueError:
[7602]	288	#import pdb; pdb.set_trace()
[7514]	289	return 9999
	290	if number < 14:
	291	return number + 2000
	292	elif number in range(2000,2015):
	293	return number
	294	else:
	295	return 9999
	296
	297	@classmethod
[8938]	298	def former(self, value):
	299	""" True --> yes
	300	'2008/2009' --> '2008'
	301	"""
	302	if value == 'True':
	303	return 'yes'
	304	return
	305
	306	@classmethod
	307	def year(self, value):
	308	""" '0' --> ''
	309	"""
	310	if value == '0':
	311	return
	312	if value == 'None':
	313	return
	314	return value
	315
	316
	317	@classmethod
[7514]	318	def marit_stat(self, value):
	319	""" 'True'/'False' --> 'married'/'unmarried'
	320	"""
[7610]	321	if value in ('True','married'):
[7514]	322	value = 'married'
[7610]	323	elif value in ('False','unmarried'):
[7514]	324	value = 'unmarried'
	325	else:
	326	value = ''
	327	return value
	328
	329	@classmethod
	330	def gender(self, value):
[7526]	331	""" 'True'/'False' --> 'f'/'m'
[7514]	332	"""
[8785]	333	if value.strip() in ('F', 'True','f'):
[7526]	334	value = 'f'
[8785]	335	elif value.strip() in ('M', 'False','m'):
[7526]	336	value = 'm'
[7514]	337	else:
	338	value = ''
	339	return value
	340
	341	@classmethod
[7512]	342	def date(self, value):
	343	""" 'yyyy/mm/dd' --> 'yyyy-mm-dd'
	344	"""
	345	if value == "None":
	346	value = ""
	347	elif value == "":
	348	value = ""
	349	else:
	350	value = value.replace('/', '-')
[7514]	351	# We add the hash symbol to avoid automatic date transformation
	352	# in Excel and Calc for further processing
	353	value += '#'
[7512]	354	return value
	355
	356	@classmethod
[7628]	357	def no_int(self, value):
[8941]	358	""" Add hash and skip numbers starting with 999999
[7628]	359	"""
	360	# We add the hash symbol to avoid automatic number transformation
	361	# in Excel and Calc for further processing
[8941]	362	try:
	363	intvalue = int(value)
	364	value += '#'
	365	except:
	366	pass
	367	if value.startswith('999999'):
	368	return
[7628]	369	return value
	370
	371
	372	@classmethod
[7512]	373	def datetime(self, value):
	374	""" 'yyyy/mm/dd' --> 'yyyy-mm-dd'
	375	"""
	376	#print "IN: ", value
	377	if value == "None":
	378	value = ""
	379	elif value == "":
	380	value = ""
	381	else:
	382	#value = datetime.datetime.strptime(value, '%Y/%m/%d')
	383	#value = datetime.datetime.strftime(value, '%Y-%m-%d')
	384	pass
	385	#print "OUT: ", value
	386	return value
	387
[7526]	388	@classmethod
[7632]	389	def mode(self, value):
	390	if value == "transfer_fulltime":
	391	return "transfer_ft"
[8522]	392	if value == "ume_ft":
	393	return "utme_ft"
[7632]	394	return value
	395
	396	@classmethod
[7526]	397	def password(self, value):
	398	if value == "not set":
	399	return ""
	400	return value
	401
[7575]	402	@classmethod
[8478]	403	def nationality(self, value):
	404	if value == "nigeria":
	405	return "NG"
[8939]	406	if value == "niger":
	407	return "NE"
[8478]	408	return value
	409
	410	@classmethod
[8939]	411	def sittype(self, value):
	412	if value == "nabtec":
	413	return "nabteb"
	414	return value
	415
	416	@classmethod
[8941]	417	def company(self, value):
	418	if value == "online":
	419	return "interswitch"
	420	return value
	421
	422	@classmethod
	423	def p_category(self, value):
	424	if value == "acceptance":
	425	return "clearance"
	426	return value
	427
	428	@classmethod
[8939]	429	def email(self, value):
	430	return value.strip()
	431
	432	@classmethod
[7575]	433	def phone(self, value):
	434	""" '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
[7526]	435
[7575]	436	Dashes and slashes are removed before looking for sequences
	437	of numbers.
	438	"""
[8938]	439	if not value:
	440	return
[7575]	441	value = value.replace('-', '')
	442	value = value.replace('/', '')
	443	match = RE_PHONE.match(value)
	444	phone = match.groups()[0]
[8478]	445	value = '-%s' % phone
[8938]	446	return value + '#'
[7575]	447
[8938]	448	@classmethod
	449	def result(self, value):
	450	try:
	451	liste = eval(value)
	452	except:
	453	return
	454	if isinstance(liste,list):
	455	return [(i[0].lower(),i[1]) for i in liste]
	456	return
[7575]	457
[7572]	458	def main():
[7573]	459	input_file = '%s' % sys.argv[1]
	460	output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
	461	reader = csv.DictReader(open(input_file, 'rb'))
[7572]	462	writer = None
[7512]	463
[7572]	464	for num, row in enumerate(reader):
	465	if num == 0:
[7573]	466	writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
[7572]	467	print "FIELDS: "
	468	for x, y in enumerate(reader.fieldnames):
	469	print x, y
	470	header = convert_fieldnames(reader.fieldnames)
	471	writer.writerow(header)
[8939]	472	if row.get('reg_state') == 'student_created':
	473	continue
[8941]	474	if row.get('status') == 'started':
	475	# We do not use started payments
	476	continue
[7572]	477	for key, value in row.items():
	478	if not key in OPTIONS.keys():
	479	continue
	480	conv_name = OPTIONS[key]
	481	converter = getattr(Converters, conv_name, None)
	482	if converter is None:
	483	print "WARNING: cannot find converter %s" % conv_name
	484	continue
	485	row[key] = converter(row[key])
[7602]	486	try:
	487	writer.writerow(row)
	488	except:
	489	print row['student_id']
[7512]	490
[7573]	491	print "Output written to %s" % output_file
[7572]	492
	493
	494	if __name__ == '__main__':
	495	if len(sys.argv) != 2:
	496	print 'Usage: %s <filename>' % __file__
	497	sys.exit(1)
	498	main()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: