Context navigation

← Previous revision
Next revision →
Normal
Revision log

fix_import_file.py

Last change on this file was 9478, checked in by Henrik Bettermann, 12 years ago
Add converter for amounts and other things.
Property svn:keywords set to `Id`
File size: 15.5 KB

Rev	Line
[7512]	1	## $Id: fix_import_file.py 9478 2012-10-30 20:19:06Z henrik $
	2	##
[7518]	3	## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
[7512]	4	## This program is free software; you can redistribute it and/or modify
	5	## it under the terms of the GNU General Public License as published by
	6	## the Free Software Foundation; either version 2 of the License, or
	7	## (at your option) any later version.
	8	##
	9	## This program is distributed in the hope that it will be useful,
	10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	## GNU General Public License for more details.
	13	##
	14	## You should have received a copy of the GNU General Public License
	15	## along with this program; if not, write to the Free Software
	16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	17	##
	18	"""
[8781]	19	Fix exports from old SRP portal and other data sources to make
	20	them importable by current portal.
[7512]	21
	22	Usage:
	23
	24	Change into this directory, set the options below (files are assumed
	25	to be in the same directory) and then run
	26
[7518]	27	python fix_import_file.py <filename>
[7512]	28
	29	Errors/warnings will be displayed on the shell, the output will be put
	30	into the specified output file.
[8781]	31
	32
	33	The lgas.py module must be copied into the same folder where this script
	34	is started.
[7512]	35	"""
[7573]	36	import csv
	37	import datetime
[9362]	38	import os
[7575]	39	import re
[7573]	40	import sys
[7512]	41
[8781]	42	try:
	43	from lgas import LGAS
	44	except:
	45	print 'ERROR: lgas.py is missing.'
	46	sys.exit(1)
	47
	48	def strip(string):
	49	string = string.replace('_', '')
	50	string = string.replace('/', '')
	51	string = string.replace('-', '')
	52	string = string.replace(' ', '')
	53	string = string.lower()
	54	return string
	55
	56	LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
	57	LGAS_dict = dict(LGAS)
	58
[7512]	59	##
	60	## CONFIGURATION SECTION
	61	##
	62	# keys are fieldnames in input file, values are methods of class
	63	# Converter (see below)
	64	OPTIONS = {
[8478]	65	'student_id': 'student_id',
[7512]	66	'sex': 'gender',
	67	'birthday': 'date',
[7514]	68	'marit_stat': 'marit_stat',
[7537]	69	'session': 'session',
[7514]	70	'entry_session': 'session',
	71	'current_session': 'session',
[7602]	72	'session_id': 'session',
[7632]	73	'entry_mode': 'mode',
[7526]	74	'reg_state': 'reg_state',
	75	'password': 'password',
[7575]	76	'phone': 'phone',
[8478]	77	'nationality': 'nationality',
[7602]	78	'level': 'level',
	79	'start_level': 'level',
	80	'end_level': 'level',
	81	'level_id': 'level',
	82	'current_level': 'level',
	83	'semester': 'semester',
	84	'application_category': 'application_category',
[7610]	85	'lga': 'lga',
[7628]	86	'order_id': 'no_int',
[8938]	87	'uniben': 'former',
	88	'nysc_year': 'year',
	89	'alr_date': 'date',
	90	'fst_sit_date': 'date',
	91	'scd_sit_date': 'date',
	92	'emp_start': 'date',
	93	'emp_end': 'date',
	94	'emp_start2': 'date',
	95	'emp_end2': 'date',
	96	'fst_sit_results': 'result',
	97	'scd_sit_results': 'result',
	98	'alr_results': 'result',
[8939]	99	'email': 'email',
	100	'fst_sit_type': 'sittype',
	101	'scd_sit_type': 'sittype',
[8941]	102	'resp_pay_reference': 'no_int',
	103	'type': 'company',
	104	'date': 'date',
[9194]	105	'core_or_elective': 'bool',
[8945]	106	'category': 'p_category',
[9007]	107	'reg_transition': 'reg_state', # we completely change this column,
	108	# since reg_state import is usually intended
	109	'transition': 'reg_transition',
[9158]	110	'payment_date': 'date',
[9160]	111	'validation_date': 'date',
[9478]	112	'resp_approved_amount': 'amount',
	113	'amount': 'amount',
	114	'amount_auth': 'amount',
	115	'surcharge': 'amount'
[7512]	116	}
[7516]	117
	118	# Mapping input file colnames --> output file colnames
	119	COLNAME_MAPPING = {
[7666]	120	# base data
[9009]	121	'id': 'student_id',
[8478]	122	'reg_state': 'state',
[9007]	123	'reg_transition': 'state',
[7518]	124	'jamb_reg_no': 'reg_number',
[7645]	125	'matric_no': 'matric_number',
[7516]	126	'birthday': 'date_of_birth',
[7526]	127	'clr_ac_pin': 'clr_code',
[8941]	128	# clearance
	129	'hq_grade': 'hq_degree',
	130	'uniben': 'former_matric',
	131	'hq_type2': 'hq2_type',
	132	'hq_grade2': 'hq2_degree',
	133	'hq_school2': 'hq2_school',
	134	'hq_matric_no2': 'hq2_matric_no',
	135	'hq_session2': 'hq2_session',
	136	'hq_disc2': 'hq2_disc',
	137	'emp': 'employer',
	138	'emp2': 'employer2',
	139	'emp_position2': 'emp2_position',
	140	'emp_start2': 'emp2_start',
	141	'emp_end2': 'emp2_end',
	142	'emp_reason2': 'emp2_reason',
[7550]	143	# study course
[7530]	144	'study_course': 'certificate',
[7550]	145	# study level
[7537]	146	'session': 'level_session',
	147	'verdict': 'level_verdict',
[7550]	148	# course ticket
	149	'level_id': 'level',
[7666]	150	'core_or_elective': 'mandatory',
[7628]	151	# payment ticket
	152	'order_id': 'p_id',
	153	'status': 'p_state',
	154	'category': 'p_category',
	155	'resp_pay_reference': 'r_pay_reference',
	156	'resp_desc': 'r_desc',
	157	'resp_approved_amount': 'r_amount_approved',
	158	'item': 'p_item',
	159	'amount': 'amount_auth',
	160	'resp_card_num': 'r_card_num',
	161	'resp_code': 'r_code',
	162	'date': 'creation_date',
	163	'surcharge': 'surcharge_1',
	164	'session_id': 'p_session',
[8941]	165	'type': 'r_company',
[9362]	166	'old_id': 'old_id',
[7516]	167	}
[7526]	168
[8478]	169	# Mapping input state --> output state
[7526]	170	REGSTATE_MAPPING = {
	171	'student_created': 'created',
	172	'admitted': 'admitted',
[7610]	173	'objection_raised': 'clearance started',
[7526]	174	'clearance_pin_entered': 'clearance started',
	175	'clearance_requested': 'clearance requested',
	176	'cleared_and_validated': 'cleared',
	177	'school_fee_paid': 'school fee paid',
	178	'returning': 'returning',
	179	'courses_registered': 'courses registered',
	180	'courses_validated': 'courses validated',
[9007]	181	'admit': 'admitted',
	182	'return': 'returning'
[7526]	183	}
	184
[9362]	185	# Mapping of special cases, where new id is not deductible from old id
	186	# Set to `None`, if no such special cases should be considered.
	187	ID_MAP_CSV = None
	188	ID_MAP_CSV = "id_mapping.csv"
	189
[7512]	190	##
	191	## END OF CONFIG
	192	##
	193
[7575]	194	# Look for the first sequence of numbers
	195	RE_PHONE = re.compile('[^\d](\d)[^\d]*')
	196
[9362]	197	def get_id_mapping():
	198	"""Returns a dict mapping from old (SRP) ids to new ids.
	199
	200	The dict is read from ID_MAP_CSV file. If this var is set to
	201	``None`` an empty dict is returned. The ID_MAP_CSV contains only
	202	the student ids of those students, for which the standard method
	203	(new_id=CHAR+old_id) does not work.
	204	"""
	205	if ID_MAP_CSV is None:
	206	return {}
	207	if not os.path.isfile(ID_MAP_CSV):
	208	raise IOError(
	209	"No such file for mapping old to new ids: %s" % ID_MAP_CSV)
	210	result = dict()
	211	reader = csv.DictReader(open(ID_MAP_CSV, 'rb'))
	212	for row in reader:
	213	result[row['student_id']] = row['new_id']
	214	return result
	215
	216
[7516]	217	def convert_fieldnames(fieldnames):
	218	"""Replace input fieldnames by fieldnames of COLNAME_MAPPING.
	219	"""
[9009]	220	# Remove whitespaces
[9019]	221	header = dict([(name, name.strip()) for name in fieldnames])
[7516]	222	for in_name, out_name in COLNAME_MAPPING.items():
[9019]	223	if in_name not in header.values():
[7516]	224	continue
[9019]	225	# Inverse dictionary lookup
	226	key = [key for key,value in header.items() if value==in_name][0]
	227	header[key] = out_name
[7514]	228	return header
	229
[7512]	230	class Converters():
	231	"""Converters to turn old-style values into new ones.
	232	"""
[8478]	233
[9362]	234	old_new_id_map = get_id_mapping()
	235
[7512]	236	@classmethod
[9362]	237	def student_id(cls, value, row):
[8478]	238	""" 'A123456' --> 'EA123456'
	239	"""
[9362]	240	value = cls.old_new_id_map.get(value, value)
[8478]	241	if len(value) == 7:
[9478]	242	return 'B' + value
[8478]	243	return value
	244
	245	@classmethod
[9009]	246	def reg_state(self, value, row):
[7526]	247	""" 'courses_validated' --> 'courses validated'
	248	"""
	249	return REGSTATE_MAPPING.get(value,value)
	250
	251	@classmethod
[9009]	252	def reg_transition(self, value, row):
[9007]	253	if value == "admitted":
	254	return "admit"
	255	if value == "returning":
	256	return "return"
	257	return value
	258
	259	@classmethod
[9009]	260	def level(self, value, row):
[7602]	261	""" '000' --> '10'
[9009]	262	'800' --> '999' if pg student
[7602]	263	"""
	264	try:
	265	number = int(value)
	266	except ValueError:
	267	return 9999
	268	if number == 0:
	269	return 10
[9158]	270	if row.get('entry_mode') and row.get('entry_mode').startswith('pg'):
[9009]	271	return 999
[7602]	272	return number
	273
	274	@classmethod
[9009]	275	def semester(self, value, row):
[7602]	276	""" '0' --> '9'
	277	"""
	278	try:
	279	number = int(value)
	280	except ValueError:
	281	return 9999
	282	if number == 0:
	283	return 9
	284	return number
	285
	286	@classmethod
[9009]	287	def application_category(self, value, row):
[7602]	288	""" '' --> 'no'
	289	"""
	290	if value == '':
	291	return 'no'
	292	return value
	293
[7610]	294	@classmethod
[9009]	295	def lga(self, value, row):
[7610]	296	""" Remove apostrophe
	297	"""
	298	if value == 'akwa_ibom_uru_offong_oruko':
	299	return 'akwa_ibom_urue-offong-oruko'
[8939]	300	if value == 'edo_ohionmwon':
	301	return 'edo_orhionmwon'
	302
	303	if value == 'nassarawa_nassarawa':
	304	return 'nassarawa_nassawara'
	305
	306	if value == 'kogi_mopa-muro-mopi':
	307	return 'kogi_mopa-muro'
	308
	309	if value == 'delta_osimili-north':
	310	return 'delta_oshielli-north'
	311
	312	if value == 'delta_osimili':
	313	return 'delta_oshimili'
	314
	315	if value == 'delta_osimili-south':
	316	return 'delta_oshimili-south'
[7610]	317	try:
	318	value = value.replace("'","")
	319	except:
	320	return ''
[8784]	321	lower = value.lower()
	322	if lower in LGAS_dict.keys():
	323	return lower
[8781]	324	# If real names are given, let's see if a similar value
	325	# in LGAS exist.
[9478]	326	value = value.replace('_',' ')
[8784]	327	value = LGAS_inverted_stripped.get(strip(lower), value)
[7610]	328	return value
[7602]	329
[7610]	330
[7602]	331	@classmethod
[9009]	332	def session(self, value, row):
[7514]	333	""" '08' --> '2008'
[8478]	334	'2008/2009' --> '2008'
[7514]	335	"""
[8478]	336	if '/' in value:
	337	numbers = value.split('/')
	338	number = int(numbers[0])
	339	if number in range(2000,2015):
	340	return number
	341	else:
	342	return 9999
[7514]	343	try:
	344	number = int(value)
	345	except ValueError:
	346	return 9999
	347	if number < 14:
	348	return number + 2000
	349	elif number in range(2000,2015):
	350	return number
	351	else:
	352	return 9999
	353
	354	@classmethod
[9009]	355	def former(self, value, row):
[8938]	356	""" True --> yes
	357	'2008/2009' --> '2008'
	358	"""
	359	if value == 'True':
	360	return 'yes'
	361	return
	362
	363	@classmethod
[9194]	364	def bool(self, value, row):
	365	""" True --> 1
	366	"""
	367	if value in ('TRUE', 'True'):
	368	return '1'
	369	elif value in ('FALSE', 'False'):
	370	return '0'
	371	return
	372
	373	@classmethod
[9009]	374	def year(self, value, row):
[8938]	375	""" '0' --> ''
	376	"""
	377	if value == '0':
	378	return
	379	if value == 'None':
	380	return
	381	return value
	382
	383
	384	@classmethod
[9009]	385	def marit_stat(self, value, row):
[7514]	386	""" 'True'/'False' --> 'married'/'unmarried'
	387	"""
[7610]	388	if value in ('True','married'):
[7514]	389	value = 'married'
[7610]	390	elif value in ('False','unmarried'):
[7514]	391	value = 'unmarried'
	392	else:
	393	value = ''
	394	return value
	395
	396	@classmethod
[9009]	397	def gender(self, value, row):
[7526]	398	""" 'True'/'False' --> 'f'/'m'
[7514]	399	"""
[8785]	400	if value.strip() in ('F', 'True','f'):
[7526]	401	value = 'f'
[8785]	402	elif value.strip() in ('M', 'False','m'):
[7526]	403	value = 'm'
[7514]	404	else:
	405	value = ''
	406	return value
	407
	408	@classmethod
[9009]	409	def date(self, value, row):
[7512]	410	""" 'yyyy/mm/dd' --> 'yyyy-mm-dd'
	411	"""
	412	if value == "None":
	413	value = ""
	414	elif value == "":
	415	value = ""
	416	else:
	417	value = value.replace('/', '-')
[7514]	418	# We add the hash symbol to avoid automatic date transformation
	419	# in Excel and Calc for further processing
	420	value += '#'
[7512]	421	return value
	422
	423	@classmethod
[9009]	424	def no_int(self, value, row):
[8941]	425	""" Add hash and skip numbers starting with 999999
[7628]	426	"""
	427	# We add the hash symbol to avoid automatic number transformation
	428	# in Excel and Calc for further processing
[8941]	429	try:
	430	intvalue = int(value)
	431	value += '#'
	432	except:
	433	pass
	434	if value.startswith('999999'):
	435	return
[7628]	436	return value
	437
	438	@classmethod
[9478]	439	def amount(self, value, row):
	440	""" Amounts must be integers
	441	"""
	442	try:
	443	return float(value)
	444	except:
	445	return
	446
	447	@classmethod
[9009]	448	def mode(self, value, row):
[7632]	449	if value == "transfer_fulltime":
	450	return "transfer_ft"
[8522]	451	if value == "ume_ft":
	452	return "utme_ft"
[7632]	453	return value
	454
	455	@classmethod
[9009]	456	def password(self, value, row):
[7526]	457	if value == "not set":
	458	return ""
	459	return value
	460
[7575]	461	@classmethod
[9009]	462	def nationality(self, value, row):
[9270]	463	if value in ('nigeria', 'Nigeria'):
[8478]	464	return "NG"
[9270]	465	if value in ('niger', 'Niger'):
[8939]	466	return "NE"
[8478]	467	return value
	468
	469	@classmethod
[9009]	470	def sittype(self, value, row):
[8939]	471	if value == "nabtec":
	472	return "nabteb"
	473	return value
	474
	475	@classmethod
[9009]	476	def company(self, value, row):
[8941]	477	if value == "online":
	478	return "interswitch"
	479	return value
	480
	481	@classmethod
[9009]	482	def p_category(self, value, row):
[8941]	483	if value == "acceptance":
	484	return "clearance"
	485	return value
	486
	487	@classmethod
[9009]	488	def email(self, value, row):
[8939]	489	return value.strip()
	490
	491	@classmethod
[9009]	492	def phone(self, value, row):
[7575]	493	""" '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
[7526]	494
[7575]	495	Dashes and slashes are removed before looking for sequences
	496	of numbers.
	497	"""
[8938]	498	if not value:
	499	return
[9478]	500	if len(value) < 5:
	501	return
[9007]	502	value = value.strip('#')
[7575]	503	value = value.replace('-', '')
	504	value = value.replace('/', '')
	505	match = RE_PHONE.match(value)
	506	phone = match.groups()[0]
[9007]	507	if value.startswith('234'):
	508	value = '+' + value[:3] + '-' + value[3:]
[9478]	509	elif value.startswith('+234'):
	510	value = value[:4] + '-' + value[4:]
[9007]	511	else:
	512	value = '-%s' % phone
[8938]	513	return value + '#'
[7575]	514
[8938]	515	@classmethod
[9009]	516	def result(self, value, row):
[8938]	517	try:
	518	liste = eval(value)
	519	except:
	520	return
	521	if isinstance(liste,list):
	522	return [(i[0].lower(),i[1]) for i in liste]
	523	return
[7575]	524
[9007]	525
[7572]	526	def main():
[7573]	527	input_file = '%s' % sys.argv[1]
	528	output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
	529	reader = csv.DictReader(open(input_file, 'rb'))
[7572]	530	writer = None
[7512]	531
[7572]	532	for num, row in enumerate(reader):
	533	if num == 0:
[7573]	534	writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
[7572]	535	print "FIELDS: "
	536	for x, y in enumerate(reader.fieldnames):
	537	print x, y
	538	header = convert_fieldnames(reader.fieldnames)
	539	writer.writerow(header)
[9478]	540	#if row.get('reg_state') == 'student_created':
	541	# # We do not reimport student records which have never been accessed.
	542	# continue
[8941]	543	if row.get('status') == 'started':
[9009]	544	# We do not reimport started payments.
[8941]	545	continue
[7572]	546	for key, value in row.items():
[9009]	547	# Remove unwanted whitespaces.
	548	row[key] = row[key].strip()
[7572]	549	if not key in OPTIONS.keys():
	550	continue
	551	conv_name = OPTIONS[key]
	552	converter = getattr(Converters, conv_name, None)
	553	if converter is None:
	554	print "WARNING: cannot find converter %s" % conv_name
	555	continue
[9009]	556	row[key] = converter(row[key], row)
[7602]	557	try:
	558	writer.writerow(row)
	559	except:
	560	print row['student_id']
[7512]	561
[7573]	562	print "Output written to %s" % output_file
[7572]	563
	564
	565	if __name__ == '__main__':
	566	if len(sys.argv) != 2:
	567	print 'Usage: %s <filename>' % __file__
	568	sys.exit(1)
	569	main()

Note: See TracBrowser for help on using the repository browser.

Context navigation

source: main/waeup.kofa/trunk/tools/fix_import_file.py

Download in other formats: