Context navigation

source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 9453

Last change on this file since 9453 was 9362, checked in by uli, 13 years ago
Update tools to allow new ids in imports. Support for new import col 'old_id'.
Property svn:keywords set to `Id`
File size: 15.1 KB

Rev	Line
[7512]	1	## $Id: fix_import_file.py 9362 2012-10-19 22:59:41Z uli $
	2	##
[7518]	3	## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
[7512]	4	## This program is free software; you can redistribute it and/or modify
	5	## it under the terms of the GNU General Public License as published by
	6	## the Free Software Foundation; either version 2 of the License, or
	7	## (at your option) any later version.
	8	##
	9	## This program is distributed in the hope that it will be useful,
	10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	## GNU General Public License for more details.
	13	##
	14	## You should have received a copy of the GNU General Public License
	15	## along with this program; if not, write to the Free Software
	16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	17	##
	18	"""
[8781]	19	Fix exports from old SRP portal and other data sources to make
	20	them importable by current portal.
[7512]	21
	22	Usage:
	23
	24	Change into this directory, set the options below (files are assumed
	25	to be in the same directory) and then run
	26
[7518]	27	python fix_import_file.py <filename>
[7512]	28
	29	Errors/warnings will be displayed on the shell, the output will be put
	30	into the specified output file.
[8781]	31
	32
	33	The lgas.py module must be copied into the same folder where this script
	34	is started.
[7512]	35	"""
[7573]	36	import csv
	37	import datetime
[9362]	38	import os
[7575]	39	import re
[7573]	40	import sys
[7512]	41
[8781]	42	try:
	43	from lgas import LGAS
	44	except:
	45	print 'ERROR: lgas.py is missing.'
	46	sys.exit(1)
	47
	48	def strip(string):
	49	string = string.replace('_', '')
	50	string = string.replace('/', '')
	51	string = string.replace('-', '')
	52	string = string.replace(' ', '')
	53	string = string.lower()
	54	return string
	55
	56	LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
	57	LGAS_dict = dict(LGAS)
	58
[7512]	59	##
	60	## CONFIGURATION SECTION
	61	##
	62	# keys are fieldnames in input file, values are methods of class
	63	# Converter (see below)
	64	OPTIONS = {
[8478]	65	'student_id': 'student_id',
[7512]	66	'sex': 'gender',
	67	'birthday': 'date',
[7514]	68	'marit_stat': 'marit_stat',
[7537]	69	'session': 'session',
[7514]	70	'entry_session': 'session',
	71	'current_session': 'session',
[7602]	72	'session_id': 'session',
[7632]	73	'entry_mode': 'mode',
[7526]	74	'reg_state': 'reg_state',
	75	'password': 'password',
[7575]	76	'phone': 'phone',
[8478]	77	'nationality': 'nationality',
[7602]	78	'level': 'level',
	79	'start_level': 'level',
	80	'end_level': 'level',
	81	'level_id': 'level',
	82	'current_level': 'level',
	83	'semester': 'semester',
	84	'application_category': 'application_category',
[7610]	85	'lga': 'lga',
[7628]	86	'order_id': 'no_int',
[8938]	87	'uniben': 'former',
	88	'nysc_year': 'year',
	89	'alr_date': 'date',
	90	'fst_sit_date': 'date',
	91	'scd_sit_date': 'date',
	92	'emp_start': 'date',
	93	'emp_end': 'date',
	94	'emp_start2': 'date',
	95	'emp_end2': 'date',
	96	'fst_sit_results': 'result',
	97	'scd_sit_results': 'result',
	98	'alr_results': 'result',
[8939]	99	'email': 'email',
	100	'fst_sit_type': 'sittype',
	101	'scd_sit_type': 'sittype',
[8941]	102	'resp_pay_reference': 'no_int',
	103	'type': 'company',
	104	'date': 'date',
[9194]	105	'core_or_elective': 'bool',
[8945]	106	'category': 'p_category',
[9007]	107	'reg_transition': 'reg_state', # we completely change this column,
	108	# since reg_state import is usually intended
	109	'transition': 'reg_transition',
[9158]	110	'payment_date': 'date',
[9160]	111	'validation_date': 'date',
[7512]	112	}
[7516]	113
	114	# Mapping input file colnames --> output file colnames
	115	COLNAME_MAPPING = {
[7666]	116	# base data
[9009]	117	'id': 'student_id',
[8478]	118	'reg_state': 'state',
[9007]	119	'reg_transition': 'state',
[7518]	120	'jamb_reg_no': 'reg_number',
[7645]	121	'matric_no': 'matric_number',
[7516]	122	'birthday': 'date_of_birth',
[7526]	123	'clr_ac_pin': 'clr_code',
[8941]	124	# clearance
	125	'hq_grade': 'hq_degree',
	126	'uniben': 'former_matric',
	127	'hq_type2': 'hq2_type',
	128	'hq_grade2': 'hq2_degree',
	129	'hq_school2': 'hq2_school',
	130	'hq_matric_no2': 'hq2_matric_no',
	131	'hq_session2': 'hq2_session',
	132	'hq_disc2': 'hq2_disc',
	133	'emp': 'employer',
	134	'emp2': 'employer2',
	135	'emp_position2': 'emp2_position',
	136	'emp_start2': 'emp2_start',
	137	'emp_end2': 'emp2_end',
	138	'emp_reason2': 'emp2_reason',
[7550]	139	# study course
[7530]	140	'study_course': 'certificate',
[7550]	141	# study level
[7537]	142	'session': 'level_session',
	143	'verdict': 'level_verdict',
[7550]	144	# course ticket
	145	'level_id': 'level',
[7666]	146	'core_or_elective': 'mandatory',
[7628]	147	# payment ticket
	148	'order_id': 'p_id',
	149	'status': 'p_state',
	150	'category': 'p_category',
	151	'resp_pay_reference': 'r_pay_reference',
	152	'resp_desc': 'r_desc',
	153	'resp_approved_amount': 'r_amount_approved',
	154	'item': 'p_item',
	155	'amount': 'amount_auth',
	156	'resp_card_num': 'r_card_num',
	157	'resp_code': 'r_code',
	158	'date': 'creation_date',
	159	'surcharge': 'surcharge_1',
	160	'session_id': 'p_session',
[8941]	161	'type': 'r_company',
[9362]	162	'old_id': 'old_id',
[7516]	163	}
[7526]	164
[8478]	165	# Mapping input state --> output state
[7526]	166	REGSTATE_MAPPING = {
	167	'student_created': 'created',
	168	'admitted': 'admitted',
[7610]	169	'objection_raised': 'clearance started',
[7526]	170	'clearance_pin_entered': 'clearance started',
	171	'clearance_requested': 'clearance requested',
	172	'cleared_and_validated': 'cleared',
	173	'school_fee_paid': 'school fee paid',
	174	'returning': 'returning',
	175	'courses_registered': 'courses registered',
	176	'courses_validated': 'courses validated',
[9007]	177	'admit': 'admitted',
	178	'return': 'returning'
[7526]	179	}
	180
[9362]	181	# Mapping of special cases, where new id is not deductible from old id
	182	# Set to `None`, if no such special cases should be considered.
	183	ID_MAP_CSV = None
	184	ID_MAP_CSV = "id_mapping.csv"
	185
[7512]	186	##
	187	## END OF CONFIG
	188	##
	189
[7575]	190	# Look for the first sequence of numbers
	191	RE_PHONE = re.compile('[^\d](\d)[^\d]*')
	192
[9362]	193	def get_id_mapping():
	194	"""Returns a dict mapping from old (SRP) ids to new ids.
	195
	196	The dict is read from ID_MAP_CSV file. If this var is set to
	197	``None`` an empty dict is returned. The ID_MAP_CSV contains only
	198	the student ids of those students, for which the standard method
	199	(new_id=CHAR+old_id) does not work.
	200	"""
	201	if ID_MAP_CSV is None:
	202	return {}
	203	if not os.path.isfile(ID_MAP_CSV):
	204	raise IOError(
	205	"No such file for mapping old to new ids: %s" % ID_MAP_CSV)
	206	result = dict()
	207	reader = csv.DictReader(open(ID_MAP_CSV, 'rb'))
	208	for row in reader:
	209	result[row['student_id']] = row['new_id']
	210	return result
	211
	212
[7516]	213	def convert_fieldnames(fieldnames):
	214	"""Replace input fieldnames by fieldnames of COLNAME_MAPPING.
	215	"""
[9009]	216	# Remove whitespaces
[9019]	217	header = dict([(name, name.strip()) for name in fieldnames])
[7516]	218	for in_name, out_name in COLNAME_MAPPING.items():
[9019]	219	if in_name not in header.values():
[7516]	220	continue
[9019]	221	# Inverse dictionary lookup
	222	key = [key for key,value in header.items() if value==in_name][0]
	223	header[key] = out_name
[7514]	224	return header
	225
[7512]	226	class Converters():
	227	"""Converters to turn old-style values into new ones.
	228	"""
[8478]	229
[9362]	230	old_new_id_map = get_id_mapping()
	231
[7512]	232	@classmethod
[9362]	233	def student_id(cls, value, row):
[8478]	234	""" 'A123456' --> 'EA123456'
	235	"""
[9362]	236	value = cls.old_new_id_map.get(value, value)
[8478]	237	if len(value) == 7:
[9270]	238	return 'M' + value
[8478]	239	return value
	240
	241	@classmethod
[9009]	242	def reg_state(self, value, row):
[7526]	243	""" 'courses_validated' --> 'courses validated'
	244	"""
	245	return REGSTATE_MAPPING.get(value,value)
	246
	247	@classmethod
[9009]	248	def reg_transition(self, value, row):
[9007]	249	if value == "admitted":
	250	return "admit"
	251	if value == "returning":
	252	return "return"
	253	return value
	254
	255	@classmethod
[9009]	256	def level(self, value, row):
[7602]	257	""" '000' --> '10'
[9009]	258	'800' --> '999' if pg student
[7602]	259	"""
	260	try:
	261	number = int(value)
	262	except ValueError:
	263	return 9999
	264	if number == 0:
	265	return 10
[9158]	266	if row.get('entry_mode') and row.get('entry_mode').startswith('pg'):
[9009]	267	return 999
[7602]	268	return number
	269
	270	@classmethod
[9009]	271	def semester(self, value, row):
[7602]	272	""" '0' --> '9'
	273	"""
	274	try:
	275	number = int(value)
	276	except ValueError:
	277	return 9999
	278	if number == 0:
	279	return 9
	280	return number
	281
	282	@classmethod
[9009]	283	def application_category(self, value, row):
[7602]	284	""" '' --> 'no'
	285	"""
	286	if value == '':
	287	return 'no'
	288	return value
	289
[7610]	290	@classmethod
[9009]	291	def lga(self, value, row):
[7610]	292	""" Remove apostrophe
	293	"""
	294	if value == 'akwa_ibom_uru_offong_oruko':
	295	return 'akwa_ibom_urue-offong-oruko'
[8939]	296	if value == 'edo_ohionmwon':
	297	return 'edo_orhionmwon'
	298
	299	if value == 'nassarawa_nassarawa':
	300	return 'nassarawa_nassawara'
	301
	302	if value == 'kogi_mopa-muro-mopi':
	303	return 'kogi_mopa-muro'
	304
	305	if value == 'delta_osimili-north':
	306	return 'delta_oshielli-north'
	307
	308	if value == 'delta_osimili':
	309	return 'delta_oshimili'
	310
	311	if value == 'delta_osimili-south':
	312	return 'delta_oshimili-south'
[7610]	313	try:
	314	value = value.replace("'","")
	315	except:
	316	return ''
[8784]	317	lower = value.lower()
	318	if lower in LGAS_dict.keys():
	319	return lower
[8781]	320	# If real names are given, let's see if a similar value
	321	# in LGAS exist.
[8784]	322	value = LGAS_inverted_stripped.get(strip(lower), value)
[7610]	323	return value
[7602]	324
[7610]	325
[7602]	326	@classmethod
[9009]	327	def session(self, value, row):
[7514]	328	""" '08' --> '2008'
[8478]	329	'2008/2009' --> '2008'
[7514]	330	"""
[8478]	331	if '/' in value:
	332	numbers = value.split('/')
	333	number = int(numbers[0])
	334	if number in range(2000,2015):
	335	return number
	336	else:
	337	return 9999
[7514]	338	try:
	339	number = int(value)
	340	except ValueError:
	341	return 9999
	342	if number < 14:
	343	return number + 2000
	344	elif number in range(2000,2015):
	345	return number
	346	else:
	347	return 9999
	348
	349	@classmethod
[9009]	350	def former(self, value, row):
[8938]	351	""" True --> yes
	352	'2008/2009' --> '2008'
	353	"""
	354	if value == 'True':
	355	return 'yes'
	356	return
	357
	358	@classmethod
[9194]	359	def bool(self, value, row):
	360	""" True --> 1
	361	"""
	362	if value in ('TRUE', 'True'):
	363	return '1'
	364	elif value in ('FALSE', 'False'):
	365	return '0'
	366	return
	367
	368	@classmethod
[9009]	369	def year(self, value, row):
[8938]	370	""" '0' --> ''
	371	"""
	372	if value == '0':
	373	return
	374	if value == 'None':
	375	return
	376	return value
	377
	378
	379	@classmethod
[9009]	380	def marit_stat(self, value, row):
[7514]	381	""" 'True'/'False' --> 'married'/'unmarried'
	382	"""
[7610]	383	if value in ('True','married'):
[7514]	384	value = 'married'
[7610]	385	elif value in ('False','unmarried'):
[7514]	386	value = 'unmarried'
	387	else:
	388	value = ''
	389	return value
	390
	391	@classmethod
[9009]	392	def gender(self, value, row):
[7526]	393	""" 'True'/'False' --> 'f'/'m'
[7514]	394	"""
[8785]	395	if value.strip() in ('F', 'True','f'):
[7526]	396	value = 'f'
[8785]	397	elif value.strip() in ('M', 'False','m'):
[7526]	398	value = 'm'
[7514]	399	else:
	400	value = ''
	401	return value
	402
	403	@classmethod
[9009]	404	def date(self, value, row):
[7512]	405	""" 'yyyy/mm/dd' --> 'yyyy-mm-dd'
	406	"""
	407	if value == "None":
	408	value = ""
	409	elif value == "":
	410	value = ""
	411	else:
	412	value = value.replace('/', '-')
[7514]	413	# We add the hash symbol to avoid automatic date transformation
	414	# in Excel and Calc for further processing
	415	value += '#'
[7512]	416	return value
	417
	418	@classmethod
[9009]	419	def no_int(self, value, row):
[8941]	420	""" Add hash and skip numbers starting with 999999
[7628]	421	"""
	422	# We add the hash symbol to avoid automatic number transformation
	423	# in Excel and Calc for further processing
[8941]	424	try:
	425	intvalue = int(value)
	426	value += '#'
	427	except:
	428	pass
	429	if value.startswith('999999'):
	430	return
[7628]	431	return value
	432
	433	@classmethod
[9009]	434	def mode(self, value, row):
[7632]	435	if value == "transfer_fulltime":
	436	return "transfer_ft"
[8522]	437	if value == "ume_ft":
	438	return "utme_ft"
[7632]	439	return value
	440
	441	@classmethod
[9009]	442	def password(self, value, row):
[7526]	443	if value == "not set":
	444	return ""
	445	return value
	446
[7575]	447	@classmethod
[9009]	448	def nationality(self, value, row):
[9270]	449	if value in ('nigeria', 'Nigeria'):
[8478]	450	return "NG"
[9270]	451	if value in ('niger', 'Niger'):
[8939]	452	return "NE"
[8478]	453	return value
	454
	455	@classmethod
[9009]	456	def sittype(self, value, row):
[8939]	457	if value == "nabtec":
	458	return "nabteb"
	459	return value
	460
	461	@classmethod
[9009]	462	def company(self, value, row):
[8941]	463	if value == "online":
	464	return "interswitch"
	465	return value
	466
	467	@classmethod
[9009]	468	def p_category(self, value, row):
[8941]	469	if value == "acceptance":
	470	return "clearance"
	471	return value
	472
	473	@classmethod
[9009]	474	def email(self, value, row):
[8939]	475	return value.strip()
	476
	477	@classmethod
[9009]	478	def phone(self, value, row):
[7575]	479	""" '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
[7526]	480
[7575]	481	Dashes and slashes are removed before looking for sequences
	482	of numbers.
	483	"""
[8938]	484	if not value:
	485	return
[9007]	486	value = value.strip('#')
[7575]	487	value = value.replace('-', '')
	488	value = value.replace('/', '')
	489	match = RE_PHONE.match(value)
	490	phone = match.groups()[0]
[9007]	491	if value.startswith('234'):
	492	value = '+' + value[:3] + '-' + value[3:]
	493	else:
	494	value = '-%s' % phone
[8938]	495	return value + '#'
[7575]	496
[8938]	497	@classmethod
[9009]	498	def result(self, value, row):
[8938]	499	try:
	500	liste = eval(value)
	501	except:
	502	return
	503	if isinstance(liste,list):
	504	return [(i[0].lower(),i[1]) for i in liste]
	505	return
[7575]	506
[9007]	507
[7572]	508	def main():
[7573]	509	input_file = '%s' % sys.argv[1]
	510	output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
	511	reader = csv.DictReader(open(input_file, 'rb'))
[7572]	512	writer = None
[7512]	513
[7572]	514	for num, row in enumerate(reader):
	515	if num == 0:
[7573]	516	writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
[7572]	517	print "FIELDS: "
	518	for x, y in enumerate(reader.fieldnames):
	519	print x, y
	520	header = convert_fieldnames(reader.fieldnames)
	521	writer.writerow(header)
[8939]	522	if row.get('reg_state') == 'student_created':
[9009]	523	# We do not reimport student records which have never been accessed.
[8939]	524	continue
[8941]	525	if row.get('status') == 'started':
[9009]	526	# We do not reimport started payments.
[8941]	527	continue
[7572]	528	for key, value in row.items():
[9009]	529	# Remove unwanted whitespaces.
	530	row[key] = row[key].strip()
[7572]	531	if not key in OPTIONS.keys():
	532	continue
	533	conv_name = OPTIONS[key]
	534	converter = getattr(Converters, conv_name, None)
	535	if converter is None:
	536	print "WARNING: cannot find converter %s" % conv_name
	537	continue
[9009]	538	row[key] = converter(row[key], row)
[7602]	539	try:
	540	writer.writerow(row)
	541	except:
	542	print row['student_id']
[7512]	543
[7573]	544	print "Output written to %s" % output_file
[7572]	545
	546
	547	if __name__ == '__main__':
	548	if len(sys.argv) != 2:
	549	print 'Usage: %s <filename>' % __file__
	550	sys.exit(1)
	551	main()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: