Context navigation

source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 9423

Last change on this file since 9423 was 9362, checked in by uli, 12 years ago
Update tools to allow new ids in imports. Support for new import col 'old_id'.
Property svn:keywords set to `Id`
File size: 15.1 KB

Line
1	## $Id: fix_import_file.py 9362 2012-10-19 22:59:41Z uli $
2	##
3	## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4	## This program is free software; you can redistribute it and/or modify
5	## it under the terms of the GNU General Public License as published by
6	## the Free Software Foundation; either version 2 of the License, or
7	## (at your option) any later version.
8	##
9	## This program is distributed in the hope that it will be useful,
10	## but WITHOUT ANY WARRANTY; without even the implied warranty of
11	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	## GNU General Public License for more details.
13	##
14	## You should have received a copy of the GNU General Public License
15	## along with this program; if not, write to the Free Software
16	## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17	##
18	"""
19	Fix exports from old SRP portal and other data sources to make
20	them importable by current portal.
21
22	Usage:
23
24	Change into this directory, set the options below (files are assumed
25	to be in the same directory) and then run
26
27	python fix_import_file.py <filename>
28
29	Errors/warnings will be displayed on the shell, the output is written
30	to a file named after the input file with the suffix ``_edited.csv``.
31
32
33	The lgas.py module must be copied into the same folder where this script
34	is started.
35	"""
36	import csv
37	import datetime
38	import os
39	import re
40	import sys
41
try:
    from lgas import LGAS
except ImportError:
    # Only a module that really cannot be imported is reported as missing.
    # Any other problem (e.g. a syntax error inside lgas.py) now surfaces
    # with its own traceback instead of this misleading message.
    print('ERROR: lgas.py is missing.')
    sys.exit(1)
47
def strip(string):
    """Return `string` in lower case without '_', '/', '-' and blanks.

    Used to normalise LGA names before they are compared.
    """
    for unwanted in ('_', '/', '-', ' '):
        string = string.replace(unwanted, '')
    return string.lower()
55
# Reverse lookup: second element of each LGAS pair, normalised by strip(),
# --> first element of the pair.
LGAS_inverted_stripped = dict(
    (strip(title), token) for token, title in LGAS)
# Forward lookup: first element of each LGAS pair --> second element.
LGAS_dict = dict(LGAS)
58
59	##
60	## CONFIGURATION SECTION
61	##
# Input column name --> name of the classmethod of class Converters
# (see below) that is applied to every value of that column.
OPTIONS = {
    'student_id': 'student_id',
    'sex': 'gender',
    'birthday': 'date',
    'marit_stat': 'marit_stat',
    # sessions
    'session': 'session',
    'entry_session': 'session',
    'current_session': 'session',
    'session_id': 'session',
    'entry_mode': 'mode',
    'reg_state': 'reg_state',
    'password': 'password',
    'phone': 'phone',
    'nationality': 'nationality',
    # levels
    'level': 'level',
    'start_level': 'level',
    'end_level': 'level',
    'level_id': 'level',
    'current_level': 'level',
    'semester': 'semester',
    'application_category': 'application_category',
    'lga': 'lga',
    'order_id': 'no_int',
    'uniben': 'former',
    'nysc_year': 'year',
    # dates
    'alr_date': 'date',
    'fst_sit_date': 'date',
    'scd_sit_date': 'date',
    'emp_start': 'date',
    'emp_end': 'date',
    'emp_start2': 'date',
    'emp_end2': 'date',
    # exam results
    'fst_sit_results': 'result',
    'scd_sit_results': 'result',
    'alr_results': 'result',
    'email': 'email',
    'fst_sit_type': 'sittype',
    'scd_sit_type': 'sittype',
    'resp_pay_reference': 'no_int',
    'type': 'company',
    'date': 'date',
    'core_or_elective': 'bool',
    'category': 'p_category',
    # We completely change the reg_transition column, since a reg_state
    # import is usually intended.
    'reg_transition': 'reg_state',
    'transition': 'reg_transition',
    'payment_date': 'date',
    'validation_date': 'date',
}
113
# Column name in the input file --> column name in the output file.
# Columns not listed here keep their (whitespace-stripped) name.
COLNAME_MAPPING = {
    # base data
    'id': 'student_id',
    'reg_state': 'state',
    'reg_transition': 'state',
    'jamb_reg_no': 'reg_number',
    'matric_no': 'matric_number',
    'birthday': 'date_of_birth',
    'clr_ac_pin': 'clr_code',

    # clearance
    'hq_grade': 'hq_degree',
    'uniben': 'former_matric',
    'hq_type2': 'hq2_type',
    'hq_grade2': 'hq2_degree',
    'hq_school2': 'hq2_school',
    'hq_matric_no2': 'hq2_matric_no',
    'hq_session2': 'hq2_session',
    'hq_disc2': 'hq2_disc',
    'emp': 'employer',
    'emp2': 'employer2',
    'emp_position2': 'emp2_position',
    'emp_start2': 'emp2_start',
    'emp_end2': 'emp2_end',
    'emp_reason2': 'emp2_reason',

    # study course
    'study_course': 'certificate',

    # study level
    'session': 'level_session',
    'verdict': 'level_verdict',

    # course ticket
    'level_id': 'level',
    'core_or_elective': 'mandatory',

    # payment ticket
    'order_id': 'p_id',
    'status': 'p_state',
    'category': 'p_category',
    'resp_pay_reference': 'r_pay_reference',
    'resp_desc': 'r_desc',
    'resp_approved_amount': 'r_amount_approved',
    'item': 'p_item',
    'amount': 'amount_auth',
    'resp_card_num': 'r_card_num',
    'resp_code': 'r_code',
    'date': 'creation_date',
    'surcharge': 'surcharge_1',
    'session_id': 'p_session',
    'type': 'r_company',
    'old_id': 'old_id',
}
164
# Registration state in the input file --> state in the output file.
# States not listed here are passed through unchanged.
REGSTATE_MAPPING = {
    'student_created': 'created',
    'admitted': 'admitted',
    'objection_raised': 'clearance started',
    'clearance_pin_entered': 'clearance started',
    'clearance_requested': 'clearance requested',
    'cleared_and_validated': 'cleared',
    'school_fee_paid': 'school fee paid',
    'returning': 'returning',
    'courses_registered': 'courses registered',
    'courses_validated': 'courses validated',
    # transition names
    'admit': 'admitted',
    'return': 'returning',
}
180
# CSV file listing the special cases where the new id cannot be deduced
# from the old id. Use ``ID_MAP_CSV = None`` instead if no such special
# cases should be considered.
ID_MAP_CSV = "id_mapping.csv"
185
186	##
187	## END OF CONFIG
188	##
189
# Look for the first sequence of numbers: skip any leading non-digits and
# capture the run of digits that follows. The quantifiers matter; without
# them ``RE_PHONE.match()`` returns None for every value that starts with
# a digit and captures a single digit at best. With them the pattern
# always matches (the group is '' if there are no leading digits).
RE_PHONE = re.compile(r'[^\d]*(\d*)[^\d]*')
192
def get_id_mapping():
    """Returns a dict mapping from old (SRP) ids to new ids.

    The dict is read from the ID_MAP_CSV file, using its columns
    ``student_id`` (old id) and ``new_id``. If ID_MAP_CSV is set to
    ``None`` an empty dict is returned. The ID_MAP_CSV contains only
    the student ids of those students, for which the standard method
    (new_id=CHAR+old_id) does not work.

    Raises IOError if ID_MAP_CSV is set but no such file exists.
    """
    if ID_MAP_CSV is None:
        return {}
    if not os.path.isfile(ID_MAP_CSV):
        raise IOError(
            "No such file for mapping old to new ids: %s" % ID_MAP_CSV)
    # Binary mode is what the Python 2 csv module expects. The ``with``
    # block makes sure the file is closed again once it has been read.
    with open(ID_MAP_CSV, 'rb') as csv_file:
        return dict(
            (row['student_id'], row['new_id'])
            for row in csv.DictReader(csv_file))
211
212
def convert_fieldnames(fieldnames, mapping=None):
    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.

    `fieldnames` is an iterable of the column names found in the input
    file. `mapping` is the dict of old to new names to apply; it
    defaults to COLNAME_MAPPING.

    Returns a dict with the original (unstripped) fieldnames as keys.
    Each value is the new name for that column: the mapped name if the
    whitespace-stripped fieldname is listed in the mapping, the stripped
    fieldname otherwise. Every column is looked up on its own, so several
    columns stripping to the same name are all renamed.
    """
    if mapping is None:
        mapping = COLNAME_MAPPING
    header = {}
    for name in fieldnames:
        stripped = name.strip()
        header[name] = mapping.get(stripped, stripped)
    return header
225
class Converters(object):
    """Converters to turn old-style values into new ones.

    Every converter is a classmethod that takes the raw cell `value`
    and the complete `row` (a dict of the current input line) and returns
    the converted value. A return value of ``None`` ends up as an empty
    cell in the output. Converters are looked up by name via OPTIONS.
    """

    # Old id --> new id for the special cases listed in ID_MAP_CSV.
    old_new_id_map = get_id_mapping()

    # Misspelled LGA keys of old exports --> replacement key.
    # NOTE(review): 'delta_oshielli-north' looks like a typo for
    # 'delta_oshimili-north' (compare the two entries below it); confirm
    # against lgas.py before changing it.
    _LGA_CORRECTIONS = {
        'akwa_ibom_uru_offong_oruko': 'akwa_ibom_urue-offong-oruko',
        'edo_ohionmwon': 'edo_orhionmwon',
        'nassarawa_nassarawa': 'nassarawa_nassawara',
        'kogi_mopa-muro-mopi': 'kogi_mopa-muro',
        'delta_osimili-north': 'delta_oshielli-north',
        'delta_osimili': 'delta_oshimili',
        'delta_osimili-south': 'delta_oshimili-south',
    }

    @classmethod
    def student_id(cls, value, row):
        """ 'A123456' --> 'MA123456'

        Ids listed in `old_new_id_map` are replaced by their mapped id
        first. Afterwards any id of exactly seven characters is prefixed
        with 'M'.
        """
        value = cls.old_new_id_map.get(value, value)
        if len(value) == 7:
            return 'M' + value
        return value

    @classmethod
    def reg_state(cls, value, row):
        """ 'courses_validated' --> 'courses validated'

        States missing in REGSTATE_MAPPING are returned unchanged.
        """
        return REGSTATE_MAPPING.get(value, value)

    @classmethod
    def reg_transition(cls, value, row):
        """ 'admitted' --> 'admit', 'returning' --> 'return'
        """
        if value == "admitted":
            return "admit"
        if value == "returning":
            return "return"
        return value

    @classmethod
    def level(cls, value, row):
        """ '000' --> 10
            non-numeric --> 9999
            any other level --> 999 if the entry_mode of the row
            starts with 'pg'
        """
        try:
            number = int(value)
        except ValueError:
            return 9999
        if number == 0:
            return 10
        if (row.get('entry_mode') or '').startswith('pg'):
            return 999
        return number

    @classmethod
    def semester(cls, value, row):
        """ '0' --> 9
            non-numeric --> 9999
        """
        try:
            number = int(value)
        except ValueError:
            return 9999
        if number == 0:
            return 9
        return number

    @classmethod
    def application_category(cls, value, row):
        """ '' --> 'no'
        """
        if value == '':
            return 'no'
        return value

    @classmethod
    def lga(cls, value, row):
        """ Fix known misspellings, remove apostrophes and try to find
            the matching LGA key.

        Values without a ``replace`` method (e.g. ``None``) give ''.
        If no LGA key can be found, the value (without apostrophes) is
        returned as is.
        """
        if value in cls._LGA_CORRECTIONS:
            return cls._LGA_CORRECTIONS[value]
        try:
            value = value.replace("'", "")
        except AttributeError:
            return ''
        lower = value.lower()
        if lower in LGAS_dict:
            return lower
        # If real names are given, let's see if a similar value
        # in LGAS exist.
        return LGAS_inverted_stripped.get(strip(lower), value)

    @classmethod
    def session(cls, value, row):
        """ '08' --> 2008
            '2008/2009' --> 2008
            anything else, including years outside 2000..2014 --> 9999
        """
        if '/' in value:
            # Only the part before the first slash counts. A non-numeric
            # part is treated like any other unparsable session instead
            # of aborting the whole run.
            try:
                number = int(value.split('/')[0])
            except ValueError:
                return 9999
            if 2000 <= number < 2015:
                return number
            return 9999
        try:
            number = int(value)
        except ValueError:
            return 9999
        if number < 14:
            # two-digit year
            return number + 2000
        if 2000 <= number < 2015:
            return number
        return 9999

    @classmethod
    def former(cls, value, row):
        """ 'True' --> 'yes', anything else --> None
        """
        if value == 'True':
            return 'yes'
        return

    @classmethod
    def bool(cls, value, row):
        """ 'TRUE'/'True' --> '1', 'FALSE'/'False' --> '0',
            anything else --> None
        """
        if value in ('TRUE', 'True'):
            return '1'
        elif value in ('FALSE', 'False'):
            return '0'
        return

    @classmethod
    def year(cls, value, row):
        """ '0' and 'None' --> None
        """
        if value in ('0', 'None'):
            return
        return value

    @classmethod
    def marit_stat(cls, value, row):
        """ 'True'/'False' --> 'married'/'unmarried'

        Unknown values give ''.
        """
        if value in ('True', 'married'):
            return 'married'
        if value in ('False', 'unmarried'):
            return 'unmarried'
        return ''

    @classmethod
    def gender(cls, value, row):
        """ 'True'/'False' --> 'f'/'m'

        'F'/'f' and 'M'/'m' are accepted as well, unknown values give ''.
        """
        value = value.strip()
        if value in ('F', 'True', 'f'):
            return 'f'
        if value in ('M', 'False', 'm'):
            return 'm'
        return ''

    @classmethod
    def date(cls, value, row):
        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd#'

        'None' and '' give ''.
        """
        if value in ("None", ""):
            return ""
        # We add the hash symbol to avoid automatic date transformation
        # in Excel and Calc for further processing
        return value.replace('/', '-') + '#'

    @classmethod
    def no_int(cls, value, row):
        """ Add hash and skip numbers starting with 999999
        """
        # We add the hash symbol to avoid automatic number transformation
        # in Excel and Calc for further processing
        try:
            int(value)
        except ValueError:
            pass
        else:
            value += '#'
        if value.startswith('999999'):
            return
        return value

    @classmethod
    def mode(cls, value, row):
        """ 'transfer_fulltime' --> 'transfer_ft', 'ume_ft' --> 'utme_ft'
        """
        if value == "transfer_fulltime":
            return "transfer_ft"
        if value == "ume_ft":
            return "utme_ft"
        return value

    @classmethod
    def password(cls, value, row):
        """ 'not set' --> ''
        """
        if value == "not set":
            return ""
        return value

    @classmethod
    def nationality(cls, value, row):
        """ 'nigeria'/'Nigeria' --> 'NG', 'niger'/'Niger' --> 'NE'
        """
        if value in ('nigeria', 'Nigeria'):
            return "NG"
        if value in ('niger', 'Niger'):
            return "NE"
        return value

    @classmethod
    def sittype(cls, value, row):
        """ 'nabtec' --> 'nabteb'
        """
        if value == "nabtec":
            return "nabteb"
        return value

    @classmethod
    def company(cls, value, row):
        """ 'online' --> 'interswitch'
        """
        if value == "online":
            return "interswitch"
        return value

    @classmethod
    def p_category(cls, value, row):
        """ 'acceptance' --> 'clearance'
        """
        if value == "acceptance":
            return "clearance"
        return value

    @classmethod
    def email(cls, value, row):
        """ Remove surrounding whitespace.
        """
        return value.strip()

    @classmethod
    def phone(cls, value, row):
        """ '<num-seq1>-<num-seq2> asd' -> '-<num-seq1><num-seq2>#'
            '234<num-seq>' -> '+234-<num-seq>#'

        Dashes and slashes are removed before looking for sequences
        of numbers. Empty values give ``None``.
        """
        if not value:
            return
        value = value.strip('#').replace('-', '').replace('/', '')
        if value.startswith('234'):
            return '+' + value[:3] + '-' + value[3:] + '#'
        # First run of digits; a value without any digit gives '' rather
        # than crashing on a failed match.
        found = re.search(r'\d+', value)
        phone = found.group(0) if found else ''
        return '-%s#' % phone

    @classmethod
    def result(cls, value, row):
        """ Turn the string representation of a list of pairs into a
            list of pairs with the first item lower-cased.

        Values that cannot be evaluated or are not a list give ``None``.
        """
        # NOTE(security): eval() executes whatever expression the import
        # file contains. Run this on trusted exports only; if the cells
        # are plain literals, ast.literal_eval would be the safe choice.
        try:
            liste = eval(value)
        except Exception:
            return
        if isinstance(liste, list):
            return [(i[0].lower(), i[1]) for i in liste]
        return
507
def main():
    """Convert the CSV file given as first command line argument.

    The result is written to ``<input name without extension>_edited.csv``.
    The first output line holds the column names as converted by
    convert_fieldnames(). Every value of a column listed in OPTIONS is
    passed through the respective converter of class Converters.

    Rows with reg_state 'student_created' or status 'started' are
    skipped. The output file is only created if the input contains at
    least one data row.
    """
    input_file = sys.argv[1]
    # splitext() removes the extension only. Splitting at the first dot
    # mangled paths like './data.csv'.
    output_file = '%s_edited.csv' % os.path.splitext(input_file)[0]

    # Binary mode is what the Python 2 csv module expects.
    infile = open(input_file, 'rb')
    outfile = None
    try:
        reader = csv.DictReader(infile)
        writer = None
        for num, row in enumerate(reader):
            if writer is None:
                outfile = open(output_file, 'wb')
                writer = csv.DictWriter(outfile, reader.fieldnames)
                print("FIELDS: ")
                for x, y in enumerate(reader.fieldnames):
                    print('%s %s' % (x, y))
                header = convert_fieldnames(reader.fieldnames)
                writer.writerow(header)
            if row.get('reg_state') == 'student_created':
                # We do not reimport student records which have never
                # been accessed.
                continue
            if row.get('status') == 'started':
                # We do not reimport started payments.
                continue
            for key in list(row):
                if key is None:
                    # Surplus cells of an overlong row. They are left
                    # alone, writerow() below rejects such rows.
                    continue
                # Remove unwanted whitespaces. Missing cells of short
                # rows are None and treated as empty.
                row[key] = (row[key] or '').strip()
                if key not in OPTIONS:
                    continue
                conv_name = OPTIONS[key]
                converter = getattr(Converters, conv_name, None)
                if converter is None:
                    print("WARNING: cannot find converter %s" % conv_name)
                    continue
                row[key] = converter(row[key], row)
            try:
                writer.writerow(row)
            except (TypeError, ValueError, csv.Error):
                # Best effort: name the row that could not be written and
                # go on. get() is used because the id column need not be
                # called 'student_id' in the input file.
                print(row.get('student_id'))
    finally:
        infile.close()
        if outfile is not None:
            outfile.close()

    print("Output written to %s" % output_file)
545
546
if __name__ == '__main__':
    # Exactly one argument is expected: the name of the file to convert.
    if len(sys.argv) == 2:
        main()
    else:
        print('Usage: %s <filename>' % __file__)
        sys.exit(1)

Note: See TracBrowser for help on using the repository browser.

Download in other formats: