## $Id: fix_import_file.py 8941 2012-07-08 08:58:45Z henrik $ ## ## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 2 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program; if not, write to the Free Software ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ## """ Fix exports from old SRP portal and other data sources to make them importable by current portal. Usage: Change into this directory, set the options below (files are assumed to be in the same directory) and then run python fix_import_file.py Errors/warnings will be displayed on the shell, the output will be put into the specified output file. The lgas.py module must be copied into the same folder where this script is started. """ import csv import datetime import re import sys try: from lgas import LGAS except: print 'ERROR: lgas.py is missing.' sys.exit(1) def strip(string): string = string.replace('_', '') string = string.replace('/', '') string = string.replace('-', '') string = string.replace(' ', '') string = string.lower() return string LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS]) LGAS_dict = dict(LGAS) ## ## CONFIGURATION SECTION ## # keys are fieldnames in input file, values are methods of class # Converter (see below) OPTIONS = { 'student_id': 'student_id', 'sex': 'gender', 'birthday': 'date', 'request_date': 'datetime', 'marit_stat': 'marit_stat', 'session': 'session', 'entry_session': 'session', 'current_session': 'session', 'session_id': 'session', 'entry_mode': 'mode', 'reg_state': 'reg_state', 'password': 'password', 'phone': 'phone', 'nationality': 'nationality', 'level': 'level', 'start_level': 'level', 'end_level': 'level', 'level_id': 'level', 'current_level': 'level', 'semester': 'semester', 'application_category': 'application_category', 'lga': 'lga', 'order_id': 'no_int', 'uniben': 'former', 'nysc_year': 'year', 'alr_date': 'date', 'fst_sit_date': 'date', 'scd_sit_date': 'date', 'emp_start': 'date', 'emp_end': 'date', 'emp_start2': 'date', 'emp_end2': 'date', 'fst_sit_results': 'result', 'scd_sit_results': 'result', 'alr_results': 'result', 'email': 'email', 'fst_sit_type': 'sittype', 'scd_sit_type': 'sittype', 'resp_pay_reference': 'no_int', 'type': 'company', 'date': 'date', 'p_category': 'p_category', } # Mapping input file colnames --> output file colnames COLNAME_MAPPING = { # base data 'reg_state': 'state', 'jamb_reg_no': 'reg_number', 'matric_no': 'matric_number', 'birthday': 'date_of_birth', 'clr_ac_pin': 'clr_code', # clearance 'hq_grade': 'hq_degree', 'uniben': 'former_matric', 'hq_type2': 'hq2_type', 'hq_grade2': 'hq2_degree', 'hq_school2': 'hq2_school', 'hq_matric_no2': 'hq2_matric_no', 'hq_session2': 'hq2_session', 'hq_disc2': 'hq2_disc', 'emp': 'employer', 'emp2': 'employer2', 'emp_position2': 'emp2_position', 'emp_start2': 'emp2_start', 'emp_end2': 'emp2_end', 'emp_reason2': 'emp2_reason', # study course 'study_course': 'certificate', # study level 'session': 'level_session', 'verdict': 'level_verdict', # course ticket 'level_id': 'level', 'core_or_elective': 'mandatory', # payment ticket 'order_id': 'p_id', 'status': 'p_state', 'category': 'p_category', 'resp_pay_reference': 'r_pay_reference', 'resp_desc': 'r_desc', 'resp_approved_amount': 'r_amount_approved', 'item': 'p_item', 'amount': 'amount_auth', 'resp_card_num': 'r_card_num', 'resp_code': 'r_code', 'date': 'creation_date', 'surcharge': 'surcharge_1', 'session_id': 'p_session', 'type': 'r_company', } # Mapping input state --> output state REGSTATE_MAPPING = { 'student_created': 'created', 'admitted': 'admitted', 'objection_raised': 'clearance started', 'clearance_pin_entered': 'clearance started', 'clearance_requested': 'clearance requested', 'cleared_and_validated': 'cleared', 'school_fee_paid': 'school fee paid', 'returning': 'returning', 'courses_registered': 'courses registered', 'courses_validated': 'courses validated', } ## ## END OF CONFIG ## # Look for the first sequence of numbers RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*') def convert_fieldnames(fieldnames): """Replace input fieldnames by fieldnames of COLNAME_MAPPING. """ header = dict([(name, name) for name in fieldnames]) for in_name, out_name in COLNAME_MAPPING.items(): if in_name not in header: continue header[in_name] = out_name return header class Converters(): """Converters to turn old-style values into new ones. """ @classmethod def student_id(self, value): """ 'A123456' --> 'EA123456' """ if len(value) == 7: return 'E' + value return value @classmethod def reg_state(self, value): """ 'courses_validated' --> 'courses validated' """ return REGSTATE_MAPPING.get(value,value) @classmethod def level(self, value): """ '000' --> '10' """ try: number = int(value) except ValueError: return 9999 if number == 0: return 10 return number @classmethod def semester(self, value): """ '0' --> '9' """ try: number = int(value) except ValueError: return 9999 if number == 0: return 9 return number @classmethod def application_category(self, value): """ '' --> 'no' """ if value == '': return 'no' return value @classmethod def lga(self, value): """ Remove apostrophe """ if value == 'akwa_ibom_uru_offong_oruko': return 'akwa_ibom_urue-offong-oruko' if value == 'edo_ohionmwon': return 'edo_orhionmwon' if value == 'nassarawa_nassarawa': return 'nassarawa_nassawara' if value == 'kogi_mopa-muro-mopi': return 'kogi_mopa-muro' if value == 'delta_osimili-north': return 'delta_oshielli-north' if value == 'delta_osimili': return 'delta_oshimili' if value == 'delta_osimili-south': return 'delta_oshimili-south' try: value = value.replace("'","") except: return '' lower = value.lower() if lower in LGAS_dict.keys(): return lower # If real names are given, let's see if a similar value # in LGAS exist. value = LGAS_inverted_stripped.get(strip(lower), value) return value @classmethod def session(self, value): """ '08' --> '2008' '2008/2009' --> '2008' """ if '/' in value: numbers = value.split('/') number = int(numbers[0]) if number in range(2000,2015): return number else: return 9999 try: number = int(value) except ValueError: #import pdb; pdb.set_trace() return 9999 if number < 14: return number + 2000 elif number in range(2000,2015): return number else: return 9999 @classmethod def former(self, value): """ True --> yes '2008/2009' --> '2008' """ if value == 'True': return 'yes' return @classmethod def year(self, value): """ '0' --> '' """ if value == '0': return if value == 'None': return return value @classmethod def marit_stat(self, value): """ 'True'/'False' --> 'married'/'unmarried' """ if value in ('True','married'): value = 'married' elif value in ('False','unmarried'): value = 'unmarried' else: value = '' return value @classmethod def gender(self, value): """ 'True'/'False' --> 'f'/'m' """ if value.strip() in ('F', 'True','f'): value = 'f' elif value.strip() in ('M', 'False','m'): value = 'm' else: value = '' return value @classmethod def date(self, value): """ 'yyyy/mm/dd' --> 'yyyy-mm-dd' """ if value == "None": value = "" elif value == "": value = "" else: value = value.replace('/', '-') # We add the hash symbol to avoid automatic date transformation # in Excel and Calc for further processing value += '#' return value @classmethod def no_int(self, value): """ Add hash and skip numbers starting with 999999 """ # We add the hash symbol to avoid automatic number transformation # in Excel and Calc for further processing try: intvalue = int(value) value += '#' except: pass if value.startswith('999999'): return return value @classmethod def datetime(self, value): """ 'yyyy/mm/dd' --> 'yyyy-mm-dd' """ #print "IN: ", value if value == "None": value = "" elif value == "": value = "" else: #value = datetime.datetime.strptime(value, '%Y/%m/%d') #value = datetime.datetime.strftime(value, '%Y-%m-%d') pass #print "OUT: ", value return value @classmethod def mode(self, value): if value == "transfer_fulltime": return "transfer_ft" if value == "ume_ft": return "utme_ft" return value @classmethod def password(self, value): if value == "not set": return "" return value @classmethod def nationality(self, value): if value == "nigeria": return "NG" if value == "niger": return "NE" return value @classmethod def sittype(self, value): if value == "nabtec": return "nabteb" return value @classmethod def company(self, value): if value == "online": return "interswitch" return value @classmethod def p_category(self, value): if value == "acceptance": return "clearance" return value @classmethod def email(self, value): return value.strip() @classmethod def phone(self, value): """ '- asd' -> '--' Dashes and slashes are removed before looking for sequences of numbers. """ if not value: return value = value.replace('-', '') value = value.replace('/', '') match = RE_PHONE.match(value) phone = match.groups()[0] value = '-%s' % phone return value + '#' @classmethod def result(self, value): try: liste = eval(value) except: return if isinstance(liste,list): return [(i[0].lower(),i[1]) for i in liste] return def main(): input_file = '%s' % sys.argv[1] output_file = '%s_edited.csv' % sys.argv[1].split('.')[0] reader = csv.DictReader(open(input_file, 'rb')) writer = None for num, row in enumerate(reader): if num == 0: writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames) print "FIELDS: " for x, y in enumerate(reader.fieldnames): print x, y header = convert_fieldnames(reader.fieldnames) writer.writerow(header) if row.get('reg_state') == 'student_created': continue if row.get('status') == 'started': # We do not use started payments continue for key, value in row.items(): if not key in OPTIONS.keys(): continue conv_name = OPTIONS[key] converter = getattr(Converters, conv_name, None) if converter is None: print "WARNING: cannot find converter %s" % conv_name continue row[key] = converter(row[key]) try: writer.writerow(row) except: print row['student_id'] print "Output written to %s" % output_file if __name__ == '__main__': if len(sys.argv) != 2: print 'Usage: %s ' % __file__ sys.exit(1) main()