## $Id: fix_import_file.py 7666 2012-02-19 08:21:39Z henrik $
## 
## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
## 
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
## 
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##
"""
Fix exports from old SRP portal to make them importable by current portal.

Usage:

Change into this directory, set the options below (files are assumed
to be in the same directory) and then run

  python fix_import_file.py <filename>

Errors/warnings will be displayed on the shell; the output will be
written to ``<basename>_edited.csv``, where ``<basename>`` is the name of
the input file without its extension.
"""
import csv
import datetime
import os
import re
import sys

##
## CONFIGURATION SECTION
##
# Keys are fieldnames in the input file; values are the names of the
# methods of class Converters (see below) that are applied to every value
# found in that column.
OPTIONS = {
    'sex': 'gender',
    'birthday': 'date',
    'request_date': 'datetime',
    'marit_stat': 'marit_stat',
    'session': 'session',
    'entry_session': 'session',
    'current_session': 'session',
    'session_id': 'session',
    'entry_mode': 'mode',
    'reg_state': 'reg_state',
    'password': 'password',
    'phone': 'phone',
    'level': 'level',
    'start_level': 'level',
    'end_level': 'level',
    'level_id': 'level',
    'current_level': 'level',
    'semester': 'semester',
    'application_category': 'application_category',
    'lga': 'lga',
    'order_id': 'no_int',
    }

# Mapping of input file column names --> output file column names.
# The entries are grouped by the kind of export file they occur in.
COLNAME_MAPPING = {
    # base data
    'jamb_reg_no': 'reg_number',
    'matric_no': 'matric_number',
    'birthday': 'date_of_birth',
    'clr_ac_pin': 'clr_code',
    # study course
    'study_course': 'certificate',
    # study level
    'session': 'level_session',
    'verdict': 'level_verdict',
    # course ticket
    'level_id': 'level',
    'core_or_elective': 'mandatory',
    # payment ticket
    'order_id': 'p_id',
    'status': 'p_state',
    'category': 'p_category',
    'resp_pay_reference': 'r_pay_reference',
    'resp_desc': 'r_desc',
    'resp_approved_amount': 'r_amount_approved',
    'item': 'p_item',
    'amount': 'amount_auth',
    'resp_card_num': 'r_card_num',
    'resp_code': 'r_code',
    'date': 'creation_date',
    'surcharge': 'surcharge_1',
    'session_id': 'p_session',
    }

# Mapping of input reg_state values --> output reg_state values.
# Note that two input states ('objection_raised' and
# 'clearance_pin_entered') collapse into the single output state
# 'clearance started'.
REGSTATE_MAPPING = {
    'student_created': 'created',
    'admitted': 'admitted',
    'objection_raised': 'clearance started',
    'clearance_pin_entered': 'clearance started',
    'clearance_requested': 'clearance requested',
    'cleared_and_validated': 'cleared',
    'school_fee_paid': 'school fee paid',
    'returning': 'returning',
    'courses_registered': 'courses registered',
    'courses_validated': 'courses validated',
    }

##
## END OF CONFIG
##

# Look for the first sequence of digits: skip any leading non-digits,
# capture the digit run as group 1, then consume trailing non-digits.
# Every part is optional, so ``match`` always succeeds (the group is then
# the empty string).  A raw string is used so that ``\d`` reaches the regex
# engine verbatim instead of being an invalid string escape.
RE_PHONE = re.compile(r'[^\d]*(\d*)[^\d]*')

def convert_fieldnames(fieldnames):
    """Build the header row that renames input columns for the output.

    Returns a dict keyed by every name in `fieldnames`. A name that has
    an entry in COLNAME_MAPPING maps to its replacement; any other name
    maps to itself.
    """
    return dict(
        (name, COLNAME_MAPPING.get(name, name)) for name in fieldnames)

class Converters(object):
    """Converters to turn old-style values into new ones.

    Every public converter is a classmethod that takes the raw cell value
    read from the input file (normally a string) and returns the value to
    be written to the output file.  The numeric converters return
    ``9999`` for values they cannot interpret.
    """

    # Marker returned by the numeric converters for unusable input.
    _INVALID_NUMBER = 9999

    @classmethod
    def _parse_int(cls, value):
        """Return `value` as an int, or None if it is not a valid integer.

        ``TypeError`` is caught as well as ``ValueError`` so that a missing
        cell (``None``) is treated like any other invalid value instead
        of aborting the whole run.
        """
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    @classmethod
    def reg_state(cls, value):
        """ 'courses_validated' --> 'courses validated'

        Values without an entry in REGSTATE_MAPPING are returned unchanged.
        """
        return REGSTATE_MAPPING.get(value, value)

    @classmethod
    def level(cls, value):
        """ '000' --> 10

        Returns an int; invalid input gives 9999.
        """
        number = cls._parse_int(value)
        if number is None:
            return cls._INVALID_NUMBER
        if number == 0:
            return 10
        return number

    @classmethod
    def semester(cls, value):
        """ '0' --> 9

        Returns an int; invalid input gives 9999.
        """
        number = cls._parse_int(value)
        if number is None:
            return cls._INVALID_NUMBER
        if number == 0:
            return 9
        return number

    @classmethod
    def application_category(cls, value):
        """ '' --> 'no'
        """
        if value == '':
            return 'no'
        return value

    @classmethod
    def lga(cls, value):
        """ Remove apostrophes and convert to lower case.

        One known misspelled id is replaced by its correct form.  Values
        that are not strings (e.g. ``None``) give ''.
        """
        if value == 'akwa_ibom_uru_offong_oruko':
            return 'akwa_ibom_urue-offong-oruko'
        try:
            return value.replace("'", "").lower()
        except AttributeError:
            # Not a string, nothing sensible to convert.
            return ''

    @classmethod
    def session(cls, value):
        """ '08' --> 2008

        Two-digit years 0..13 are expanded to 2000..2013, four-digit years
        2000..2014 are kept.  Everything else (including negative numbers
        and non-numeric input) gives 9999.
        """
        number = cls._parse_int(value)
        if number is None:
            return cls._INVALID_NUMBER
        # The lower bound keeps negative numbers from becoming 19xx years.
        if 0 <= number < 14:
            return number + 2000
        if 2000 <= number < 2015:
            return number
        return cls._INVALID_NUMBER

    @classmethod
    def marit_stat(cls, value):
        """ 'True'/'False' --> 'married'/'unmarried'

        Already converted values are kept; anything else gives ''.
        """
        if value in ('True', 'married'):
            return 'married'
        if value in ('False', 'unmarried'):
            return 'unmarried'
        return ''

    @classmethod
    def gender(cls, value):
        """ 'True'/'False' --> 'f'/'m'

        Already converted values are kept; anything else gives ''.
        """
        if value in ('True', 'f'):
            return 'f'
        if value in ('False', 'm'):
            return 'm'
        return ''

    @classmethod
    def date(cls, value):
        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd#'

        Missing values (``None``, 'None' or '') give ''.
        """
        if value is None or value in ("None", ""):
            return ""
        # We add the hash symbol to avoid automatic date transformation
        # in Excel and Calc for further processing
        return value.replace('/', '-') + '#'

    @classmethod
    def no_int(cls, value):
        """ Add hash.
        """
        # We add the hash symbol to avoid automatic number transformation
        # in Excel and Calc for further processing
        value += '#'
        return value

    @classmethod
    def datetime(cls, value):
        """ 'None' --> ''

        All other values are passed through unchanged; the format of the
        timestamp itself is not touched.
        """
        if value == "None":
            return ""
        return value

    @classmethod
    def mode(cls, value):
        """ 'transfer_fulltime' --> 'transfer_ft'
        """
        if value == "transfer_fulltime":
            return "transfer_ft"
        return value

    @classmethod
    def password(cls, value):
        """ 'not set' --> ''
        """
        if value == "not set":
            return ""
        return value

    @classmethod
    def phone(cls, value):
        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'

        Dashes and slashes are removed before looking for sequences
        of numbers.
        """
        value = value.replace('-', '')
        value = value.replace('/', '')
        # RE_PHONE consists of optional parts only, so it always matches.
        match = RE_PHONE.match(value)
        phone = match.groups()[0]
        return '--%s' % phone


def main():
    """Convert the CSV file named by ``sys.argv[1]``.

    The header row is rewritten via `convert_fieldnames`, every column
    listed in OPTIONS is passed through the named converter of
    `Converters`, and the result is written to ``<basename>_edited.csv``
    next to the input file.  Field names, warnings and rows that could not
    be written are reported on the shell.
    """
    input_file = sys.argv[1]
    # splitext only strips the real extension, so paths like './x.csv' or
    # 'a.b.csv' give a sensible output name.
    output_file = '%s_edited.csv' % os.path.splitext(input_file)[0]

    # Resolve the converter of every configured column once, up front,
    # instead of once per cell.
    converters = {}
    for key, conv_name in OPTIONS.items():
        converter = getattr(Converters, conv_name, None)
        if converter is None:
            print("WARNING: cannot find converter %s" % conv_name)
            continue
        converters[key] = converter

    # The csv module of Python 2 wants its files opened in binary mode.
    with open(input_file, 'rb') as infile:
        reader = csv.DictReader(infile)
        fieldnames = reader.fieldnames
        if fieldnames is None:
            # Completely empty input: there is nothing to convert.
            print("WARNING: no header row found in %s" % input_file)
            return

        print("FIELDS: ")
        for x, y in enumerate(fieldnames):
            print("%s %s" % (x, y))

        with open(output_file, 'wb') as outfile:
            writer = csv.DictWriter(outfile, fieldnames)
            # The header is written even if no data rows follow.
            writer.writerow(convert_fieldnames(fieldnames))

            for num, row in enumerate(reader):
                for key, converter in converters.items():
                    if key in row:
                        row[key] = converter(row[key])
                try:
                    writer.writerow(row)
                except (ValueError, csv.Error):
                    # E.g. a row with more cells than the header has
                    # columns.  Report it and carry on with the next row.
                    print("WARNING: could not write row %d (student_id: %s)"
                          % (num + 1, row.get('student_id')))

    print("Output written to %s" % output_file)


# Script entry point: exactly one argument (the input file) is expected.
if __name__ == '__main__':
    if len(sys.argv) == 2:
        main()
    else:
        print('Usage: %s <filename>' % __file__)
        sys.exit(1)
