source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 11996

Last change on this file since 11996 was 9478, checked in by Henrik Bettermann, 12 years ago

Add converter for amounts and other things.

  • Property svn:keywords set to Id
File size: 15.5 KB
RevLine 
[7512]1## $Id: fix_import_file.py 9478 2012-10-30 20:19:06Z henrik $
2##
[7518]3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
[7512]4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
[8781]19Fix exports from old SRP portal and other data sources to make
20them importable by current portal.
[7512]21
22Usage:
23
24Change into this directory, set the options below (files are assumed
25to be in the same directory) and then run
26
[7518]27  python fix_import_file.py <filename>
[7512]28
29Errors/warnings will be displayed on the shell, the output will be put
30into the specified output file.
[8781]31
32
33The lgas.py module must be copied into the same folder where this script
34is started.
[7512]35"""
[7573]36import csv
37import datetime
[9362]38import os
[7575]39import re
[7573]40import sys
[7512]41
[8781]42try:
43    from lgas import LGAS
44except:
45    print 'ERROR: lgas.py is missing.'
46    sys.exit(1)
47
48def strip(string):
49    string = string.replace('_', '')
50    string = string.replace('/', '')
51    string = string.replace('-', '')
52    string = string.replace(' ', '')
53    string = string.lower()
54    return string
55
56LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
57LGAS_dict = dict(LGAS)
58
[7512]59##
60## CONFIGURATION SECTION
61##
62# keys are fieldnames in input file, values are methods of class
63# Converter (see below)
64OPTIONS = {
[8478]65    'student_id': 'student_id',
[7512]66    'sex': 'gender',
67    'birthday': 'date',
[7514]68    'marit_stat': 'marit_stat',
[7537]69    'session': 'session',
[7514]70    'entry_session': 'session',
71    'current_session': 'session',
[7602]72    'session_id': 'session',
[7632]73    'entry_mode': 'mode',
[7526]74    'reg_state': 'reg_state',
75    'password': 'password',
[7575]76    'phone': 'phone',
[8478]77    'nationality': 'nationality',
[7602]78    'level': 'level',
79    'start_level': 'level',
80    'end_level': 'level',
81    'level_id': 'level',
82    'current_level': 'level',
83    'semester': 'semester',
84    'application_category': 'application_category',
[7610]85    'lga': 'lga',
[7628]86    'order_id': 'no_int',
[8938]87    'uniben': 'former',
88    'nysc_year': 'year',
89    'alr_date': 'date',
90    'fst_sit_date': 'date',
91    'scd_sit_date': 'date',
92    'emp_start': 'date',
93    'emp_end': 'date',
94    'emp_start2': 'date',
95    'emp_end2': 'date',
96    'fst_sit_results': 'result',
97    'scd_sit_results': 'result',
98    'alr_results': 'result',
[8939]99    'email': 'email',
100    'fst_sit_type': 'sittype',
101    'scd_sit_type': 'sittype',
[8941]102    'resp_pay_reference': 'no_int',
103    'type': 'company',
104    'date': 'date',
[9194]105    'core_or_elective': 'bool',
[8945]106    'category': 'p_category',
[9007]107    'reg_transition': 'reg_state',  # we completely change this column,
108                                    # since reg_state import is usually intended
109    'transition': 'reg_transition',
[9158]110    'payment_date': 'date',
[9160]111    'validation_date': 'date',
[9478]112    'resp_approved_amount': 'amount',
113    'amount': 'amount',
114    'amount_auth': 'amount',
115    'surcharge': 'amount'
[7512]116    }
[7516]117
118# Mapping input file colnames --> output file colnames
119COLNAME_MAPPING = {
[7666]120    # base data
[9009]121    'id': 'student_id',
[8478]122    'reg_state': 'state',
[9007]123    'reg_transition': 'state',
[7518]124    'jamb_reg_no': 'reg_number',
[7645]125    'matric_no': 'matric_number',
[7516]126    'birthday': 'date_of_birth',
[7526]127    'clr_ac_pin': 'clr_code',
[8941]128    # clearance
129    'hq_grade': 'hq_degree',
130    'uniben': 'former_matric',
131    'hq_type2': 'hq2_type',
132    'hq_grade2': 'hq2_degree',
133    'hq_school2': 'hq2_school',
134    'hq_matric_no2': 'hq2_matric_no',
135    'hq_session2': 'hq2_session',
136    'hq_disc2': 'hq2_disc',
137    'emp': 'employer',
138    'emp2': 'employer2',
139    'emp_position2': 'emp2_position',
140    'emp_start2': 'emp2_start',
141    'emp_end2': 'emp2_end',
142    'emp_reason2': 'emp2_reason',
[7550]143    # study course
[7530]144    'study_course': 'certificate',
[7550]145    # study level
[7537]146    'session': 'level_session',
147    'verdict': 'level_verdict',
[7550]148    # course ticket
149    'level_id': 'level',
[7666]150    'core_or_elective': 'mandatory',
[7628]151    # payment ticket
152    'order_id': 'p_id',
153    'status': 'p_state',
154    'category': 'p_category',
155    'resp_pay_reference': 'r_pay_reference',
156    'resp_desc': 'r_desc',
157    'resp_approved_amount': 'r_amount_approved',
158    'item': 'p_item',
159    'amount': 'amount_auth',
160    'resp_card_num': 'r_card_num',
161    'resp_code': 'r_code',
162    'date': 'creation_date',
163    'surcharge': 'surcharge_1',
164    'session_id': 'p_session',
[8941]165    'type': 'r_company',
[9362]166    'old_id': 'old_id',
[7516]167    }
[7526]168
[8478]169# Mapping input state --> output state
[7526]170REGSTATE_MAPPING = {
171    'student_created': 'created',
172    'admitted': 'admitted',
[7610]173    'objection_raised': 'clearance started',
[7526]174    'clearance_pin_entered': 'clearance started',
175    'clearance_requested': 'clearance requested',
176    'cleared_and_validated': 'cleared',
177    'school_fee_paid': 'school fee paid',
178    'returning': 'returning',
179    'courses_registered': 'courses registered',
180    'courses_validated': 'courses validated',
[9007]181    'admit': 'admitted',
182    'return': 'returning'
[7526]183    }
184
[9362]185# Mapping of special cases, where new id is not deductible from old id
186# Set to `None`, if no such special cases should be considered.
187ID_MAP_CSV = None
188ID_MAP_CSV = "id_mapping.csv"
189
[7512]190##
191## END OF CONFIG
192##
193
[7575]194# Look for the first sequence of numbers
195RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
196
[9362]197def get_id_mapping():
198    """Returns a dict mapping from old (SRP) ids to new ids.
199
200    The dict is read from ID_MAP_CSV file. If this var is set to
201    ``None`` an empty dict is returned. The ID_MAP_CSV contains only
202    the student ids of those students, for which the standard method
203    (new_id=CHAR+old_id) does not work.
204    """
205    if ID_MAP_CSV is None:
206        return {}
207    if not os.path.isfile(ID_MAP_CSV):
208        raise IOError(
209            "No such file for mapping old to new ids: %s" % ID_MAP_CSV)
210    result = dict()
211    reader = csv.DictReader(open(ID_MAP_CSV, 'rb'))
212    for row in reader:
213        result[row['student_id']] = row['new_id']
214    return result
215
216
[7516]217def convert_fieldnames(fieldnames):
218    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
219    """
[9009]220    # Remove whitespaces
[9019]221    header = dict([(name, name.strip()) for name in fieldnames])
[7516]222    for in_name, out_name in COLNAME_MAPPING.items():
[9019]223        if in_name not in header.values():
[7516]224            continue
[9019]225        # Inverse dictionary lookup
226        key = [key for key,value in header.items() if value==in_name][0]
227        header[key] = out_name
[7514]228    return header
229
[7512]230class Converters():
231    """Converters to turn old-style values into new ones.
232    """
[8478]233
[9362]234    old_new_id_map = get_id_mapping()
235
[7512]236    @classmethod
[9362]237    def student_id(cls, value, row):
[8478]238        """ 'A123456' --> 'EA123456'
239        """
[9362]240        value = cls.old_new_id_map.get(value, value)
[8478]241        if len(value) == 7:
[9478]242            return 'B' + value
[8478]243        return value
244
245    @classmethod
[9009]246    def reg_state(self, value, row):
[7526]247        """ 'courses_validated' --> 'courses validated'
248        """
249        return REGSTATE_MAPPING.get(value,value)
250
251    @classmethod
[9009]252    def reg_transition(self, value, row):
[9007]253        if value == "admitted":
254            return "admit"
255        if value == "returning":
256            return "return"
257        return value
258
259    @classmethod
[9009]260    def level(self, value, row):
[7602]261        """ '000' --> '10'
[9009]262        '800' --> '999' if pg student
[7602]263        """
264        try:
265            number = int(value)
266        except ValueError:
267            return 9999
268        if number == 0:
269            return 10
[9158]270        if row.get('entry_mode') and row.get('entry_mode').startswith('pg'):
[9009]271            return 999
[7602]272        return number
273
274    @classmethod
[9009]275    def semester(self, value, row):
[7602]276        """ '0' --> '9'
277        """
278        try:
279            number = int(value)
280        except ValueError:
281            return 9999
282        if number == 0:
283            return 9
284        return number
285
286    @classmethod
[9009]287    def application_category(self, value, row):
[7602]288        """ '' --> 'no'
289        """
290        if value == '':
291            return 'no'
292        return value
293
[7610]294    @classmethod
[9009]295    def lga(self, value, row):
[7610]296        """ Remove apostrophe
297        """
298        if value == 'akwa_ibom_uru_offong_oruko':
299            return 'akwa_ibom_urue-offong-oruko'
[8939]300        if value == 'edo_ohionmwon':
301            return 'edo_orhionmwon'
302
303        if value == 'nassarawa_nassarawa':
304            return 'nassarawa_nassawara'
305
306        if value == 'kogi_mopa-muro-mopi':
307            return 'kogi_mopa-muro'
308
309        if value == 'delta_osimili-north':
310            return 'delta_oshielli-north'
311
312        if value == 'delta_osimili':
313            return 'delta_oshimili'
314
315        if value == 'delta_osimili-south':
316            return 'delta_oshimili-south'
[7610]317        try:
318            value = value.replace("'","")
319        except:
320            return ''
[8784]321        lower = value.lower()
322        if lower in LGAS_dict.keys():
323            return lower
[8781]324        # If real names are given, let's see if a similar value
325        # in LGAS exist.
[9478]326        value = value.replace('_',' ')
[8784]327        value = LGAS_inverted_stripped.get(strip(lower), value)
[7610]328        return value
[7602]329
[7610]330
[7602]331    @classmethod
[9009]332    def session(self, value, row):
[7514]333        """ '08' --> '2008'
[8478]334        '2008/2009' --> '2008'
[7514]335        """
[8478]336        if '/' in value:
337            numbers = value.split('/')
338            number = int(numbers[0])
339            if number in range(2000,2015):
340                return number
341            else:
342                return 9999
[7514]343        try:
344            number = int(value)
345        except ValueError:
346            return 9999
347        if number < 14:
348            return number + 2000
349        elif number in range(2000,2015):
350            return number
351        else:
352            return 9999
353
354    @classmethod
[9009]355    def former(self, value, row):
[8938]356        """ True --> yes
357        '2008/2009' --> '2008'
358        """
359        if value == 'True':
360            return 'yes'
361        return
362
363    @classmethod
[9194]364    def bool(self, value, row):
365        """ True --> 1
366        """
367        if value in ('TRUE', 'True'):
368            return '1'
369        elif value in ('FALSE', 'False'):
370            return '0'
371        return
372
373    @classmethod
[9009]374    def year(self, value, row):
[8938]375        """ '0' --> ''
376        """
377        if value == '0':
378            return
379        if value == 'None':
380            return
381        return value
382
383
384    @classmethod
[9009]385    def marit_stat(self, value, row):
[7514]386        """ 'True'/'False' --> 'married'/'unmarried'
387        """
[7610]388        if value in ('True','married'):
[7514]389            value = 'married'
[7610]390        elif value in ('False','unmarried'):
[7514]391            value = 'unmarried'
392        else:
393            value = ''
394        return value
395
396    @classmethod
[9009]397    def gender(self, value, row):
[7526]398        """ 'True'/'False' --> 'f'/'m'
[7514]399        """
[8785]400        if value.strip() in ('F', 'True','f'):
[7526]401            value = 'f'
[8785]402        elif value.strip() in ('M', 'False','m'):
[7526]403            value = 'm'
[7514]404        else:
405            value = ''
406        return value
407
408    @classmethod
[9009]409    def date(self, value, row):
[7512]410        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
411        """
412        if value == "None":
413            value = ""
414        elif value == "":
415            value = ""
416        else:
417            value = value.replace('/', '-')
[7514]418            # We add the hash symbol to avoid automatic date transformation
419            # in Excel and Calc for further processing
420            value += '#'
[7512]421        return value
422
423    @classmethod
[9009]424    def no_int(self, value, row):
[8941]425        """ Add hash and skip numbers starting with 999999
[7628]426        """
427        # We add the hash symbol to avoid automatic number transformation
428        # in Excel and Calc for further processing
[8941]429        try:
430            intvalue = int(value)
431            value += '#'
432        except:
433            pass
434        if value.startswith('999999'):
435            return
[7628]436        return value
437
438    @classmethod
[9478]439    def amount(self, value, row):
440        """ Amounts must be integers
441        """
442        try:
443            return float(value)
444        except:
445            return
446
447    @classmethod
[9009]448    def mode(self, value, row):
[7632]449        if value == "transfer_fulltime":
450            return "transfer_ft"
[8522]451        if value == "ume_ft":
452            return "utme_ft"
[7632]453        return value
454
455    @classmethod
[9009]456    def password(self, value, row):
[7526]457        if value == "not set":
458            return ""
459        return value
460
[7575]461    @classmethod
[9009]462    def nationality(self, value, row):
[9270]463        if value in ('nigeria', 'Nigeria'):
[8478]464            return "NG"
[9270]465        if value in ('niger', 'Niger'):
[8939]466            return "NE"
[8478]467        return value
468
469    @classmethod
[9009]470    def sittype(self, value, row):
[8939]471        if value == "nabtec":
472            return "nabteb"
473        return value
474
475    @classmethod
[9009]476    def company(self, value, row):
[8941]477        if value == "online":
478            return "interswitch"
479        return value
480
481    @classmethod
[9009]482    def p_category(self, value, row):
[8941]483        if value == "acceptance":
484            return "clearance"
485        return value
486
487    @classmethod
[9009]488    def email(self, value, row):
[8939]489        return value.strip()
490
491    @classmethod
[9009]492    def phone(self, value, row):
[7575]493        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
[7526]494
[7575]495        Dashes and slashes are removed before looking for sequences
496        of numbers.
497        """
[8938]498        if not value:
499            return
[9478]500        if len(value) < 5:
501            return
[9007]502        value = value.strip('#')
[7575]503        value = value.replace('-', '')
504        value = value.replace('/', '')
505        match = RE_PHONE.match(value)
506        phone = match.groups()[0]
[9007]507        if value.startswith('234'):
508            value = '+' + value[:3] + '-' + value[3:]
[9478]509        elif value.startswith('+234'):
510            value = value[:4] + '-' + value[4:]
[9007]511        else:
512            value = '-%s' % phone
[8938]513        return value + '#'
[7575]514
[8938]515    @classmethod
[9009]516    def result(self, value, row):
[8938]517        try:
518            liste = eval(value)
519        except:
520            return
521        if isinstance(liste,list):
522            return [(i[0].lower(),i[1]) for i in liste]
523        return
[7575]524
[9007]525
[7572]526def main():
[7573]527    input_file = '%s' % sys.argv[1]
528    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
529    reader = csv.DictReader(open(input_file, 'rb'))
[7572]530    writer = None
[7512]531
[7572]532    for num, row in enumerate(reader):
533        if num == 0:
[7573]534            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
[7572]535            print "FIELDS: "
536            for x, y in enumerate(reader.fieldnames):
537                print x, y
538            header = convert_fieldnames(reader.fieldnames)
539            writer.writerow(header)
[9478]540        #if row.get('reg_state') == 'student_created':
541        #    # We do not reimport student records which have never been accessed.
542        #    continue
[8941]543        if row.get('status') == 'started':
[9009]544            # We do not reimport started payments.
[8941]545            continue
[7572]546        for key, value in row.items():
[9009]547            # Remove unwanted whitespaces.
548            row[key] = row[key].strip()
[7572]549            if not key in OPTIONS.keys():
550                continue
551            conv_name = OPTIONS[key]
552            converter = getattr(Converters, conv_name, None)
553            if converter is None:
554                print "WARNING: cannot find converter %s" % conv_name
555                continue
[9009]556            row[key] = converter(row[key], row)
[7602]557        try:
558            writer.writerow(row)
559        except:
560            print row['student_id']
[7512]561
[7573]562    print "Output written to %s" % output_file
[7572]563
564
565if __name__ == '__main__':
566    if len(sys.argv) != 2:
567        print 'Usage: %s <filename>' % __file__
568        sys.exit(1)
569    main()
Note: See TracBrowser for help on using the repository browser.