source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 9437

Last change on this file since 9437 was 9362, checked in by uli, 12 years ago

Update tools to allow new ids in imports. Support for new import col 'old_id'.

  • Property svn:keywords set to Id
File size: 15.1 KB
RevLine 
[7512]1## $Id: fix_import_file.py 9362 2012-10-19 22:59:41Z uli $
2##
[7518]3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
[7512]4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
[8781]19Fix exports from old SRP portal and other data sources to make
20them importable by current portal.
[7512]21
22Usage:
23
24Change into this directory, set the options below (files are assumed
25to be in the same directory) and then run
26
[7518]27  python fix_import_file.py <filename>
[7512]28
29Errors/warnings will be displayed on the shell, the output will be put
30into the specified output file.
[8781]31
32
33The lgas.py module must be copied into the same folder where this script
34is started.
[7512]35"""
[7573]36import csv
37import datetime
[9362]38import os
[7575]39import re
[7573]40import sys
[7512]41
[8781]42try:
43    from lgas import LGAS
44except:
45    print 'ERROR: lgas.py is missing.'
46    sys.exit(1)
47
48def strip(string):
49    string = string.replace('_', '')
50    string = string.replace('/', '')
51    string = string.replace('-', '')
52    string = string.replace(' ', '')
53    string = string.lower()
54    return string
55
56LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
57LGAS_dict = dict(LGAS)
58
[7512]59##
60## CONFIGURATION SECTION
61##
62# keys are fieldnames in input file, values are methods of class
63# Converter (see below)
64OPTIONS = {
[8478]65    'student_id': 'student_id',
[7512]66    'sex': 'gender',
67    'birthday': 'date',
[7514]68    'marit_stat': 'marit_stat',
[7537]69    'session': 'session',
[7514]70    'entry_session': 'session',
71    'current_session': 'session',
[7602]72    'session_id': 'session',
[7632]73    'entry_mode': 'mode',
[7526]74    'reg_state': 'reg_state',
75    'password': 'password',
[7575]76    'phone': 'phone',
[8478]77    'nationality': 'nationality',
[7602]78    'level': 'level',
79    'start_level': 'level',
80    'end_level': 'level',
81    'level_id': 'level',
82    'current_level': 'level',
83    'semester': 'semester',
84    'application_category': 'application_category',
[7610]85    'lga': 'lga',
[7628]86    'order_id': 'no_int',
[8938]87    'uniben': 'former',
88    'nysc_year': 'year',
89    'alr_date': 'date',
90    'fst_sit_date': 'date',
91    'scd_sit_date': 'date',
92    'emp_start': 'date',
93    'emp_end': 'date',
94    'emp_start2': 'date',
95    'emp_end2': 'date',
96    'fst_sit_results': 'result',
97    'scd_sit_results': 'result',
98    'alr_results': 'result',
[8939]99    'email': 'email',
100    'fst_sit_type': 'sittype',
101    'scd_sit_type': 'sittype',
[8941]102    'resp_pay_reference': 'no_int',
103    'type': 'company',
104    'date': 'date',
[9194]105    'core_or_elective': 'bool',
[8945]106    'category': 'p_category',
[9007]107    'reg_transition': 'reg_state',  # we completely change this column,
108                                    # since reg_state import is usually intended
109    'transition': 'reg_transition',
[9158]110    'payment_date': 'date',
[9160]111    'validation_date': 'date',
[7512]112    }
[7516]113
114# Mapping input file colnames --> output file colnames
115COLNAME_MAPPING = {
[7666]116    # base data
[9009]117    'id': 'student_id',
[8478]118    'reg_state': 'state',
[9007]119    'reg_transition': 'state',
[7518]120    'jamb_reg_no': 'reg_number',
[7645]121    'matric_no': 'matric_number',
[7516]122    'birthday': 'date_of_birth',
[7526]123    'clr_ac_pin': 'clr_code',
[8941]124    # clearance
125    'hq_grade': 'hq_degree',
126    'uniben': 'former_matric',
127    'hq_type2': 'hq2_type',
128    'hq_grade2': 'hq2_degree',
129    'hq_school2': 'hq2_school',
130    'hq_matric_no2': 'hq2_matric_no',
131    'hq_session2': 'hq2_session',
132    'hq_disc2': 'hq2_disc',
133    'emp': 'employer',
134    'emp2': 'employer2',
135    'emp_position2': 'emp2_position',
136    'emp_start2': 'emp2_start',
137    'emp_end2': 'emp2_end',
138    'emp_reason2': 'emp2_reason',
[7550]139    # study course
[7530]140    'study_course': 'certificate',
[7550]141    # study level
[7537]142    'session': 'level_session',
143    'verdict': 'level_verdict',
[7550]144    # course ticket
145    'level_id': 'level',
[7666]146    'core_or_elective': 'mandatory',
[7628]147    # payment ticket
148    'order_id': 'p_id',
149    'status': 'p_state',
150    'category': 'p_category',
151    'resp_pay_reference': 'r_pay_reference',
152    'resp_desc': 'r_desc',
153    'resp_approved_amount': 'r_amount_approved',
154    'item': 'p_item',
155    'amount': 'amount_auth',
156    'resp_card_num': 'r_card_num',
157    'resp_code': 'r_code',
158    'date': 'creation_date',
159    'surcharge': 'surcharge_1',
160    'session_id': 'p_session',
[8941]161    'type': 'r_company',
[9362]162    'old_id': 'old_id',
[7516]163    }
[7526]164
[8478]165# Mapping input state --> output state
[7526]166REGSTATE_MAPPING = {
167    'student_created': 'created',
168    'admitted': 'admitted',
[7610]169    'objection_raised': 'clearance started',
[7526]170    'clearance_pin_entered': 'clearance started',
171    'clearance_requested': 'clearance requested',
172    'cleared_and_validated': 'cleared',
173    'school_fee_paid': 'school fee paid',
174    'returning': 'returning',
175    'courses_registered': 'courses registered',
176    'courses_validated': 'courses validated',
[9007]177    'admit': 'admitted',
178    'return': 'returning'
[7526]179    }
180
[9362]181# Mapping of special cases, where new id is not deductible from old id
182# Set to `None`, if no such special cases should be considered.
183ID_MAP_CSV = None
184ID_MAP_CSV = "id_mapping.csv"
185
[7512]186##
187## END OF CONFIG
188##
189
[7575]190# Look for the first sequence of numbers
191RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
192
[9362]193def get_id_mapping():
194    """Returns a dict mapping from old (SRP) ids to new ids.
195
196    The dict is read from ID_MAP_CSV file. If this var is set to
197    ``None`` an empty dict is returned. The ID_MAP_CSV contains only
198    the student ids of those students, for which the standard method
199    (new_id=CHAR+old_id) does not work.
200    """
201    if ID_MAP_CSV is None:
202        return {}
203    if not os.path.isfile(ID_MAP_CSV):
204        raise IOError(
205            "No such file for mapping old to new ids: %s" % ID_MAP_CSV)
206    result = dict()
207    reader = csv.DictReader(open(ID_MAP_CSV, 'rb'))
208    for row in reader:
209        result[row['student_id']] = row['new_id']
210    return result
211
212
[7516]213def convert_fieldnames(fieldnames):
214    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
215    """
[9009]216    # Remove whitespaces
[9019]217    header = dict([(name, name.strip()) for name in fieldnames])
[7516]218    for in_name, out_name in COLNAME_MAPPING.items():
[9019]219        if in_name not in header.values():
[7516]220            continue
[9019]221        # Inverse dictionary lookup
222        key = [key for key,value in header.items() if value==in_name][0]
223        header[key] = out_name
[7514]224    return header
225
[7512]226class Converters():
227    """Converters to turn old-style values into new ones.
228    """
[8478]229
[9362]230    old_new_id_map = get_id_mapping()
231
[7512]232    @classmethod
[9362]233    def student_id(cls, value, row):
[8478]234        """ 'A123456' --> 'EA123456'
235        """
[9362]236        value = cls.old_new_id_map.get(value, value)
[8478]237        if len(value) == 7:
[9270]238            return 'M' + value
[8478]239        return value
240
241    @classmethod
[9009]242    def reg_state(self, value, row):
[7526]243        """ 'courses_validated' --> 'courses validated'
244        """
245        return REGSTATE_MAPPING.get(value,value)
246
247    @classmethod
[9009]248    def reg_transition(self, value, row):
[9007]249        if value == "admitted":
250            return "admit"
251        if value == "returning":
252            return "return"
253        return value
254
255    @classmethod
[9009]256    def level(self, value, row):
[7602]257        """ '000' --> '10'
[9009]258        '800' --> '999' if pg student
[7602]259        """
260        try:
261            number = int(value)
262        except ValueError:
263            return 9999
264        if number == 0:
265            return 10
[9158]266        if row.get('entry_mode') and row.get('entry_mode').startswith('pg'):
[9009]267            return 999
[7602]268        return number
269
270    @classmethod
[9009]271    def semester(self, value, row):
[7602]272        """ '0' --> '9'
273        """
274        try:
275            number = int(value)
276        except ValueError:
277            return 9999
278        if number == 0:
279            return 9
280        return number
281
282    @classmethod
[9009]283    def application_category(self, value, row):
[7602]284        """ '' --> 'no'
285        """
286        if value == '':
287            return 'no'
288        return value
289
[7610]290    @classmethod
[9009]291    def lga(self, value, row):
[7610]292        """ Remove apostrophe
293        """
294        if value == 'akwa_ibom_uru_offong_oruko':
295            return 'akwa_ibom_urue-offong-oruko'
[8939]296        if value == 'edo_ohionmwon':
297            return 'edo_orhionmwon'
298
299        if value == 'nassarawa_nassarawa':
300            return 'nassarawa_nassawara'
301
302        if value == 'kogi_mopa-muro-mopi':
303            return 'kogi_mopa-muro'
304
305        if value == 'delta_osimili-north':
306            return 'delta_oshielli-north'
307
308        if value == 'delta_osimili':
309            return 'delta_oshimili'
310
311        if value == 'delta_osimili-south':
312            return 'delta_oshimili-south'
[7610]313        try:
314            value = value.replace("'","")
315        except:
316            return ''
[8784]317        lower = value.lower()
318        if lower in LGAS_dict.keys():
319            return lower
[8781]320        # If real names are given, let's see if a similar value
321        # in LGAS exist.
[8784]322        value = LGAS_inverted_stripped.get(strip(lower), value)
[7610]323        return value
[7602]324
[7610]325
[7602]326    @classmethod
[9009]327    def session(self, value, row):
[7514]328        """ '08' --> '2008'
[8478]329        '2008/2009' --> '2008'
[7514]330        """
[8478]331        if '/' in value:
332            numbers = value.split('/')
333            number = int(numbers[0])
334            if number in range(2000,2015):
335                return number
336            else:
337                return 9999
[7514]338        try:
339            number = int(value)
340        except ValueError:
341            return 9999
342        if number < 14:
343            return number + 2000
344        elif number in range(2000,2015):
345            return number
346        else:
347            return 9999
348
349    @classmethod
[9009]350    def former(self, value, row):
[8938]351        """ True --> yes
352        '2008/2009' --> '2008'
353        """
354        if value == 'True':
355            return 'yes'
356        return
357
358    @classmethod
[9194]359    def bool(self, value, row):
360        """ True --> 1
361        """
362        if value in ('TRUE', 'True'):
363            return '1'
364        elif value in ('FALSE', 'False'):
365            return '0'
366        return
367
368    @classmethod
[9009]369    def year(self, value, row):
[8938]370        """ '0' --> ''
371        """
372        if value == '0':
373            return
374        if value == 'None':
375            return
376        return value
377
378
379    @classmethod
[9009]380    def marit_stat(self, value, row):
[7514]381        """ 'True'/'False' --> 'married'/'unmarried'
382        """
[7610]383        if value in ('True','married'):
[7514]384            value = 'married'
[7610]385        elif value in ('False','unmarried'):
[7514]386            value = 'unmarried'
387        else:
388            value = ''
389        return value
390
391    @classmethod
[9009]392    def gender(self, value, row):
[7526]393        """ 'True'/'False' --> 'f'/'m'
[7514]394        """
[8785]395        if value.strip() in ('F', 'True','f'):
[7526]396            value = 'f'
[8785]397        elif value.strip() in ('M', 'False','m'):
[7526]398            value = 'm'
[7514]399        else:
400            value = ''
401        return value
402
403    @classmethod
[9009]404    def date(self, value, row):
[7512]405        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
406        """
407        if value == "None":
408            value = ""
409        elif value == "":
410            value = ""
411        else:
412            value = value.replace('/', '-')
[7514]413            # We add the hash symbol to avoid automatic date transformation
414            # in Excel and Calc for further processing
415            value += '#'
[7512]416        return value
417
418    @classmethod
[9009]419    def no_int(self, value, row):
[8941]420        """ Add hash and skip numbers starting with 999999
[7628]421        """
422        # We add the hash symbol to avoid automatic number transformation
423        # in Excel and Calc for further processing
[8941]424        try:
425            intvalue = int(value)
426            value += '#'
427        except:
428            pass
429        if value.startswith('999999'):
430            return
[7628]431        return value
432
433    @classmethod
[9009]434    def mode(self, value, row):
[7632]435        if value == "transfer_fulltime":
436            return "transfer_ft"
[8522]437        if value == "ume_ft":
438            return "utme_ft"
[7632]439        return value
440
441    @classmethod
[9009]442    def password(self, value, row):
[7526]443        if value == "not set":
444            return ""
445        return value
446
[7575]447    @classmethod
[9009]448    def nationality(self, value, row):
[9270]449        if value in ('nigeria', 'Nigeria'):
[8478]450            return "NG"
[9270]451        if value in ('niger', 'Niger'):
[8939]452            return "NE"
[8478]453        return value
454
455    @classmethod
[9009]456    def sittype(self, value, row):
[8939]457        if value == "nabtec":
458            return "nabteb"
459        return value
460
461    @classmethod
[9009]462    def company(self, value, row):
[8941]463        if value == "online":
464            return "interswitch"
465        return value
466
467    @classmethod
[9009]468    def p_category(self, value, row):
[8941]469        if value == "acceptance":
470            return "clearance"
471        return value
472
473    @classmethod
[9009]474    def email(self, value, row):
[8939]475        return value.strip()
476
477    @classmethod
[9009]478    def phone(self, value, row):
[7575]479        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
[7526]480
[7575]481        Dashes and slashes are removed before looking for sequences
482        of numbers.
483        """
[8938]484        if not value:
485            return
[9007]486        value = value.strip('#')
[7575]487        value = value.replace('-', '')
488        value = value.replace('/', '')
489        match = RE_PHONE.match(value)
490        phone = match.groups()[0]
[9007]491        if value.startswith('234'):
492            value = '+' + value[:3] + '-' + value[3:]
493        else:
494            value = '-%s' % phone
[8938]495        return value + '#'
[7575]496
[8938]497    @classmethod
[9009]498    def result(self, value, row):
[8938]499        try:
500            liste = eval(value)
501        except:
502            return
503        if isinstance(liste,list):
504            return [(i[0].lower(),i[1]) for i in liste]
505        return
[7575]506
[9007]507
[7572]508def main():
[7573]509    input_file = '%s' % sys.argv[1]
510    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
511    reader = csv.DictReader(open(input_file, 'rb'))
[7572]512    writer = None
[7512]513
[7572]514    for num, row in enumerate(reader):
515        if num == 0:
[7573]516            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
[7572]517            print "FIELDS: "
518            for x, y in enumerate(reader.fieldnames):
519                print x, y
520            header = convert_fieldnames(reader.fieldnames)
521            writer.writerow(header)
[8939]522        if row.get('reg_state') == 'student_created':
[9009]523            # We do not reimport student records which have never been accessed.
[8939]524            continue
[8941]525        if row.get('status') == 'started':
[9009]526            # We do not reimport started payments.
[8941]527            continue
[7572]528        for key, value in row.items():
[9009]529            # Remove unwanted whitespaces.
530            row[key] = row[key].strip()
[7572]531            if not key in OPTIONS.keys():
532                continue
533            conv_name = OPTIONS[key]
534            converter = getattr(Converters, conv_name, None)
535            if converter is None:
536                print "WARNING: cannot find converter %s" % conv_name
537                continue
[9009]538            row[key] = converter(row[key], row)
[7602]539        try:
540            writer.writerow(row)
541        except:
542            print row['student_id']
[7512]543
[7573]544    print "Output written to %s" % output_file
[7572]545
546
547if __name__ == '__main__':
548    if len(sys.argv) != 2:
549        print 'Usage: %s <filename>' % __file__
550        sys.exit(1)
551    main()
Note: See TracBrowser for help on using the repository browser.