source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 9007

Last change on this file since 9007 was 9007, checked in by Henrik Bettermann, 13 years ago

Extend import file repair script.

Add shell script for downloading export files.

  • Property svn:keywords set to Id
File size: 13.2 KB
RevLine 
[7512]1## $Id: fix_import_file.py 9007 2012-07-17 04:36:27Z henrik $
2##
[7518]3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
[7512]4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
[8781]19Fix exports from old SRP portal and other data sources to make
20them importable by current portal.
[7512]21
22Usage:
23
24Change into this directory, set the options below (files are assumed
25to be in the same directory) and then run
26
[7518]27  python fix_import_file.py <filename>
[7512]28
29Errors/warnings will be displayed on the shell, the output will be put
30into the specified output file.
[8781]31
32
33The lgas.py module must be copied into the same folder where this script
34is started.
[7512]35"""
[7573]36import csv
37import datetime
[7575]38import re
[7573]39import sys
[7512]40
[8781]41try:
42    from lgas import LGAS
43except:
44    print 'ERROR: lgas.py is missing.'
45    sys.exit(1)
46
47def strip(string):
48    string = string.replace('_', '')
49    string = string.replace('/', '')
50    string = string.replace('-', '')
51    string = string.replace(' ', '')
52    string = string.lower()
53    return string
54
55LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56LGAS_dict = dict(LGAS)
57
[7512]58##
59## CONFIGURATION SECTION
60##
61# keys are fieldnames in input file, values are methods of class
62# Converter (see below)
63OPTIONS = {
[8478]64    'student_id': 'student_id',
[7512]65    'sex': 'gender',
66    'birthday': 'date',
[7514]67    'marit_stat': 'marit_stat',
[7537]68    'session': 'session',
[7514]69    'entry_session': 'session',
70    'current_session': 'session',
[7602]71    'session_id': 'session',
[7632]72    'entry_mode': 'mode',
[7526]73    'reg_state': 'reg_state',
74    'password': 'password',
[7575]75    'phone': 'phone',
[8478]76    'nationality': 'nationality',
[7602]77    'level': 'level',
78    'start_level': 'level',
79    'end_level': 'level',
80    'level_id': 'level',
81    'current_level': 'level',
82    'semester': 'semester',
83    'application_category': 'application_category',
[7610]84    'lga': 'lga',
[7628]85    'order_id': 'no_int',
[8938]86    'uniben': 'former',
87    'nysc_year': 'year',
88    'alr_date': 'date',
89    'fst_sit_date': 'date',
90    'scd_sit_date': 'date',
91    'emp_start': 'date',
92    'emp_end': 'date',
93    'emp_start2': 'date',
94    'emp_end2': 'date',
95    'fst_sit_results': 'result',
96    'scd_sit_results': 'result',
97    'alr_results': 'result',
[8939]98    'email': 'email',
99    'fst_sit_type': 'sittype',
100    'scd_sit_type': 'sittype',
[8941]101    'resp_pay_reference': 'no_int',
102    'type': 'company',
103    'date': 'date',
[8945]104    'category': 'p_category',
[9007]105    'reg_transition': 'reg_state',  # we completely change this column,
106                                    # since reg_state import is usually intended
107    'transition': 'reg_transition',
[7512]108    }
[7516]109
110# Mapping input file colnames --> output file colnames
111COLNAME_MAPPING = {
[7666]112    # base data
[8478]113    'reg_state': 'state',
[9007]114    'reg_transition': 'state',
[7518]115    'jamb_reg_no': 'reg_number',
[7645]116    'matric_no': 'matric_number',
[7516]117    'birthday': 'date_of_birth',
[7526]118    'clr_ac_pin': 'clr_code',
[8941]119    # clearance
120    'hq_grade': 'hq_degree',
121    'uniben': 'former_matric',
122    'hq_type2': 'hq2_type',
123    'hq_grade2': 'hq2_degree',
124    'hq_school2': 'hq2_school',
125    'hq_matric_no2': 'hq2_matric_no',
126    'hq_session2': 'hq2_session',
127    'hq_disc2': 'hq2_disc',
128    'emp': 'employer',
129    'emp2': 'employer2',
130    'emp_position2': 'emp2_position',
131    'emp_start2': 'emp2_start',
132    'emp_end2': 'emp2_end',
133    'emp_reason2': 'emp2_reason',
[7550]134    # study course
[7530]135    'study_course': 'certificate',
[7550]136    # study level
[7537]137    'session': 'level_session',
138    'verdict': 'level_verdict',
[7550]139    # course ticket
140    'level_id': 'level',
[7666]141    'core_or_elective': 'mandatory',
[7628]142    # payment ticket
143    'order_id': 'p_id',
144    'status': 'p_state',
145    'category': 'p_category',
146    'resp_pay_reference': 'r_pay_reference',
147    'resp_desc': 'r_desc',
148    'resp_approved_amount': 'r_amount_approved',
149    'item': 'p_item',
150    'amount': 'amount_auth',
151    'resp_card_num': 'r_card_num',
152    'resp_code': 'r_code',
153    'date': 'creation_date',
154    'surcharge': 'surcharge_1',
155    'session_id': 'p_session',
[8941]156    'type': 'r_company',
[7516]157    }
[7526]158
[8478]159# Mapping input state --> output state
[7526]160REGSTATE_MAPPING = {
161    'student_created': 'created',
162    'admitted': 'admitted',
[7610]163    'objection_raised': 'clearance started',
[7526]164    'clearance_pin_entered': 'clearance started',
165    'clearance_requested': 'clearance requested',
166    'cleared_and_validated': 'cleared',
167    'school_fee_paid': 'school fee paid',
168    'returning': 'returning',
169    'courses_registered': 'courses registered',
170    'courses_validated': 'courses validated',
[9007]171    'admit': 'admitted',
172    'return': 'returning'
[7526]173    }
174
[7512]175##
176## END OF CONFIG
177##
178
[7575]179# Look for the first sequence of numbers
180RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
181
[7516]182def convert_fieldnames(fieldnames):
183    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
184    """
185    header = dict([(name, name) for name in fieldnames])
186    for in_name, out_name in COLNAME_MAPPING.items():
187        if in_name not in header:
188            continue
189        header[in_name] = out_name
[7514]190    return header
191
[7512]192class Converters():
193    """Converters to turn old-style values into new ones.
194    """
[8478]195
[7512]196    @classmethod
[8478]197    def student_id(self, value):
198        """ 'A123456' --> 'EA123456'
199        """
200        if len(value) == 7:
[9007]201            return 'B' + value
[8478]202        return value
203
204    @classmethod
[7526]205    def reg_state(self, value):
206        """ 'courses_validated' --> 'courses validated'
207        """
208        return REGSTATE_MAPPING.get(value,value)
209
210    @classmethod
[9007]211    def reg_transition(self, value):
212        if value == "admitted":
213            return "admit"
214        if value == "returning":
215            return "return"
216        return value
217
218    @classmethod
[7602]219    def level(self, value):
220        """ '000' --> '10'
221        """
222        try:
223            number = int(value)
224        except ValueError:
225            return 9999
226        if number == 0:
227            return 10
228        return number
229
230    @classmethod
231    def semester(self, value):
232        """ '0' --> '9'
233        """
234        try:
235            number = int(value)
236        except ValueError:
237            return 9999
238        if number == 0:
239            return 9
240        return number
241
242    @classmethod
243    def application_category(self, value):
244        """ '' --> 'no'
245        """
246        if value == '':
247            return 'no'
248        return value
249
[7610]250    @classmethod
251    def lga(self, value):
252        """ Remove apostrophe
253        """
254        if value == 'akwa_ibom_uru_offong_oruko':
255            return 'akwa_ibom_urue-offong-oruko'
[8939]256        if value == 'edo_ohionmwon':
257            return 'edo_orhionmwon'
258
259        if value == 'nassarawa_nassarawa':
260            return 'nassarawa_nassawara'
261
262        if value == 'kogi_mopa-muro-mopi':
263            return 'kogi_mopa-muro'
264
265        if value == 'delta_osimili-north':
266            return 'delta_oshielli-north'
267
268        if value == 'delta_osimili':
269            return 'delta_oshimili'
270
271        if value == 'delta_osimili-south':
272            return 'delta_oshimili-south'
[7610]273        try:
274            value = value.replace("'","")
275        except:
276            return ''
[8784]277        lower = value.lower()
278        if lower in LGAS_dict.keys():
279            return lower
[8781]280        # If real names are given, let's see if a similar value
281        # in LGAS exist.
[8784]282        value = LGAS_inverted_stripped.get(strip(lower), value)
[7610]283        return value
[7602]284
[7610]285
[7602]286    @classmethod
[7514]287    def session(self, value):
288        """ '08' --> '2008'
[8478]289        '2008/2009' --> '2008'
[7514]290        """
[8478]291        if '/' in value:
292            numbers = value.split('/')
293            number = int(numbers[0])
294            if number in range(2000,2015):
295                return number
296            else:
297                return 9999
[7514]298        try:
299            number = int(value)
300        except ValueError:
[7602]301            #import pdb; pdb.set_trace()
[7514]302            return 9999
303        if number < 14:
304            return number + 2000
305        elif number in range(2000,2015):
306            return number
307        else:
308            return 9999
309
310    @classmethod
[8938]311    def former(self, value):
312        """ True --> yes
313        '2008/2009' --> '2008'
314        """
315        if value == 'True':
316            return 'yes'
317        return
318
319    @classmethod
320    def year(self, value):
321        """ '0' --> ''
322        """
323        if value == '0':
324            return
325        if value == 'None':
326            return
327        return value
328
329
330    @classmethod
[7514]331    def marit_stat(self, value):
332        """ 'True'/'False' --> 'married'/'unmarried'
333        """
[7610]334        if value in ('True','married'):
[7514]335            value = 'married'
[7610]336        elif value in ('False','unmarried'):
[7514]337            value = 'unmarried'
338        else:
339            value = ''
340        return value
341
342    @classmethod
343    def gender(self, value):
[7526]344        """ 'True'/'False' --> 'f'/'m'
[7514]345        """
[8785]346        if value.strip() in ('F', 'True','f'):
[7526]347            value = 'f'
[8785]348        elif value.strip() in ('M', 'False','m'):
[7526]349            value = 'm'
[7514]350        else:
351            value = ''
352        return value
353
354    @classmethod
[7512]355    def date(self, value):
356        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
357        """
358        if value == "None":
359            value = ""
360        elif value == "":
361            value = ""
362        else:
363            value = value.replace('/', '-')
[7514]364            # We add the hash symbol to avoid automatic date transformation
365            # in Excel and Calc for further processing
366            value += '#'
[7512]367        return value
368
369    @classmethod
[7628]370    def no_int(self, value):
[8941]371        """ Add hash and skip numbers starting with 999999
[7628]372        """
373        # We add the hash symbol to avoid automatic number transformation
374        # in Excel and Calc for further processing
[8941]375        try:
376            intvalue = int(value)
377            value += '#'
378        except:
379            pass
380        if value.startswith('999999'):
381            return
[7628]382        return value
383
384    @classmethod
[7632]385    def mode(self, value):
386        if value == "transfer_fulltime":
387            return "transfer_ft"
[8522]388        if value == "ume_ft":
389            return "utme_ft"
[7632]390        return value
391
392    @classmethod
[7526]393    def password(self, value):
394        if value == "not set":
395            return ""
396        return value
397
[7575]398    @classmethod
[8478]399    def nationality(self, value):
400        if value == "nigeria":
401            return "NG"
[8939]402        if value == "niger":
403            return "NE"
[8478]404        return value
405
406    @classmethod
[8939]407    def sittype(self, value):
408        if value == "nabtec":
409            return "nabteb"
410        return value
411
412    @classmethod
[8941]413    def company(self, value):
414        if value == "online":
415            return "interswitch"
416        return value
417
418    @classmethod
419    def p_category(self, value):
420        if value == "acceptance":
421            return "clearance"
422        return value
423
424    @classmethod
[8939]425    def email(self, value):
426        return value.strip()
427
428    @classmethod
[7575]429    def phone(self, value):
430        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
[7526]431
[7575]432        Dashes and slashes are removed before looking for sequences
433        of numbers.
434        """
[8938]435        if not value:
436            return
[9007]437        value = value.strip('#')
[7575]438        value = value.replace('-', '')
439        value = value.replace('/', '')
440        match = RE_PHONE.match(value)
441        phone = match.groups()[0]
[9007]442        if value.startswith('234'):
443            value = '+' + value[:3] + '-' + value[3:]
444        else:
445            value = '-%s' % phone
[8938]446        return value + '#'
[7575]447
[8938]448    @classmethod
449    def result(self, value):
450        try:
451            liste = eval(value)
452        except:
453            return
454        if isinstance(liste,list):
455            return [(i[0].lower(),i[1]) for i in liste]
456        return
[7575]457
[9007]458
[7572]459def main():
[7573]460    input_file = '%s' % sys.argv[1]
461    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
462    reader = csv.DictReader(open(input_file, 'rb'))
[7572]463    writer = None
[7512]464
[7572]465    for num, row in enumerate(reader):
466        if num == 0:
[7573]467            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
[7572]468            print "FIELDS: "
469            for x, y in enumerate(reader.fieldnames):
470                print x, y
471            header = convert_fieldnames(reader.fieldnames)
472            writer.writerow(header)
[8939]473        if row.get('reg_state') == 'student_created':
474            continue
[8941]475        if row.get('status') == 'started':
476            # We do not use started payments
477            continue
[7572]478        for key, value in row.items():
479            if not key in OPTIONS.keys():
480                continue
481            conv_name = OPTIONS[key]
482            converter = getattr(Converters, conv_name, None)
483            if converter is None:
484                print "WARNING: cannot find converter %s" % conv_name
485                continue
486            row[key] = converter(row[key])
[7602]487        try:
488            writer.writerow(row)
489        except:
490            print row['student_id']
[7512]491
[7573]492    print "Output written to %s" % output_file
[7572]493
494
495if __name__ == '__main__':
496    if len(sys.argv) != 2:
497        print 'Usage: %s <filename>' % __file__
498        sys.exit(1)
499    main()
Note: See TracBrowser for help on using the repository browser.