source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 8943

Last change on this file since 8943 was 8941, checked in by Henrik Bettermann, 12 years ago

More conversions.

Skip 'started' payments.

  • Property svn:keywords set to Id
File size: 13.1 KB
RevLine 
[7512]1## $Id: fix_import_file.py 8941 2012-07-08 08:58:45Z henrik $
2##
[7518]3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
[7512]4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
[8781]19Fix exports from old SRP portal and other data sources to make
20them importable by current portal.
[7512]21
22Usage:
23
24Change into this directory, set the options below (files are assumed
25to be in the same directory) and then run
26
[7518]27  python fix_import_file.py <filename>
[7512]28
29Errors/warnings will be displayed on the shell, the output will be put
30into the specified output file.
[8781]31
32
33The lgas.py module must be copied into the same folder where this script
34is started.
[7512]35"""
[7573]36import csv
37import datetime
[7575]38import re
[7573]39import sys
[7512]40
[8781]41try:
42    from lgas import LGAS
43except:
44    print 'ERROR: lgas.py is missing.'
45    sys.exit(1)
46
47def strip(string):
48    string = string.replace('_', '')
49    string = string.replace('/', '')
50    string = string.replace('-', '')
51    string = string.replace(' ', '')
52    string = string.lower()
53    return string
54
55LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56LGAS_dict = dict(LGAS)
57
[7512]58##
59## CONFIGURATION SECTION
60##
61# keys are fieldnames in input file, values are methods of class
62# Converter (see below)
63OPTIONS = {
[8478]64    'student_id': 'student_id',
[7512]65    'sex': 'gender',
66    'birthday': 'date',
67    'request_date': 'datetime',
[7514]68    'marit_stat': 'marit_stat',
[7537]69    'session': 'session',
[7514]70    'entry_session': 'session',
71    'current_session': 'session',
[7602]72    'session_id': 'session',
[7632]73    'entry_mode': 'mode',
[7526]74    'reg_state': 'reg_state',
75    'password': 'password',
[7575]76    'phone': 'phone',
[8478]77    'nationality': 'nationality',
[7602]78    'level': 'level',
79    'start_level': 'level',
80    'end_level': 'level',
81    'level_id': 'level',
82    'current_level': 'level',
83    'semester': 'semester',
84    'application_category': 'application_category',
[7610]85    'lga': 'lga',
[7628]86    'order_id': 'no_int',
[8938]87    'uniben': 'former',
88    'nysc_year': 'year',
89    'alr_date': 'date',
90    'fst_sit_date': 'date',
91    'scd_sit_date': 'date',
92    'emp_start': 'date',
93    'emp_end': 'date',
94    'emp_start2': 'date',
95    'emp_end2': 'date',
96    'fst_sit_results': 'result',
97    'scd_sit_results': 'result',
98    'alr_results': 'result',
[8939]99    'email': 'email',
100    'fst_sit_type': 'sittype',
101    'scd_sit_type': 'sittype',
[8941]102    'resp_pay_reference': 'no_int',
103    'type': 'company',
104    'date': 'date',
105    'p_category': 'p_category',
[7512]106    }
[7516]107
108# Mapping input file colnames --> output file colnames
109COLNAME_MAPPING = {
[7666]110    # base data
[8478]111    'reg_state': 'state',
[7518]112    'jamb_reg_no': 'reg_number',
[7645]113    'matric_no': 'matric_number',
[7516]114    'birthday': 'date_of_birth',
[7526]115    'clr_ac_pin': 'clr_code',
[8941]116    # clearance
117    'hq_grade': 'hq_degree',
118    'uniben': 'former_matric',
119    'hq_type2': 'hq2_type',
120    'hq_grade2': 'hq2_degree',
121    'hq_school2': 'hq2_school',
122    'hq_matric_no2': 'hq2_matric_no',
123    'hq_session2': 'hq2_session',
124    'hq_disc2': 'hq2_disc',
125    'emp': 'employer',
126    'emp2': 'employer2',
127    'emp_position2': 'emp2_position',
128    'emp_start2': 'emp2_start',
129    'emp_end2': 'emp2_end',
130    'emp_reason2': 'emp2_reason',
[7550]131    # study course
[7530]132    'study_course': 'certificate',
[7550]133    # study level
[7537]134    'session': 'level_session',
135    'verdict': 'level_verdict',
[7550]136    # course ticket
137    'level_id': 'level',
[7666]138    'core_or_elective': 'mandatory',
[7628]139    # payment ticket
140    'order_id': 'p_id',
141    'status': 'p_state',
142    'category': 'p_category',
143    'resp_pay_reference': 'r_pay_reference',
144    'resp_desc': 'r_desc',
145    'resp_approved_amount': 'r_amount_approved',
146    'item': 'p_item',
147    'amount': 'amount_auth',
148    'resp_card_num': 'r_card_num',
149    'resp_code': 'r_code',
150    'date': 'creation_date',
151    'surcharge': 'surcharge_1',
152    'session_id': 'p_session',
[8941]153    'type': 'r_company',
[7516]154    }
[7526]155
[8478]156# Mapping input state --> output state
[7526]157REGSTATE_MAPPING = {
158    'student_created': 'created',
159    'admitted': 'admitted',
[7610]160    'objection_raised': 'clearance started',
[7526]161    'clearance_pin_entered': 'clearance started',
162    'clearance_requested': 'clearance requested',
163    'cleared_and_validated': 'cleared',
164    'school_fee_paid': 'school fee paid',
165    'returning': 'returning',
166    'courses_registered': 'courses registered',
167    'courses_validated': 'courses validated',
168    }
169
[7512]170##
171## END OF CONFIG
172##
173
[7575]174# Look for the first sequence of numbers
175RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
176
[7516]177def convert_fieldnames(fieldnames):
178    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
179    """
180    header = dict([(name, name) for name in fieldnames])
181    for in_name, out_name in COLNAME_MAPPING.items():
182        if in_name not in header:
183            continue
184        header[in_name] = out_name
[7514]185    return header
186
[7512]187class Converters():
188    """Converters to turn old-style values into new ones.
189    """
[8478]190
[7512]191    @classmethod
[8478]192    def student_id(self, value):
193        """ 'A123456' --> 'EA123456'
194        """
195        if len(value) == 7:
196            return 'E' + value
197        return value
198
199    @classmethod
[7526]200    def reg_state(self, value):
201        """ 'courses_validated' --> 'courses validated'
202        """
203        return REGSTATE_MAPPING.get(value,value)
204
205    @classmethod
[7602]206    def level(self, value):
207        """ '000' --> '10'
208        """
209        try:
210            number = int(value)
211        except ValueError:
212            return 9999
213        if number == 0:
214            return 10
215        return number
216
217    @classmethod
218    def semester(self, value):
219        """ '0' --> '9'
220        """
221        try:
222            number = int(value)
223        except ValueError:
224            return 9999
225        if number == 0:
226            return 9
227        return number
228
229    @classmethod
230    def application_category(self, value):
231        """ '' --> 'no'
232        """
233        if value == '':
234            return 'no'
235        return value
236
[7610]237    @classmethod
238    def lga(self, value):
239        """ Remove apostrophe
240        """
241        if value == 'akwa_ibom_uru_offong_oruko':
242            return 'akwa_ibom_urue-offong-oruko'
[8939]243        if value == 'edo_ohionmwon':
244            return 'edo_orhionmwon'
245
246        if value == 'nassarawa_nassarawa':
247            return 'nassarawa_nassawara'
248
249        if value == 'kogi_mopa-muro-mopi':
250            return 'kogi_mopa-muro'
251
252        if value == 'delta_osimili-north':
253            return 'delta_oshielli-north'
254
255        if value == 'delta_osimili':
256            return 'delta_oshimili'
257
258        if value == 'delta_osimili-south':
259            return 'delta_oshimili-south'
[7610]260        try:
261            value = value.replace("'","")
262        except:
263            return ''
[8784]264        lower = value.lower()
265        if lower in LGAS_dict.keys():
266            return lower
[8781]267        # If real names are given, let's see if a similar value
268        # in LGAS exist.
[8784]269        value = LGAS_inverted_stripped.get(strip(lower), value)
[7610]270        return value
[7602]271
[7610]272
[7602]273    @classmethod
[7514]274    def session(self, value):
275        """ '08' --> '2008'
[8478]276        '2008/2009' --> '2008'
[7514]277        """
[8478]278        if '/' in value:
279            numbers = value.split('/')
280            number = int(numbers[0])
281            if number in range(2000,2015):
282                return number
283            else:
284                return 9999
[7514]285        try:
286            number = int(value)
287        except ValueError:
[7602]288            #import pdb; pdb.set_trace()
[7514]289            return 9999
290        if number < 14:
291            return number + 2000
292        elif number in range(2000,2015):
293            return number
294        else:
295            return 9999
296
297    @classmethod
[8938]298    def former(self, value):
299        """ True --> yes
300        '2008/2009' --> '2008'
301        """
302        if value == 'True':
303            return 'yes'
304        return
305
306    @classmethod
307    def year(self, value):
308        """ '0' --> ''
309        """
310        if value == '0':
311            return
312        if value == 'None':
313            return
314        return value
315
316
317    @classmethod
[7514]318    def marit_stat(self, value):
319        """ 'True'/'False' --> 'married'/'unmarried'
320        """
[7610]321        if value in ('True','married'):
[7514]322            value = 'married'
[7610]323        elif value in ('False','unmarried'):
[7514]324            value = 'unmarried'
325        else:
326            value = ''
327        return value
328
329    @classmethod
330    def gender(self, value):
[7526]331        """ 'True'/'False' --> 'f'/'m'
[7514]332        """
[8785]333        if value.strip() in ('F', 'True','f'):
[7526]334            value = 'f'
[8785]335        elif value.strip() in ('M', 'False','m'):
[7526]336            value = 'm'
[7514]337        else:
338            value = ''
339        return value
340
341    @classmethod
[7512]342    def date(self, value):
343        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
344        """
345        if value == "None":
346            value = ""
347        elif value == "":
348            value = ""
349        else:
350            value = value.replace('/', '-')
[7514]351            # We add the hash symbol to avoid automatic date transformation
352            # in Excel and Calc for further processing
353            value += '#'
[7512]354        return value
355
356    @classmethod
[7628]357    def no_int(self, value):
[8941]358        """ Add hash and skip numbers starting with 999999
[7628]359        """
360        # We add the hash symbol to avoid automatic number transformation
361        # in Excel and Calc for further processing
[8941]362        try:
363            intvalue = int(value)
364            value += '#'
365        except:
366            pass
367        if value.startswith('999999'):
368            return
[7628]369        return value
370
371
372    @classmethod
[7512]373    def datetime(self, value):
374        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
375        """
376        #print  "IN: ", value
377        if value == "None":
378            value = ""
379        elif value == "":
380            value = ""
381        else:
382            #value = datetime.datetime.strptime(value, '%Y/%m/%d')
383            #value = datetime.datetime.strftime(value, '%Y-%m-%d')
384            pass
385        #print "OUT: ", value
386        return value
387
[7526]388    @classmethod
[7632]389    def mode(self, value):
390        if value == "transfer_fulltime":
391            return "transfer_ft"
[8522]392        if value == "ume_ft":
393            return "utme_ft"
[7632]394        return value
395
396    @classmethod
[7526]397    def password(self, value):
398        if value == "not set":
399            return ""
400        return value
401
[7575]402    @classmethod
[8478]403    def nationality(self, value):
404        if value == "nigeria":
405            return "NG"
[8939]406        if value == "niger":
407            return "NE"
[8478]408        return value
409
410    @classmethod
[8939]411    def sittype(self, value):
412        if value == "nabtec":
413            return "nabteb"
414        return value
415
416    @classmethod
[8941]417    def company(self, value):
418        if value == "online":
419            return "interswitch"
420        return value
421
422    @classmethod
423    def p_category(self, value):
424        if value == "acceptance":
425            return "clearance"
426        return value
427
428    @classmethod
[8939]429    def email(self, value):
430        return value.strip()
431
432    @classmethod
[7575]433    def phone(self, value):
434        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
[7526]435
[7575]436        Dashes and slashes are removed before looking for sequences
437        of numbers.
438        """
[8938]439        if not value:
440            return
[7575]441        value = value.replace('-', '')
442        value = value.replace('/', '')
443        match = RE_PHONE.match(value)
444        phone = match.groups()[0]
[8478]445        value = '-%s' % phone
[8938]446        return value + '#'
[7575]447
[8938]448    @classmethod
449    def result(self, value):
450        try:
451            liste = eval(value)
452        except:
453            return
454        if isinstance(liste,list):
455            return [(i[0].lower(),i[1]) for i in liste]
456        return
[7575]457
[7572]458def main():
[7573]459    input_file = '%s' % sys.argv[1]
460    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
461    reader = csv.DictReader(open(input_file, 'rb'))
[7572]462    writer = None
[7512]463
[7572]464    for num, row in enumerate(reader):
465        if num == 0:
[7573]466            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
[7572]467            print "FIELDS: "
468            for x, y in enumerate(reader.fieldnames):
469                print x, y
470            header = convert_fieldnames(reader.fieldnames)
471            writer.writerow(header)
[8939]472        if row.get('reg_state') == 'student_created':
473            continue
[8941]474        if row.get('status') == 'started':
475            # We do not use started payments
476            continue
[7572]477        for key, value in row.items():
478            if not key in OPTIONS.keys():
479                continue
480            conv_name = OPTIONS[key]
481            converter = getattr(Converters, conv_name, None)
482            if converter is None:
483                print "WARNING: cannot find converter %s" % conv_name
484                continue
485            row[key] = converter(row[key])
[7602]486        try:
487            writer.writerow(row)
488        except:
489            print row['student_id']
[7512]490
[7573]491    print "Output written to %s" % output_file
[7572]492
493
494if __name__ == '__main__':
495    if len(sys.argv) != 2:
496        print 'Usage: %s <filename>' % __file__
497        sys.exit(1)
498    main()
Note: See TracBrowser for help on using the repository browser.