source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 8945

Last change on this file since 8945 was 8945, checked in by Henrik Bettermann, 12 years ago

We do not need a special converter for datetime values. This can be done by the date converter.

  • Property svn:keywords set to Id
File size: 12.6 KB
RevLine 
[7512]1## $Id: fix_import_file.py 8945 2012-07-08 15:10:12Z henrik $
2##
[7518]3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
[7512]4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
[8781]19Fix exports from old SRP portal and other data sources to make
20them importable by current portal.
[7512]21
22Usage:
23
24Change into this directory, set the options below (files are assumed
25to be in the same directory) and then run
26
[7518]27  python fix_import_file.py <filename>
[7512]28
29Errors/warnings will be displayed on the shell, the output will be put
30into the specified output file.
[8781]31
32
33The lgas.py module must be copied into the same folder where this script
34is started.
[7512]35"""
[7573]36import csv
37import datetime
[7575]38import re
[7573]39import sys
[7512]40
[8781]41try:
42    from lgas import LGAS
43except:
44    print 'ERROR: lgas.py is missing.'
45    sys.exit(1)
46
47def strip(string):
48    string = string.replace('_', '')
49    string = string.replace('/', '')
50    string = string.replace('-', '')
51    string = string.replace(' ', '')
52    string = string.lower()
53    return string
54
55LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56LGAS_dict = dict(LGAS)
57
[7512]58##
59## CONFIGURATION SECTION
60##
61# keys are fieldnames in input file, values are methods of class
62# Converter (see below)
63OPTIONS = {
[8478]64    'student_id': 'student_id',
[7512]65    'sex': 'gender',
66    'birthday': 'date',
[7514]67    'marit_stat': 'marit_stat',
[7537]68    'session': 'session',
[7514]69    'entry_session': 'session',
70    'current_session': 'session',
[7602]71    'session_id': 'session',
[7632]72    'entry_mode': 'mode',
[7526]73    'reg_state': 'reg_state',
74    'password': 'password',
[7575]75    'phone': 'phone',
[8478]76    'nationality': 'nationality',
[7602]77    'level': 'level',
78    'start_level': 'level',
79    'end_level': 'level',
80    'level_id': 'level',
81    'current_level': 'level',
82    'semester': 'semester',
83    'application_category': 'application_category',
[7610]84    'lga': 'lga',
[7628]85    'order_id': 'no_int',
[8938]86    'uniben': 'former',
87    'nysc_year': 'year',
88    'alr_date': 'date',
89    'fst_sit_date': 'date',
90    'scd_sit_date': 'date',
91    'emp_start': 'date',
92    'emp_end': 'date',
93    'emp_start2': 'date',
94    'emp_end2': 'date',
95    'fst_sit_results': 'result',
96    'scd_sit_results': 'result',
97    'alr_results': 'result',
[8939]98    'email': 'email',
99    'fst_sit_type': 'sittype',
100    'scd_sit_type': 'sittype',
[8941]101    'resp_pay_reference': 'no_int',
102    'type': 'company',
103    'date': 'date',
[8945]104    'category': 'p_category',
[7512]105    }
[7516]106
107# Mapping input file colnames --> output file colnames
108COLNAME_MAPPING = {
[7666]109    # base data
[8478]110    'reg_state': 'state',
[7518]111    'jamb_reg_no': 'reg_number',
[7645]112    'matric_no': 'matric_number',
[7516]113    'birthday': 'date_of_birth',
[7526]114    'clr_ac_pin': 'clr_code',
[8941]115    # clearance
116    'hq_grade': 'hq_degree',
117    'uniben': 'former_matric',
118    'hq_type2': 'hq2_type',
119    'hq_grade2': 'hq2_degree',
120    'hq_school2': 'hq2_school',
121    'hq_matric_no2': 'hq2_matric_no',
122    'hq_session2': 'hq2_session',
123    'hq_disc2': 'hq2_disc',
124    'emp': 'employer',
125    'emp2': 'employer2',
126    'emp_position2': 'emp2_position',
127    'emp_start2': 'emp2_start',
128    'emp_end2': 'emp2_end',
129    'emp_reason2': 'emp2_reason',
[7550]130    # study course
[7530]131    'study_course': 'certificate',
[7550]132    # study level
[7537]133    'session': 'level_session',
134    'verdict': 'level_verdict',
[7550]135    # course ticket
136    'level_id': 'level',
[7666]137    'core_or_elective': 'mandatory',
[7628]138    # payment ticket
139    'order_id': 'p_id',
140    'status': 'p_state',
141    'category': 'p_category',
142    'resp_pay_reference': 'r_pay_reference',
143    'resp_desc': 'r_desc',
144    'resp_approved_amount': 'r_amount_approved',
145    'item': 'p_item',
146    'amount': 'amount_auth',
147    'resp_card_num': 'r_card_num',
148    'resp_code': 'r_code',
149    'date': 'creation_date',
150    'surcharge': 'surcharge_1',
151    'session_id': 'p_session',
[8941]152    'type': 'r_company',
[7516]153    }
[7526]154
[8478]155# Mapping input state --> output state
[7526]156REGSTATE_MAPPING = {
157    'student_created': 'created',
158    'admitted': 'admitted',
[7610]159    'objection_raised': 'clearance started',
[7526]160    'clearance_pin_entered': 'clearance started',
161    'clearance_requested': 'clearance requested',
162    'cleared_and_validated': 'cleared',
163    'school_fee_paid': 'school fee paid',
164    'returning': 'returning',
165    'courses_registered': 'courses registered',
166    'courses_validated': 'courses validated',
167    }
168
[7512]169##
170## END OF CONFIG
171##
172
[7575]173# Look for the first sequence of numbers
174RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
175
[7516]176def convert_fieldnames(fieldnames):
177    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
178    """
179    header = dict([(name, name) for name in fieldnames])
180    for in_name, out_name in COLNAME_MAPPING.items():
181        if in_name not in header:
182            continue
183        header[in_name] = out_name
[7514]184    return header
185
[7512]186class Converters():
187    """Converters to turn old-style values into new ones.
188    """
[8478]189
[7512]190    @classmethod
[8478]191    def student_id(self, value):
192        """ 'A123456' --> 'EA123456'
193        """
194        if len(value) == 7:
195            return 'E' + value
196        return value
197
198    @classmethod
[7526]199    def reg_state(self, value):
200        """ 'courses_validated' --> 'courses validated'
201        """
202        return REGSTATE_MAPPING.get(value,value)
203
204    @classmethod
[7602]205    def level(self, value):
206        """ '000' --> '10'
207        """
208        try:
209            number = int(value)
210        except ValueError:
211            return 9999
212        if number == 0:
213            return 10
214        return number
215
216    @classmethod
217    def semester(self, value):
218        """ '0' --> '9'
219        """
220        try:
221            number = int(value)
222        except ValueError:
223            return 9999
224        if number == 0:
225            return 9
226        return number
227
228    @classmethod
229    def application_category(self, value):
230        """ '' --> 'no'
231        """
232        if value == '':
233            return 'no'
234        return value
235
[7610]236    @classmethod
237    def lga(self, value):
238        """ Remove apostrophe
239        """
240        if value == 'akwa_ibom_uru_offong_oruko':
241            return 'akwa_ibom_urue-offong-oruko'
[8939]242        if value == 'edo_ohionmwon':
243            return 'edo_orhionmwon'
244
245        if value == 'nassarawa_nassarawa':
246            return 'nassarawa_nassawara'
247
248        if value == 'kogi_mopa-muro-mopi':
249            return 'kogi_mopa-muro'
250
251        if value == 'delta_osimili-north':
252            return 'delta_oshielli-north'
253
254        if value == 'delta_osimili':
255            return 'delta_oshimili'
256
257        if value == 'delta_osimili-south':
258            return 'delta_oshimili-south'
[7610]259        try:
260            value = value.replace("'","")
261        except:
262            return ''
[8784]263        lower = value.lower()
264        if lower in LGAS_dict.keys():
265            return lower
[8781]266        # If real names are given, let's see if a similar value
267        # in LGAS exist.
[8784]268        value = LGAS_inverted_stripped.get(strip(lower), value)
[7610]269        return value
[7602]270
[7610]271
[7602]272    @classmethod
[7514]273    def session(self, value):
274        """ '08' --> '2008'
[8478]275        '2008/2009' --> '2008'
[7514]276        """
[8478]277        if '/' in value:
278            numbers = value.split('/')
279            number = int(numbers[0])
280            if number in range(2000,2015):
281                return number
282            else:
283                return 9999
[7514]284        try:
285            number = int(value)
286        except ValueError:
[7602]287            #import pdb; pdb.set_trace()
[7514]288            return 9999
289        if number < 14:
290            return number + 2000
291        elif number in range(2000,2015):
292            return number
293        else:
294            return 9999
295
296    @classmethod
[8938]297    def former(self, value):
298        """ True --> yes
299        '2008/2009' --> '2008'
300        """
301        if value == 'True':
302            return 'yes'
303        return
304
305    @classmethod
306    def year(self, value):
307        """ '0' --> ''
308        """
309        if value == '0':
310            return
311        if value == 'None':
312            return
313        return value
314
315
316    @classmethod
[7514]317    def marit_stat(self, value):
318        """ 'True'/'False' --> 'married'/'unmarried'
319        """
[7610]320        if value in ('True','married'):
[7514]321            value = 'married'
[7610]322        elif value in ('False','unmarried'):
[7514]323            value = 'unmarried'
324        else:
325            value = ''
326        return value
327
328    @classmethod
329    def gender(self, value):
[7526]330        """ 'True'/'False' --> 'f'/'m'
[7514]331        """
[8785]332        if value.strip() in ('F', 'True','f'):
[7526]333            value = 'f'
[8785]334        elif value.strip() in ('M', 'False','m'):
[7526]335            value = 'm'
[7514]336        else:
337            value = ''
338        return value
339
340    @classmethod
[7512]341    def date(self, value):
342        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
343        """
344        if value == "None":
345            value = ""
346        elif value == "":
347            value = ""
348        else:
349            value = value.replace('/', '-')
[7514]350            # We add the hash symbol to avoid automatic date transformation
351            # in Excel and Calc for further processing
352            value += '#'
[7512]353        return value
354
355    @classmethod
[7628]356    def no_int(self, value):
[8941]357        """ Add hash and skip numbers starting with 999999
[7628]358        """
359        # We add the hash symbol to avoid automatic number transformation
360        # in Excel and Calc for further processing
[8941]361        try:
362            intvalue = int(value)
363            value += '#'
364        except:
365            pass
366        if value.startswith('999999'):
367            return
[7628]368        return value
369
370    @classmethod
[7632]371    def mode(self, value):
372        if value == "transfer_fulltime":
373            return "transfer_ft"
[8522]374        if value == "ume_ft":
375            return "utme_ft"
[7632]376        return value
377
378    @classmethod
[7526]379    def password(self, value):
380        if value == "not set":
381            return ""
382        return value
383
[7575]384    @classmethod
[8478]385    def nationality(self, value):
386        if value == "nigeria":
387            return "NG"
[8939]388        if value == "niger":
389            return "NE"
[8478]390        return value
391
392    @classmethod
[8939]393    def sittype(self, value):
394        if value == "nabtec":
395            return "nabteb"
396        return value
397
398    @classmethod
[8941]399    def company(self, value):
400        if value == "online":
401            return "interswitch"
402        return value
403
404    @classmethod
405    def p_category(self, value):
406        if value == "acceptance":
407            return "clearance"
408        return value
409
410    @classmethod
[8939]411    def email(self, value):
412        return value.strip()
413
414    @classmethod
[7575]415    def phone(self, value):
416        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
[7526]417
[7575]418        Dashes and slashes are removed before looking for sequences
419        of numbers.
420        """
[8938]421        if not value:
422            return
[7575]423        value = value.replace('-', '')
424        value = value.replace('/', '')
425        match = RE_PHONE.match(value)
426        phone = match.groups()[0]
[8478]427        value = '-%s' % phone
[8938]428        return value + '#'
[7575]429
[8938]430    @classmethod
431    def result(self, value):
432        try:
433            liste = eval(value)
434        except:
435            return
436        if isinstance(liste,list):
437            return [(i[0].lower(),i[1]) for i in liste]
438        return
[7575]439
[7572]440def main():
[7573]441    input_file = '%s' % sys.argv[1]
442    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
443    reader = csv.DictReader(open(input_file, 'rb'))
[7572]444    writer = None
[7512]445
[7572]446    for num, row in enumerate(reader):
447        if num == 0:
[7573]448            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
[7572]449            print "FIELDS: "
450            for x, y in enumerate(reader.fieldnames):
451                print x, y
452            header = convert_fieldnames(reader.fieldnames)
453            writer.writerow(header)
[8939]454        if row.get('reg_state') == 'student_created':
455            continue
[8941]456        if row.get('status') == 'started':
457            # We do not use started payments
458            continue
[7572]459        for key, value in row.items():
460            if not key in OPTIONS.keys():
461                continue
462            conv_name = OPTIONS[key]
463            converter = getattr(Converters, conv_name, None)
464            if converter is None:
465                print "WARNING: cannot find converter %s" % conv_name
466                continue
467            row[key] = converter(row[key])
[7602]468        try:
469            writer.writerow(row)
470        except:
471            print row['student_id']
[7512]472
[7573]473    print "Output written to %s" % output_file
[7572]474
475
476if __name__ == '__main__':
477    if len(sys.argv) != 2:
478        print 'Usage: %s <filename>' % __file__
479        sys.exit(1)
480    main()
Note: See TracBrowser for help on using the repository browser.