source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 9007

Last change on this file since 9007 was 9007, checked in by Henrik Bettermann, 13 years ago

Extend import file repair script.

Add shell script for downloading export files.

  • Property svn:keywords set to Id
File size: 13.2 KB
Line 
1## $Id: fix_import_file.py 9007 2012-07-17 04:36:27Z henrik $
2##
3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
19Fix exports from old SRP portal and other data sources to make
20them importable by current portal.
21
22Usage:
23
24Change into this directory, set the options below (files are assumed
25to be in the same directory) and then run
26
27  python fix_import_file.py <filename>
28
29Errors/warnings will be displayed on the shell, the output will be put
30into the specified output file.
31
32
33The lgas.py module must be copied into the same folder where this script
34is started.
35"""
36import csv
37import datetime
38import re
39import sys
40
41try:
42    from lgas import LGAS
43except:
44    print 'ERROR: lgas.py is missing.'
45    sys.exit(1)
46
47def strip(string):
48    string = string.replace('_', '')
49    string = string.replace('/', '')
50    string = string.replace('-', '')
51    string = string.replace(' ', '')
52    string = string.lower()
53    return string
54
55LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56LGAS_dict = dict(LGAS)
57
58##
59## CONFIGURATION SECTION
60##
61# keys are fieldnames in input file, values are methods of class
62# Converter (see below)
63OPTIONS = {
64    'student_id': 'student_id',
65    'sex': 'gender',
66    'birthday': 'date',
67    'marit_stat': 'marit_stat',
68    'session': 'session',
69    'entry_session': 'session',
70    'current_session': 'session',
71    'session_id': 'session',
72    'entry_mode': 'mode',
73    'reg_state': 'reg_state',
74    'password': 'password',
75    'phone': 'phone',
76    'nationality': 'nationality',
77    'level': 'level',
78    'start_level': 'level',
79    'end_level': 'level',
80    'level_id': 'level',
81    'current_level': 'level',
82    'semester': 'semester',
83    'application_category': 'application_category',
84    'lga': 'lga',
85    'order_id': 'no_int',
86    'uniben': 'former',
87    'nysc_year': 'year',
88    'alr_date': 'date',
89    'fst_sit_date': 'date',
90    'scd_sit_date': 'date',
91    'emp_start': 'date',
92    'emp_end': 'date',
93    'emp_start2': 'date',
94    'emp_end2': 'date',
95    'fst_sit_results': 'result',
96    'scd_sit_results': 'result',
97    'alr_results': 'result',
98    'email': 'email',
99    'fst_sit_type': 'sittype',
100    'scd_sit_type': 'sittype',
101    'resp_pay_reference': 'no_int',
102    'type': 'company',
103    'date': 'date',
104    'category': 'p_category',
105    'reg_transition': 'reg_state',  # we completely change this column,
106                                    # since reg_state import is usually intended
107    'transition': 'reg_transition',
108    }
109
110# Mapping input file colnames --> output file colnames
111COLNAME_MAPPING = {
112    # base data
113    'reg_state': 'state',
114    'reg_transition': 'state',
115    'jamb_reg_no': 'reg_number',
116    'matric_no': 'matric_number',
117    'birthday': 'date_of_birth',
118    'clr_ac_pin': 'clr_code',
119    # clearance
120    'hq_grade': 'hq_degree',
121    'uniben': 'former_matric',
122    'hq_type2': 'hq2_type',
123    'hq_grade2': 'hq2_degree',
124    'hq_school2': 'hq2_school',
125    'hq_matric_no2': 'hq2_matric_no',
126    'hq_session2': 'hq2_session',
127    'hq_disc2': 'hq2_disc',
128    'emp': 'employer',
129    'emp2': 'employer2',
130    'emp_position2': 'emp2_position',
131    'emp_start2': 'emp2_start',
132    'emp_end2': 'emp2_end',
133    'emp_reason2': 'emp2_reason',
134    # study course
135    'study_course': 'certificate',
136    # study level
137    'session': 'level_session',
138    'verdict': 'level_verdict',
139    # course ticket
140    'level_id': 'level',
141    'core_or_elective': 'mandatory',
142    # payment ticket
143    'order_id': 'p_id',
144    'status': 'p_state',
145    'category': 'p_category',
146    'resp_pay_reference': 'r_pay_reference',
147    'resp_desc': 'r_desc',
148    'resp_approved_amount': 'r_amount_approved',
149    'item': 'p_item',
150    'amount': 'amount_auth',
151    'resp_card_num': 'r_card_num',
152    'resp_code': 'r_code',
153    'date': 'creation_date',
154    'surcharge': 'surcharge_1',
155    'session_id': 'p_session',
156    'type': 'r_company',
157    }
158
159# Mapping input state --> output state
160REGSTATE_MAPPING = {
161    'student_created': 'created',
162    'admitted': 'admitted',
163    'objection_raised': 'clearance started',
164    'clearance_pin_entered': 'clearance started',
165    'clearance_requested': 'clearance requested',
166    'cleared_and_validated': 'cleared',
167    'school_fee_paid': 'school fee paid',
168    'returning': 'returning',
169    'courses_registered': 'courses registered',
170    'courses_validated': 'courses validated',
171    'admit': 'admitted',
172    'return': 'returning'
173    }
174
175##
176## END OF CONFIG
177##
178
179# Look for the first sequence of numbers
180RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
181
182def convert_fieldnames(fieldnames):
183    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
184    """
185    header = dict([(name, name) for name in fieldnames])
186    for in_name, out_name in COLNAME_MAPPING.items():
187        if in_name not in header:
188            continue
189        header[in_name] = out_name
190    return header
191
192class Converters():
193    """Converters to turn old-style values into new ones.
194    """
195
196    @classmethod
197    def student_id(self, value):
198        """ 'A123456' --> 'EA123456'
199        """
200        if len(value) == 7:
201            return 'B' + value
202        return value
203
204    @classmethod
205    def reg_state(self, value):
206        """ 'courses_validated' --> 'courses validated'
207        """
208        return REGSTATE_MAPPING.get(value,value)
209
210    @classmethod
211    def reg_transition(self, value):
212        if value == "admitted":
213            return "admit"
214        if value == "returning":
215            return "return"
216        return value
217
218    @classmethod
219    def level(self, value):
220        """ '000' --> '10'
221        """
222        try:
223            number = int(value)
224        except ValueError:
225            return 9999
226        if number == 0:
227            return 10
228        return number
229
230    @classmethod
231    def semester(self, value):
232        """ '0' --> '9'
233        """
234        try:
235            number = int(value)
236        except ValueError:
237            return 9999
238        if number == 0:
239            return 9
240        return number
241
242    @classmethod
243    def application_category(self, value):
244        """ '' --> 'no'
245        """
246        if value == '':
247            return 'no'
248        return value
249
250    @classmethod
251    def lga(self, value):
252        """ Remove apostrophe
253        """
254        if value == 'akwa_ibom_uru_offong_oruko':
255            return 'akwa_ibom_urue-offong-oruko'
256        if value == 'edo_ohionmwon':
257            return 'edo_orhionmwon'
258
259        if value == 'nassarawa_nassarawa':
260            return 'nassarawa_nassawara'
261
262        if value == 'kogi_mopa-muro-mopi':
263            return 'kogi_mopa-muro'
264
265        if value == 'delta_osimili-north':
266            return 'delta_oshielli-north'
267
268        if value == 'delta_osimili':
269            return 'delta_oshimili'
270
271        if value == 'delta_osimili-south':
272            return 'delta_oshimili-south'
273        try:
274            value = value.replace("'","")
275        except:
276            return ''
277        lower = value.lower()
278        if lower in LGAS_dict.keys():
279            return lower
280        # If real names are given, let's see if a similar value
281        # in LGAS exist.
282        value = LGAS_inverted_stripped.get(strip(lower), value)
283        return value
284
285
286    @classmethod
287    def session(self, value):
288        """ '08' --> '2008'
289        '2008/2009' --> '2008'
290        """
291        if '/' in value:
292            numbers = value.split('/')
293            number = int(numbers[0])
294            if number in range(2000,2015):
295                return number
296            else:
297                return 9999
298        try:
299            number = int(value)
300        except ValueError:
301            #import pdb; pdb.set_trace()
302            return 9999
303        if number < 14:
304            return number + 2000
305        elif number in range(2000,2015):
306            return number
307        else:
308            return 9999
309
310    @classmethod
311    def former(self, value):
312        """ True --> yes
313        '2008/2009' --> '2008'
314        """
315        if value == 'True':
316            return 'yes'
317        return
318
319    @classmethod
320    def year(self, value):
321        """ '0' --> ''
322        """
323        if value == '0':
324            return
325        if value == 'None':
326            return
327        return value
328
329
330    @classmethod
331    def marit_stat(self, value):
332        """ 'True'/'False' --> 'married'/'unmarried'
333        """
334        if value in ('True','married'):
335            value = 'married'
336        elif value in ('False','unmarried'):
337            value = 'unmarried'
338        else:
339            value = ''
340        return value
341
342    @classmethod
343    def gender(self, value):
344        """ 'True'/'False' --> 'f'/'m'
345        """
346        if value.strip() in ('F', 'True','f'):
347            value = 'f'
348        elif value.strip() in ('M', 'False','m'):
349            value = 'm'
350        else:
351            value = ''
352        return value
353
354    @classmethod
355    def date(self, value):
356        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
357        """
358        if value == "None":
359            value = ""
360        elif value == "":
361            value = ""
362        else:
363            value = value.replace('/', '-')
364            # We add the hash symbol to avoid automatic date transformation
365            # in Excel and Calc for further processing
366            value += '#'
367        return value
368
369    @classmethod
370    def no_int(self, value):
371        """ Add hash and skip numbers starting with 999999
372        """
373        # We add the hash symbol to avoid automatic number transformation
374        # in Excel and Calc for further processing
375        try:
376            intvalue = int(value)
377            value += '#'
378        except:
379            pass
380        if value.startswith('999999'):
381            return
382        return value
383
384    @classmethod
385    def mode(self, value):
386        if value == "transfer_fulltime":
387            return "transfer_ft"
388        if value == "ume_ft":
389            return "utme_ft"
390        return value
391
392    @classmethod
393    def password(self, value):
394        if value == "not set":
395            return ""
396        return value
397
398    @classmethod
399    def nationality(self, value):
400        if value == "nigeria":
401            return "NG"
402        if value == "niger":
403            return "NE"
404        return value
405
406    @classmethod
407    def sittype(self, value):
408        if value == "nabtec":
409            return "nabteb"
410        return value
411
412    @classmethod
413    def company(self, value):
414        if value == "online":
415            return "interswitch"
416        return value
417
418    @classmethod
419    def p_category(self, value):
420        if value == "acceptance":
421            return "clearance"
422        return value
423
424    @classmethod
425    def email(self, value):
426        return value.strip()
427
428    @classmethod
429    def phone(self, value):
430        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
431
432        Dashes and slashes are removed before looking for sequences
433        of numbers.
434        """
435        if not value:
436            return
437        value = value.strip('#')
438        value = value.replace('-', '')
439        value = value.replace('/', '')
440        match = RE_PHONE.match(value)
441        phone = match.groups()[0]
442        if value.startswith('234'):
443            value = '+' + value[:3] + '-' + value[3:]
444        else:
445            value = '-%s' % phone
446        return value + '#'
447
448    @classmethod
449    def result(self, value):
450        try:
451            liste = eval(value)
452        except:
453            return
454        if isinstance(liste,list):
455            return [(i[0].lower(),i[1]) for i in liste]
456        return
457
458
459def main():
460    input_file = '%s' % sys.argv[1]
461    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
462    reader = csv.DictReader(open(input_file, 'rb'))
463    writer = None
464
465    for num, row in enumerate(reader):
466        if num == 0:
467            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
468            print "FIELDS: "
469            for x, y in enumerate(reader.fieldnames):
470                print x, y
471            header = convert_fieldnames(reader.fieldnames)
472            writer.writerow(header)
473        if row.get('reg_state') == 'student_created':
474            continue
475        if row.get('status') == 'started':
476            # We do not use started payments
477            continue
478        for key, value in row.items():
479            if not key in OPTIONS.keys():
480                continue
481            conv_name = OPTIONS[key]
482            converter = getattr(Converters, conv_name, None)
483            if converter is None:
484                print "WARNING: cannot find converter %s" % conv_name
485                continue
486            row[key] = converter(row[key])
487        try:
488            writer.writerow(row)
489        except:
490            print row['student_id']
491
492    print "Output written to %s" % output_file
493
494
495if __name__ == '__main__':
496    if len(sys.argv) != 2:
497        print 'Usage: %s <filename>' % __file__
498        sys.exit(1)
499    main()
Note: See TracBrowser for help on using the repository browser.