source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 9270

Last change on this file since 9270 was 9270, checked in by Henrik Bettermann, 12 years ago

Update fix_import_file.py.

  • Property svn:keywords set to Id
File size: 14.1 KB
Line 
1## $Id: fix_import_file.py 9270 2012-10-02 08:24:05Z henrik $
2##
3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
19Fix exports from old SRP portal and other data sources to make
20them importable by current portal.
21
22Usage:
23
24Change into this directory, set the options below (files are assumed
25to be in the same directory) and then run
26
27  python fix_import_file.py <filename>
28
29Errors/warnings will be displayed on the shell, the output will be put
30into the specified output file.
31
32
33The lgas.py module must be copied into the same folder where this script
34is started.
35"""
36import csv
37import datetime
38import re
39import sys
40
41try:
42    from lgas import LGAS
43except:
44    print 'ERROR: lgas.py is missing.'
45    sys.exit(1)
46
47def strip(string):
48    string = string.replace('_', '')
49    string = string.replace('/', '')
50    string = string.replace('-', '')
51    string = string.replace(' ', '')
52    string = string.lower()
53    return string
54
55LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56LGAS_dict = dict(LGAS)
57
58##
59## CONFIGURATION SECTION
60##
61# keys are fieldnames in input file, values are methods of class
62# Converter (see below)
63OPTIONS = {
64    'student_id': 'student_id',
65    'sex': 'gender',
66    'birthday': 'date',
67    'marit_stat': 'marit_stat',
68    'session': 'session',
69    'entry_session': 'session',
70    'current_session': 'session',
71    'session_id': 'session',
72    'entry_mode': 'mode',
73    'reg_state': 'reg_state',
74    'password': 'password',
75    'phone': 'phone',
76    'nationality': 'nationality',
77    'level': 'level',
78    'start_level': 'level',
79    'end_level': 'level',
80    'level_id': 'level',
81    'current_level': 'level',
82    'semester': 'semester',
83    'application_category': 'application_category',
84    'lga': 'lga',
85    'order_id': 'no_int',
86    'uniben': 'former',
87    'nysc_year': 'year',
88    'alr_date': 'date',
89    'fst_sit_date': 'date',
90    'scd_sit_date': 'date',
91    'emp_start': 'date',
92    'emp_end': 'date',
93    'emp_start2': 'date',
94    'emp_end2': 'date',
95    'fst_sit_results': 'result',
96    'scd_sit_results': 'result',
97    'alr_results': 'result',
98    'email': 'email',
99    'fst_sit_type': 'sittype',
100    'scd_sit_type': 'sittype',
101    'resp_pay_reference': 'no_int',
102    'type': 'company',
103    'date': 'date',
104    'core_or_elective': 'bool',
105    'category': 'p_category',
106    'reg_transition': 'reg_state',  # we completely change this column,
107                                    # since reg_state import is usually intended
108    'transition': 'reg_transition',
109    'payment_date': 'date',
110    'validation_date': 'date',
111    }
112
113# Mapping input file colnames --> output file colnames
114COLNAME_MAPPING = {
115    # base data
116    'id': 'student_id',
117    'reg_state': 'state',
118    'reg_transition': 'state',
119    'jamb_reg_no': 'reg_number',
120    'matric_no': 'matric_number',
121    'birthday': 'date_of_birth',
122    'clr_ac_pin': 'clr_code',
123    # clearance
124    'hq_grade': 'hq_degree',
125    'uniben': 'former_matric',
126    'hq_type2': 'hq2_type',
127    'hq_grade2': 'hq2_degree',
128    'hq_school2': 'hq2_school',
129    'hq_matric_no2': 'hq2_matric_no',
130    'hq_session2': 'hq2_session',
131    'hq_disc2': 'hq2_disc',
132    'emp': 'employer',
133    'emp2': 'employer2',
134    'emp_position2': 'emp2_position',
135    'emp_start2': 'emp2_start',
136    'emp_end2': 'emp2_end',
137    'emp_reason2': 'emp2_reason',
138    # study course
139    'study_course': 'certificate',
140    # study level
141    'session': 'level_session',
142    'verdict': 'level_verdict',
143    # course ticket
144    'level_id': 'level',
145    'core_or_elective': 'mandatory',
146    # payment ticket
147    'order_id': 'p_id',
148    'status': 'p_state',
149    'category': 'p_category',
150    'resp_pay_reference': 'r_pay_reference',
151    'resp_desc': 'r_desc',
152    'resp_approved_amount': 'r_amount_approved',
153    'item': 'p_item',
154    'amount': 'amount_auth',
155    'resp_card_num': 'r_card_num',
156    'resp_code': 'r_code',
157    'date': 'creation_date',
158    'surcharge': 'surcharge_1',
159    'session_id': 'p_session',
160    'type': 'r_company',
161    }
162
163# Mapping input state --> output state
164REGSTATE_MAPPING = {
165    'student_created': 'created',
166    'admitted': 'admitted',
167    'objection_raised': 'clearance started',
168    'clearance_pin_entered': 'clearance started',
169    'clearance_requested': 'clearance requested',
170    'cleared_and_validated': 'cleared',
171    'school_fee_paid': 'school fee paid',
172    'returning': 'returning',
173    'courses_registered': 'courses registered',
174    'courses_validated': 'courses validated',
175    'admit': 'admitted',
176    'return': 'returning'
177    }
178
179##
180## END OF CONFIG
181##
182
183# Look for the first sequence of numbers
184RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
185
186def convert_fieldnames(fieldnames):
187    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
188    """
189    # Remove whitespaces
190    header = dict([(name, name.strip()) for name in fieldnames])
191    for in_name, out_name in COLNAME_MAPPING.items():
192        if in_name not in header.values():
193            continue
194        # Inverse dictionary lookup
195        key = [key for key,value in header.items() if value==in_name][0]
196        header[key] = out_name
197    return header
198
199class Converters():
200    """Converters to turn old-style values into new ones.
201    """
202
203    @classmethod
204    def student_id(self, value, row):
205        """ 'A123456' --> 'EA123456'
206        """
207        if len(value) == 7:
208            return 'M' + value
209        return value
210
211    @classmethod
212    def reg_state(self, value, row):
213        """ 'courses_validated' --> 'courses validated'
214        """
215        return REGSTATE_MAPPING.get(value,value)
216
217    @classmethod
218    def reg_transition(self, value, row):
219        if value == "admitted":
220            return "admit"
221        if value == "returning":
222            return "return"
223        return value
224
225    @classmethod
226    def level(self, value, row):
227        """ '000' --> '10'
228        '800' --> '999' if pg student
229        """
230        try:
231            number = int(value)
232        except ValueError:
233            return 9999
234        if number == 0:
235            return 10
236        if row.get('entry_mode') and row.get('entry_mode').startswith('pg'):
237            return 999
238        return number
239
240    @classmethod
241    def semester(self, value, row):
242        """ '0' --> '9'
243        """
244        try:
245            number = int(value)
246        except ValueError:
247            return 9999
248        if number == 0:
249            return 9
250        return number
251
252    @classmethod
253    def application_category(self, value, row):
254        """ '' --> 'no'
255        """
256        if value == '':
257            return 'no'
258        return value
259
260    @classmethod
261    def lga(self, value, row):
262        """ Remove apostrophe
263        """
264        if value == 'akwa_ibom_uru_offong_oruko':
265            return 'akwa_ibom_urue-offong-oruko'
266        if value == 'edo_ohionmwon':
267            return 'edo_orhionmwon'
268
269        if value == 'nassarawa_nassarawa':
270            return 'nassarawa_nassawara'
271
272        if value == 'kogi_mopa-muro-mopi':
273            return 'kogi_mopa-muro'
274
275        if value == 'delta_osimili-north':
276            return 'delta_oshielli-north'
277
278        if value == 'delta_osimili':
279            return 'delta_oshimili'
280
281        if value == 'delta_osimili-south':
282            return 'delta_oshimili-south'
283        try:
284            value = value.replace("'","")
285        except:
286            return ''
287        lower = value.lower()
288        if lower in LGAS_dict.keys():
289            return lower
290        # If real names are given, let's see if a similar value
291        # in LGAS exist.
292        value = LGAS_inverted_stripped.get(strip(lower), value)
293        return value
294
295
296    @classmethod
297    def session(self, value, row):
298        """ '08' --> '2008'
299        '2008/2009' --> '2008'
300        """
301        if '/' in value:
302            numbers = value.split('/')
303            number = int(numbers[0])
304            if number in range(2000,2015):
305                return number
306            else:
307                return 9999
308        try:
309            number = int(value)
310        except ValueError:
311            return 9999
312        if number < 14:
313            return number + 2000
314        elif number in range(2000,2015):
315            return number
316        else:
317            return 9999
318
319    @classmethod
320    def former(self, value, row):
321        """ True --> yes
322        '2008/2009' --> '2008'
323        """
324        if value == 'True':
325            return 'yes'
326        return
327
328    @classmethod
329    def bool(self, value, row):
330        """ True --> 1
331        """
332        if value in ('TRUE', 'True'):
333            return '1'
334        elif value in ('FALSE', 'False'):
335            return '0'
336        return
337
338    @classmethod
339    def year(self, value, row):
340        """ '0' --> ''
341        """
342        if value == '0':
343            return
344        if value == 'None':
345            return
346        return value
347
348
349    @classmethod
350    def marit_stat(self, value, row):
351        """ 'True'/'False' --> 'married'/'unmarried'
352        """
353        if value in ('True','married'):
354            value = 'married'
355        elif value in ('False','unmarried'):
356            value = 'unmarried'
357        else:
358            value = ''
359        return value
360
361    @classmethod
362    def gender(self, value, row):
363        """ 'True'/'False' --> 'f'/'m'
364        """
365        if value.strip() in ('F', 'True','f'):
366            value = 'f'
367        elif value.strip() in ('M', 'False','m'):
368            value = 'm'
369        else:
370            value = ''
371        return value
372
373    @classmethod
374    def date(self, value, row):
375        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
376        """
377        if value == "None":
378            value = ""
379        elif value == "":
380            value = ""
381        else:
382            value = value.replace('/', '-')
383            # We add the hash symbol to avoid automatic date transformation
384            # in Excel and Calc for further processing
385            value += '#'
386        return value
387
388    @classmethod
389    def no_int(self, value, row):
390        """ Add hash and skip numbers starting with 999999
391        """
392        # We add the hash symbol to avoid automatic number transformation
393        # in Excel and Calc for further processing
394        try:
395            intvalue = int(value)
396            value += '#'
397        except:
398            pass
399        if value.startswith('999999'):
400            return
401        return value
402
403    @classmethod
404    def mode(self, value, row):
405        if value == "transfer_fulltime":
406            return "transfer_ft"
407        if value == "ume_ft":
408            return "utme_ft"
409        return value
410
411    @classmethod
412    def password(self, value, row):
413        if value == "not set":
414            return ""
415        return value
416
417    @classmethod
418    def nationality(self, value, row):
419        if value in ('nigeria', 'Nigeria'):
420            return "NG"
421        if value in ('niger', 'Niger'):
422            return "NE"
423        return value
424
425    @classmethod
426    def sittype(self, value, row):
427        if value == "nabtec":
428            return "nabteb"
429        return value
430
431    @classmethod
432    def company(self, value, row):
433        if value == "online":
434            return "interswitch"
435        return value
436
437    @classmethod
438    def p_category(self, value, row):
439        if value == "acceptance":
440            return "clearance"
441        return value
442
443    @classmethod
444    def email(self, value, row):
445        return value.strip()
446
447    @classmethod
448    def phone(self, value, row):
449        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
450
451        Dashes and slashes are removed before looking for sequences
452        of numbers.
453        """
454        if not value:
455            return
456        value = value.strip('#')
457        value = value.replace('-', '')
458        value = value.replace('/', '')
459        match = RE_PHONE.match(value)
460        phone = match.groups()[0]
461        if value.startswith('234'):
462            value = '+' + value[:3] + '-' + value[3:]
463        else:
464            value = '-%s' % phone
465        return value + '#'
466
467    @classmethod
468    def result(self, value, row):
469        try:
470            liste = eval(value)
471        except:
472            return
473        if isinstance(liste,list):
474            return [(i[0].lower(),i[1]) for i in liste]
475        return
476
477
478def main():
479    input_file = '%s' % sys.argv[1]
480    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
481    reader = csv.DictReader(open(input_file, 'rb'))
482    writer = None
483
484    for num, row in enumerate(reader):
485        if num == 0:
486            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
487            print "FIELDS: "
488            for x, y in enumerate(reader.fieldnames):
489                print x, y
490            header = convert_fieldnames(reader.fieldnames)
491            writer.writerow(header)
492        if row.get('reg_state') == 'student_created':
493            # We do not reimport student records which have never been accessed.
494            continue
495        if row.get('status') == 'started':
496            # We do not reimport started payments.
497            continue
498        for key, value in row.items():
499            # Remove unwanted whitespaces.
500            row[key] = row[key].strip()
501            if not key in OPTIONS.keys():
502                continue
503            conv_name = OPTIONS[key]
504            converter = getattr(Converters, conv_name, None)
505            if converter is None:
506                print "WARNING: cannot find converter %s" % conv_name
507                continue
508            row[key] = converter(row[key], row)
509        try:
510            writer.writerow(row)
511        except:
512            print row['student_id']
513
514    print "Output written to %s" % output_file
515
516
517if __name__ == '__main__':
518    if len(sys.argv) != 2:
519        print 'Usage: %s <filename>' % __file__
520        sys.exit(1)
521    main()
Note: See TracBrowser for help on using the repository browser.