source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 9158

Last change on this file since 9158 was 9158, checked in by Henrik Bettermann, 12 years ago

Fix if statement.

  • Property svn:keywords set to Id
File size: 13.8 KB
Line 
1## $Id: fix_import_file.py 9158 2012-09-05 08:22:01Z henrik $
2##
3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
19Fix exports from old SRP portal and other data sources to make
20them importable by current portal.
21
22Usage:
23
24Change into this directory, set the options below (files are assumed
25to be in the same directory) and then run
26
27  python fix_import_file.py <filename>
28
29Errors/warnings will be displayed on the shell, the output will be put
30into the specified output file.
31
32
33The lgas.py module must be copied into the same folder where this script
34is started.
35"""
36import csv
37import datetime
38import re
39import sys
40
41try:
42    from lgas import LGAS
43except:
44    print 'ERROR: lgas.py is missing.'
45    sys.exit(1)
46
47def strip(string):
48    string = string.replace('_', '')
49    string = string.replace('/', '')
50    string = string.replace('-', '')
51    string = string.replace(' ', '')
52    string = string.lower()
53    return string
54
55LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56LGAS_dict = dict(LGAS)
57
58##
59## CONFIGURATION SECTION
60##
61# keys are fieldnames in input file, values are methods of class
62# Converter (see below)
63OPTIONS = {
64    'student_id': 'student_id',
65    'sex': 'gender',
66    'birthday': 'date',
67    'marit_stat': 'marit_stat',
68    'session': 'session',
69    'entry_session': 'session',
70    'current_session': 'session',
71    'session_id': 'session',
72    'entry_mode': 'mode',
73    'reg_state': 'reg_state',
74    'password': 'password',
75    'phone': 'phone',
76    'nationality': 'nationality',
77    'level': 'level',
78    'start_level': 'level',
79    'end_level': 'level',
80    'level_id': 'level',
81    'current_level': 'level',
82    'semester': 'semester',
83    'application_category': 'application_category',
84    'lga': 'lga',
85    'order_id': 'no_int',
86    'uniben': 'former',
87    'nysc_year': 'year',
88    'alr_date': 'date',
89    'fst_sit_date': 'date',
90    'scd_sit_date': 'date',
91    'emp_start': 'date',
92    'emp_end': 'date',
93    'emp_start2': 'date',
94    'emp_end2': 'date',
95    'fst_sit_results': 'result',
96    'scd_sit_results': 'result',
97    'alr_results': 'result',
98    'email': 'email',
99    'fst_sit_type': 'sittype',
100    'scd_sit_type': 'sittype',
101    'resp_pay_reference': 'no_int',
102    'type': 'company',
103    'date': 'date',
104    'category': 'p_category',
105    'reg_transition': 'reg_state',  # we completely change this column,
106                                    # since reg_state import is usually intended
107    'transition': 'reg_transition',
108    'payment_date': 'date',
109    }
110
111# Mapping input file colnames --> output file colnames
112COLNAME_MAPPING = {
113    # base data
114    'id': 'student_id',
115    'reg_state': 'state',
116    'reg_transition': 'state',
117    'jamb_reg_no': 'reg_number',
118    'matric_no': 'matric_number',
119    'birthday': 'date_of_birth',
120    'clr_ac_pin': 'clr_code',
121    # clearance
122    'hq_grade': 'hq_degree',
123    'uniben': 'former_matric',
124    'hq_type2': 'hq2_type',
125    'hq_grade2': 'hq2_degree',
126    'hq_school2': 'hq2_school',
127    'hq_matric_no2': 'hq2_matric_no',
128    'hq_session2': 'hq2_session',
129    'hq_disc2': 'hq2_disc',
130    'emp': 'employer',
131    'emp2': 'employer2',
132    'emp_position2': 'emp2_position',
133    'emp_start2': 'emp2_start',
134    'emp_end2': 'emp2_end',
135    'emp_reason2': 'emp2_reason',
136    # study course
137    'study_course': 'certificate',
138    # study level
139    'session': 'level_session',
140    'verdict': 'level_verdict',
141    # course ticket
142    'level_id': 'level',
143    'core_or_elective': 'mandatory',
144    # payment ticket
145    'order_id': 'p_id',
146    'status': 'p_state',
147    'category': 'p_category',
148    'resp_pay_reference': 'r_pay_reference',
149    'resp_desc': 'r_desc',
150    'resp_approved_amount': 'r_amount_approved',
151    'item': 'p_item',
152    'amount': 'amount_auth',
153    'resp_card_num': 'r_card_num',
154    'resp_code': 'r_code',
155    'date': 'creation_date',
156    'surcharge': 'surcharge_1',
157    'session_id': 'p_session',
158    'type': 'r_company',
159    }
160
161# Mapping input state --> output state
162REGSTATE_MAPPING = {
163    'student_created': 'created',
164    'admitted': 'admitted',
165    'objection_raised': 'clearance started',
166    'clearance_pin_entered': 'clearance started',
167    'clearance_requested': 'clearance requested',
168    'cleared_and_validated': 'cleared',
169    'school_fee_paid': 'school fee paid',
170    'returning': 'returning',
171    'courses_registered': 'courses registered',
172    'courses_validated': 'courses validated',
173    'admit': 'admitted',
174    'return': 'returning'
175    }
176
177##
178## END OF CONFIG
179##
180
181# Look for the first sequence of numbers
182RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
183
184def convert_fieldnames(fieldnames):
185    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
186    """
187    # Remove whitespaces
188    header = dict([(name, name.strip()) for name in fieldnames])
189    for in_name, out_name in COLNAME_MAPPING.items():
190        if in_name not in header.values():
191            continue
192        # Inverse dictionary lookup
193        key = [key for key,value in header.items() if value==in_name][0]
194        header[key] = out_name
195    return header
196
197class Converters():
198    """Converters to turn old-style values into new ones.
199    """
200
201    @classmethod
202    def student_id(self, value, row):
203        """ 'A123456' --> 'EA123456'
204        """
205        if len(value) == 7:
206            return 'B' + value
207        return value
208
209    @classmethod
210    def reg_state(self, value, row):
211        """ 'courses_validated' --> 'courses validated'
212        """
213        return REGSTATE_MAPPING.get(value,value)
214
215    @classmethod
216    def reg_transition(self, value, row):
217        if value == "admitted":
218            return "admit"
219        if value == "returning":
220            return "return"
221        return value
222
223    @classmethod
224    def level(self, value, row):
225        """ '000' --> '10'
226        '800' --> '999' if pg student
227        """
228        try:
229            number = int(value)
230        except ValueError:
231            return 9999
232        if number == 0:
233            return 10
234        if row.get('entry_mode') and row.get('entry_mode').startswith('pg'):
235            return 999
236        return number
237
238    @classmethod
239    def semester(self, value, row):
240        """ '0' --> '9'
241        """
242        try:
243            number = int(value)
244        except ValueError:
245            return 9999
246        if number == 0:
247            return 9
248        return number
249
250    @classmethod
251    def application_category(self, value, row):
252        """ '' --> 'no'
253        """
254        if value == '':
255            return 'no'
256        return value
257
258    @classmethod
259    def lga(self, value, row):
260        """ Remove apostrophe
261        """
262        if value == 'akwa_ibom_uru_offong_oruko':
263            return 'akwa_ibom_urue-offong-oruko'
264        if value == 'edo_ohionmwon':
265            return 'edo_orhionmwon'
266
267        if value == 'nassarawa_nassarawa':
268            return 'nassarawa_nassawara'
269
270        if value == 'kogi_mopa-muro-mopi':
271            return 'kogi_mopa-muro'
272
273        if value == 'delta_osimili-north':
274            return 'delta_oshielli-north'
275
276        if value == 'delta_osimili':
277            return 'delta_oshimili'
278
279        if value == 'delta_osimili-south':
280            return 'delta_oshimili-south'
281        try:
282            value = value.replace("'","")
283        except:
284            return ''
285        lower = value.lower()
286        if lower in LGAS_dict.keys():
287            return lower
288        # If real names are given, let's see if a similar value
289        # in LGAS exist.
290        value = LGAS_inverted_stripped.get(strip(lower), value)
291        return value
292
293
294    @classmethod
295    def session(self, value, row):
296        """ '08' --> '2008'
297        '2008/2009' --> '2008'
298        """
299        if '/' in value:
300            numbers = value.split('/')
301            number = int(numbers[0])
302            if number in range(2000,2015):
303                return number
304            else:
305                return 9999
306        try:
307            number = int(value)
308        except ValueError:
309            #import pdb; pdb.set_trace()
310            return 9999
311        if number < 14:
312            return number + 2000
313        elif number in range(2000,2015):
314            return number
315        else:
316            return 9999
317
318    @classmethod
319    def former(self, value, row):
320        """ True --> yes
321        '2008/2009' --> '2008'
322        """
323        if value == 'True':
324            return 'yes'
325        return
326
327    @classmethod
328    def year(self, value, row):
329        """ '0' --> ''
330        """
331        if value == '0':
332            return
333        if value == 'None':
334            return
335        return value
336
337
338    @classmethod
339    def marit_stat(self, value, row):
340        """ 'True'/'False' --> 'married'/'unmarried'
341        """
342        if value in ('True','married'):
343            value = 'married'
344        elif value in ('False','unmarried'):
345            value = 'unmarried'
346        else:
347            value = ''
348        return value
349
350    @classmethod
351    def gender(self, value, row):
352        """ 'True'/'False' --> 'f'/'m'
353        """
354        if value.strip() in ('F', 'True','f'):
355            value = 'f'
356        elif value.strip() in ('M', 'False','m'):
357            value = 'm'
358        else:
359            value = ''
360        return value
361
362    @classmethod
363    def date(self, value, row):
364        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
365        """
366        if value == "None":
367            value = ""
368        elif value == "":
369            value = ""
370        else:
371            value = value.replace('/', '-')
372            # We add the hash symbol to avoid automatic date transformation
373            # in Excel and Calc for further processing
374            value += '#'
375        return value
376
377    @classmethod
378    def no_int(self, value, row):
379        """ Add hash and skip numbers starting with 999999
380        """
381        # We add the hash symbol to avoid automatic number transformation
382        # in Excel and Calc for further processing
383        try:
384            intvalue = int(value)
385            value += '#'
386        except:
387            pass
388        if value.startswith('999999'):
389            return
390        return value
391
392    @classmethod
393    def mode(self, value, row):
394        if value == "transfer_fulltime":
395            return "transfer_ft"
396        if value == "ume_ft":
397            return "utme_ft"
398        return value
399
400    @classmethod
401    def password(self, value, row):
402        if value == "not set":
403            return ""
404        return value
405
406    @classmethod
407    def nationality(self, value, row):
408        if value == "nigeria":
409            return "NG"
410        if value == "niger":
411            return "NE"
412        return value
413
414    @classmethod
415    def sittype(self, value, row):
416        if value == "nabtec":
417            return "nabteb"
418        return value
419
420    @classmethod
421    def company(self, value, row):
422        if value == "online":
423            return "interswitch"
424        return value
425
426    @classmethod
427    def p_category(self, value, row):
428        if value == "acceptance":
429            return "clearance"
430        return value
431
432    @classmethod
433    def email(self, value, row):
434        return value.strip()
435
436    @classmethod
437    def phone(self, value, row):
438        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
439
440        Dashes and slashes are removed before looking for sequences
441        of numbers.
442        """
443        if not value:
444            return
445        value = value.strip('#')
446        value = value.replace('-', '')
447        value = value.replace('/', '')
448        match = RE_PHONE.match(value)
449        phone = match.groups()[0]
450        if value.startswith('234'):
451            value = '+' + value[:3] + '-' + value[3:]
452        else:
453            value = '-%s' % phone
454        return value + '#'
455
456    @classmethod
457    def result(self, value, row):
458        try:
459            liste = eval(value)
460        except:
461            return
462        if isinstance(liste,list):
463            return [(i[0].lower(),i[1]) for i in liste]
464        return
465
466
467def main():
468    input_file = '%s' % sys.argv[1]
469    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
470    reader = csv.DictReader(open(input_file, 'rb'))
471    writer = None
472
473    for num, row in enumerate(reader):
474        if num == 0:
475            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
476            print "FIELDS: "
477            for x, y in enumerate(reader.fieldnames):
478                print x, y
479            header = convert_fieldnames(reader.fieldnames)
480            writer.writerow(header)
481        if row.get('reg_state') == 'student_created':
482            # We do not reimport student records which have never been accessed.
483            continue
484        if row.get('status') == 'started':
485            # We do not reimport started payments.
486            continue
487        for key, value in row.items():
488            # Remove unwanted whitespaces.
489            row[key] = row[key].strip()
490            if not key in OPTIONS.keys():
491                continue
492            conv_name = OPTIONS[key]
493            converter = getattr(Converters, conv_name, None)
494            if converter is None:
495                print "WARNING: cannot find converter %s" % conv_name
496                continue
497            row[key] = converter(row[key], row)
498        try:
499            writer.writerow(row)
500        except:
501            print row['student_id']
502
503    print "Output written to %s" % output_file
504
505
506if __name__ == '__main__':
507    if len(sys.argv) != 2:
508        print 'Usage: %s <filename>' % __file__
509        sys.exit(1)
510    main()
Note: See TracBrowser for help on using the repository browser.