source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 8939

Last change on this file since 8939 was 8939, checked in by Henrik Bettermann, 12 years ago

More conversions.

Skip student_created.

  • Property svn:keywords set to Id
File size: 12.4 KB
Line 
1## $Id: fix_import_file.py 8939 2012-07-08 05:17:11Z henrik $
2##
3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
19Fix exports from old SRP portal and other data sources to make
20them importable by current portal.
21
22Usage:
23
24Change into this directory, set the options below (files are assumed
25to be in the same directory) and then run
26
27  python fix_import_file.py <filename>
28
29Errors/warnings will be displayed on the shell, the output will be put
30into the specified output file.
31
32
33The lgas.py module must be copied into the same folder where this script
34is started.
35"""
36import csv
37import datetime
38import re
39import sys
40
41try:
42    from lgas import LGAS
43except:
44    print 'ERROR: lgas.py is missing.'
45    sys.exit(1)
46
47def strip(string):
48    string = string.replace('_', '')
49    string = string.replace('/', '')
50    string = string.replace('-', '')
51    string = string.replace(' ', '')
52    string = string.lower()
53    return string
54
55LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56LGAS_dict = dict(LGAS)
57
58##
59## CONFIGURATION SECTION
60##
61# keys are fieldnames in input file, values are methods of class
62# Converter (see below)
63OPTIONS = {
64    'student_id': 'student_id',
65    'sex': 'gender',
66    'birthday': 'date',
67    'request_date': 'datetime',
68    'marit_stat': 'marit_stat',
69    'session': 'session',
70    'entry_session': 'session',
71    'current_session': 'session',
72    'session_id': 'session',
73    'entry_mode': 'mode',
74    'reg_state': 'reg_state',
75    'password': 'password',
76    'phone': 'phone',
77    'nationality': 'nationality',
78    'level': 'level',
79    'start_level': 'level',
80    'end_level': 'level',
81    'level_id': 'level',
82    'current_level': 'level',
83    'semester': 'semester',
84    'application_category': 'application_category',
85    'lga': 'lga',
86    'order_id': 'no_int',
87    'uniben': 'former',
88    'nysc_year': 'year',
89    'alr_date': 'date',
90    'fst_sit_date': 'date',
91    'scd_sit_date': 'date',
92    'emp_start': 'date',
93    'emp_end': 'date',
94    'emp_start2': 'date',
95    'emp_end2': 'date',
96    'fst_sit_results': 'result',
97    'scd_sit_results': 'result',
98    'alr_results': 'result',
99    'email': 'email',
100    'fst_sit_type': 'sittype',
101    'scd_sit_type': 'sittype',
102    }
103
104# Mapping input file colnames --> output file colnames
105COLNAME_MAPPING = {
106    # base data
107    'reg_state': 'state',
108    'jamb_reg_no': 'reg_number',
109    'matric_no': 'matric_number',
110    'birthday': 'date_of_birth',
111    'clr_ac_pin': 'clr_code',
112    # study course
113    'study_course': 'certificate',
114    # study level
115    'session': 'level_session',
116    'verdict': 'level_verdict',
117    # course ticket
118    'level_id': 'level',
119    'core_or_elective': 'mandatory',
120    # payment ticket
121    'order_id': 'p_id',
122    'status': 'p_state',
123    'category': 'p_category',
124    'resp_pay_reference': 'r_pay_reference',
125    'resp_desc': 'r_desc',
126    'resp_approved_amount': 'r_amount_approved',
127    'item': 'p_item',
128    'amount': 'amount_auth',
129    'resp_card_num': 'r_card_num',
130    'resp_code': 'r_code',
131    'date': 'creation_date',
132    'surcharge': 'surcharge_1',
133    'session_id': 'p_session',
134    'hq_grade': 'hq_degree',
135    'uniben': 'former_matric',
136    'hq_type2': 'hq2_type',
137    'hq_grade2': 'hq2_degree',
138    'hq_school2': 'hq2_school',
139    'hq_matric_no2': 'hq2_matric_no',
140    'hq_session2': 'hq2_session',
141    'hq_disc2': 'hq2_disc',
142    'emp': 'employer',
143    'emp2': 'employer2',
144    'emp_position2': 'emp2_position',
145    'emp_start2': 'emp2_start',
146    'emp_end2': 'emp2_end',
147    'emp_reason2': 'emp2_reason',
148    }
149
150# Mapping input state --> output state
151REGSTATE_MAPPING = {
152    'student_created': 'created',
153    'admitted': 'admitted',
154    'objection_raised': 'clearance started',
155    'clearance_pin_entered': 'clearance started',
156    'clearance_requested': 'clearance requested',
157    'cleared_and_validated': 'cleared',
158    'school_fee_paid': 'school fee paid',
159    'returning': 'returning',
160    'courses_registered': 'courses registered',
161    'courses_validated': 'courses validated',
162    }
163
164##
165## END OF CONFIG
166##
167
168# Look for the first sequence of numbers
169RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
170
171def convert_fieldnames(fieldnames):
172    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
173    """
174    header = dict([(name, name) for name in fieldnames])
175    for in_name, out_name in COLNAME_MAPPING.items():
176        if in_name not in header:
177            continue
178        header[in_name] = out_name
179    return header
180
181class Converters():
182    """Converters to turn old-style values into new ones.
183    """
184
185    @classmethod
186    def student_id(self, value):
187        """ 'A123456' --> 'EA123456'
188        """
189        if len(value) == 7:
190            return 'E' + value
191        return value
192
193    @classmethod
194    def reg_state(self, value):
195        """ 'courses_validated' --> 'courses validated'
196        """
197        return REGSTATE_MAPPING.get(value,value)
198
199    @classmethod
200    def level(self, value):
201        """ '000' --> '10'
202        """
203        try:
204            number = int(value)
205        except ValueError:
206            return 9999
207        if number == 0:
208            return 10
209        return number
210
211    @classmethod
212    def semester(self, value):
213        """ '0' --> '9'
214        """
215        try:
216            number = int(value)
217        except ValueError:
218            return 9999
219        if number == 0:
220            return 9
221        return number
222
223    @classmethod
224    def application_category(self, value):
225        """ '' --> 'no'
226        """
227        if value == '':
228            return 'no'
229        return value
230
231    @classmethod
232    def lga(self, value):
233        """ Remove apostrophe
234        """
235        if value == 'akwa_ibom_uru_offong_oruko':
236            return 'akwa_ibom_urue-offong-oruko'
237        if value == 'edo_ohionmwon':
238            return 'edo_orhionmwon'
239
240        if value == 'nassarawa_nassarawa':
241            return 'nassarawa_nassawara'
242
243        if value == 'kogi_mopa-muro-mopi':
244            return 'kogi_mopa-muro'
245
246        if value == 'delta_osimili-north':
247            return 'delta_oshielli-north'
248
249        if value == 'delta_osimili':
250            return 'delta_oshimili'
251
252        if value == 'delta_osimili-south':
253            return 'delta_oshimili-south'
254        try:
255            value = value.replace("'","")
256        except:
257            return ''
258        lower = value.lower()
259        if lower in LGAS_dict.keys():
260            return lower
261        # If real names are given, let's see if a similar value
262        # in LGAS exist.
263        value = LGAS_inverted_stripped.get(strip(lower), value)
264        return value
265
266
267    @classmethod
268    def session(self, value):
269        """ '08' --> '2008'
270        '2008/2009' --> '2008'
271        """
272        if '/' in value:
273            numbers = value.split('/')
274            number = int(numbers[0])
275            if number in range(2000,2015):
276                return number
277            else:
278                return 9999
279        try:
280            number = int(value)
281        except ValueError:
282            #import pdb; pdb.set_trace()
283            return 9999
284        if number < 14:
285            return number + 2000
286        elif number in range(2000,2015):
287            return number
288        else:
289            return 9999
290
291    @classmethod
292    def former(self, value):
293        """ True --> yes
294        '2008/2009' --> '2008'
295        """
296        if value == 'True':
297            return 'yes'
298        return
299
300    @classmethod
301    def year(self, value):
302        """ '0' --> ''
303        """
304        if value == '0':
305            return
306        if value == 'None':
307            return
308        return value
309
310
311    @classmethod
312    def marit_stat(self, value):
313        """ 'True'/'False' --> 'married'/'unmarried'
314        """
315        if value in ('True','married'):
316            value = 'married'
317        elif value in ('False','unmarried'):
318            value = 'unmarried'
319        else:
320            value = ''
321        return value
322
323    @classmethod
324    def gender(self, value):
325        """ 'True'/'False' --> 'f'/'m'
326        """
327        if value.strip() in ('F', 'True','f'):
328            value = 'f'
329        elif value.strip() in ('M', 'False','m'):
330            value = 'm'
331        else:
332            value = ''
333        return value
334
335    @classmethod
336    def date(self, value):
337        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
338        """
339        if value == "None":
340            value = ""
341        elif value == "":
342            value = ""
343        else:
344            value = value.replace('/', '-')
345            # We add the hash symbol to avoid automatic date transformation
346            # in Excel and Calc for further processing
347            value += '#'
348        return value
349
350    @classmethod
351    def no_int(self, value):
352        """ Add hash.
353        """
354        # We add the hash symbol to avoid automatic number transformation
355        # in Excel and Calc for further processing
356        value += '#'
357        return value
358
359
360    @classmethod
361    def datetime(self, value):
362        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
363        """
364        #print  "IN: ", value
365        if value == "None":
366            value = ""
367        elif value == "":
368            value = ""
369        else:
370            #value = datetime.datetime.strptime(value, '%Y/%m/%d')
371            #value = datetime.datetime.strftime(value, '%Y-%m-%d')
372            pass
373        #print "OUT: ", value
374        return value
375
376    @classmethod
377    def mode(self, value):
378        if value == "transfer_fulltime":
379            return "transfer_ft"
380        if value == "ume_ft":
381            return "utme_ft"
382        return value
383
384    @classmethod
385    def password(self, value):
386        if value == "not set":
387            return ""
388        return value
389
390    @classmethod
391    def nationality(self, value):
392        if value == "nigeria":
393            return "NG"
394        if value == "niger":
395            return "NE"
396        return value
397
398    @classmethod
399    def sittype(self, value):
400        if value == "nabtec":
401            return "nabteb"
402        return value
403
404    @classmethod
405    def email(self, value):
406        return value.strip()
407
408    @classmethod
409    def phone(self, value):
410        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
411
412        Dashes and slashes are removed before looking for sequences
413        of numbers.
414        """
415        if not value:
416            return
417        value = value.replace('-', '')
418        value = value.replace('/', '')
419        match = RE_PHONE.match(value)
420        phone = match.groups()[0]
421        value = '-%s' % phone
422        return value + '#'
423
424    @classmethod
425    def result(self, value):
426        try:
427            liste = eval(value)
428        except:
429            return
430        if isinstance(liste,list):
431            return [(i[0].lower(),i[1]) for i in liste]
432        return
433
434def main():
435    input_file = '%s' % sys.argv[1]
436    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
437    reader = csv.DictReader(open(input_file, 'rb'))
438    writer = None
439
440    for num, row in enumerate(reader):
441        if num == 0:
442            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
443            print "FIELDS: "
444            for x, y in enumerate(reader.fieldnames):
445                print x, y
446            header = convert_fieldnames(reader.fieldnames)
447            writer.writerow(header)
448        if row.get('reg_state') == 'student_created':
449            continue
450        for key, value in row.items():
451            if not key in OPTIONS.keys():
452                continue
453            conv_name = OPTIONS[key]
454            converter = getattr(Converters, conv_name, None)
455            if converter is None:
456                print "WARNING: cannot find converter %s" % conv_name
457                continue
458            row[key] = converter(row[key])
459        try:
460            writer.writerow(row)
461        except:
462            print row['student_id']
463
464    print "Output written to %s" % output_file
465
466
467if __name__ == '__main__':
468    if len(sys.argv) != 2:
469        print 'Usage: %s <filename>' % __file__
470        sys.exit(1)
471    main()
Note: See TracBrowser for help on using the repository browser.