source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 8942

Last change on this file since 8942 was 8941, checked in by Henrik Bettermann, 13 years ago

More conversions.

Skip 'started' payments.

  • Property svn:keywords set to Id
File size: 13.1 KB
Line 
1## $Id: fix_import_file.py 8941 2012-07-08 08:58:45Z henrik $
2##
3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
19Fix exports from old SRP portal and other data sources to make
20them importable by current portal.
21
22Usage:
23
24Change into this directory, set the options below (files are assumed
25to be in the same directory) and then run
26
27  python fix_import_file.py <filename>
28
29Errors/warnings will be displayed on the shell, the output will be put
30into the specified output file.
31
32
33The lgas.py module must be copied into the same folder where this script
34is started.
35"""
36import csv
37import datetime
38import re
39import sys
40
41try:
42    from lgas import LGAS
43except:
44    print 'ERROR: lgas.py is missing.'
45    sys.exit(1)
46
47def strip(string):
48    string = string.replace('_', '')
49    string = string.replace('/', '')
50    string = string.replace('-', '')
51    string = string.replace(' ', '')
52    string = string.lower()
53    return string
54
55LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56LGAS_dict = dict(LGAS)
57
58##
59## CONFIGURATION SECTION
60##
61# keys are fieldnames in input file, values are methods of class
62# Converter (see below)
63OPTIONS = {
64    'student_id': 'student_id',
65    'sex': 'gender',
66    'birthday': 'date',
67    'request_date': 'datetime',
68    'marit_stat': 'marit_stat',
69    'session': 'session',
70    'entry_session': 'session',
71    'current_session': 'session',
72    'session_id': 'session',
73    'entry_mode': 'mode',
74    'reg_state': 'reg_state',
75    'password': 'password',
76    'phone': 'phone',
77    'nationality': 'nationality',
78    'level': 'level',
79    'start_level': 'level',
80    'end_level': 'level',
81    'level_id': 'level',
82    'current_level': 'level',
83    'semester': 'semester',
84    'application_category': 'application_category',
85    'lga': 'lga',
86    'order_id': 'no_int',
87    'uniben': 'former',
88    'nysc_year': 'year',
89    'alr_date': 'date',
90    'fst_sit_date': 'date',
91    'scd_sit_date': 'date',
92    'emp_start': 'date',
93    'emp_end': 'date',
94    'emp_start2': 'date',
95    'emp_end2': 'date',
96    'fst_sit_results': 'result',
97    'scd_sit_results': 'result',
98    'alr_results': 'result',
99    'email': 'email',
100    'fst_sit_type': 'sittype',
101    'scd_sit_type': 'sittype',
102    'resp_pay_reference': 'no_int',
103    'type': 'company',
104    'date': 'date',
105    'p_category': 'p_category',
106    }
107
108# Mapping input file colnames --> output file colnames
109COLNAME_MAPPING = {
110    # base data
111    'reg_state': 'state',
112    'jamb_reg_no': 'reg_number',
113    'matric_no': 'matric_number',
114    'birthday': 'date_of_birth',
115    'clr_ac_pin': 'clr_code',
116    # clearance
117    'hq_grade': 'hq_degree',
118    'uniben': 'former_matric',
119    'hq_type2': 'hq2_type',
120    'hq_grade2': 'hq2_degree',
121    'hq_school2': 'hq2_school',
122    'hq_matric_no2': 'hq2_matric_no',
123    'hq_session2': 'hq2_session',
124    'hq_disc2': 'hq2_disc',
125    'emp': 'employer',
126    'emp2': 'employer2',
127    'emp_position2': 'emp2_position',
128    'emp_start2': 'emp2_start',
129    'emp_end2': 'emp2_end',
130    'emp_reason2': 'emp2_reason',
131    # study course
132    'study_course': 'certificate',
133    # study level
134    'session': 'level_session',
135    'verdict': 'level_verdict',
136    # course ticket
137    'level_id': 'level',
138    'core_or_elective': 'mandatory',
139    # payment ticket
140    'order_id': 'p_id',
141    'status': 'p_state',
142    'category': 'p_category',
143    'resp_pay_reference': 'r_pay_reference',
144    'resp_desc': 'r_desc',
145    'resp_approved_amount': 'r_amount_approved',
146    'item': 'p_item',
147    'amount': 'amount_auth',
148    'resp_card_num': 'r_card_num',
149    'resp_code': 'r_code',
150    'date': 'creation_date',
151    'surcharge': 'surcharge_1',
152    'session_id': 'p_session',
153    'type': 'r_company',
154    }
155
156# Mapping input state --> output state
157REGSTATE_MAPPING = {
158    'student_created': 'created',
159    'admitted': 'admitted',
160    'objection_raised': 'clearance started',
161    'clearance_pin_entered': 'clearance started',
162    'clearance_requested': 'clearance requested',
163    'cleared_and_validated': 'cleared',
164    'school_fee_paid': 'school fee paid',
165    'returning': 'returning',
166    'courses_registered': 'courses registered',
167    'courses_validated': 'courses validated',
168    }
169
170##
171## END OF CONFIG
172##
173
174# Look for the first sequence of numbers
175RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
176
177def convert_fieldnames(fieldnames):
178    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
179    """
180    header = dict([(name, name) for name in fieldnames])
181    for in_name, out_name in COLNAME_MAPPING.items():
182        if in_name not in header:
183            continue
184        header[in_name] = out_name
185    return header
186
187class Converters():
188    """Converters to turn old-style values into new ones.
189    """
190
191    @classmethod
192    def student_id(self, value):
193        """ 'A123456' --> 'EA123456'
194        """
195        if len(value) == 7:
196            return 'E' + value
197        return value
198
199    @classmethod
200    def reg_state(self, value):
201        """ 'courses_validated' --> 'courses validated'
202        """
203        return REGSTATE_MAPPING.get(value,value)
204
205    @classmethod
206    def level(self, value):
207        """ '000' --> '10'
208        """
209        try:
210            number = int(value)
211        except ValueError:
212            return 9999
213        if number == 0:
214            return 10
215        return number
216
217    @classmethod
218    def semester(self, value):
219        """ '0' --> '9'
220        """
221        try:
222            number = int(value)
223        except ValueError:
224            return 9999
225        if number == 0:
226            return 9
227        return number
228
229    @classmethod
230    def application_category(self, value):
231        """ '' --> 'no'
232        """
233        if value == '':
234            return 'no'
235        return value
236
237    @classmethod
238    def lga(self, value):
239        """ Remove apostrophe
240        """
241        if value == 'akwa_ibom_uru_offong_oruko':
242            return 'akwa_ibom_urue-offong-oruko'
243        if value == 'edo_ohionmwon':
244            return 'edo_orhionmwon'
245
246        if value == 'nassarawa_nassarawa':
247            return 'nassarawa_nassawara'
248
249        if value == 'kogi_mopa-muro-mopi':
250            return 'kogi_mopa-muro'
251
252        if value == 'delta_osimili-north':
253            return 'delta_oshielli-north'
254
255        if value == 'delta_osimili':
256            return 'delta_oshimili'
257
258        if value == 'delta_osimili-south':
259            return 'delta_oshimili-south'
260        try:
261            value = value.replace("'","")
262        except:
263            return ''
264        lower = value.lower()
265        if lower in LGAS_dict.keys():
266            return lower
267        # If real names are given, let's see if a similar value
268        # in LGAS exist.
269        value = LGAS_inverted_stripped.get(strip(lower), value)
270        return value
271
272
273    @classmethod
274    def session(self, value):
275        """ '08' --> '2008'
276        '2008/2009' --> '2008'
277        """
278        if '/' in value:
279            numbers = value.split('/')
280            number = int(numbers[0])
281            if number in range(2000,2015):
282                return number
283            else:
284                return 9999
285        try:
286            number = int(value)
287        except ValueError:
288            #import pdb; pdb.set_trace()
289            return 9999
290        if number < 14:
291            return number + 2000
292        elif number in range(2000,2015):
293            return number
294        else:
295            return 9999
296
297    @classmethod
298    def former(self, value):
299        """ True --> yes
300        '2008/2009' --> '2008'
301        """
302        if value == 'True':
303            return 'yes'
304        return
305
306    @classmethod
307    def year(self, value):
308        """ '0' --> ''
309        """
310        if value == '0':
311            return
312        if value == 'None':
313            return
314        return value
315
316
317    @classmethod
318    def marit_stat(self, value):
319        """ 'True'/'False' --> 'married'/'unmarried'
320        """
321        if value in ('True','married'):
322            value = 'married'
323        elif value in ('False','unmarried'):
324            value = 'unmarried'
325        else:
326            value = ''
327        return value
328
329    @classmethod
330    def gender(self, value):
331        """ 'True'/'False' --> 'f'/'m'
332        """
333        if value.strip() in ('F', 'True','f'):
334            value = 'f'
335        elif value.strip() in ('M', 'False','m'):
336            value = 'm'
337        else:
338            value = ''
339        return value
340
341    @classmethod
342    def date(self, value):
343        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
344        """
345        if value == "None":
346            value = ""
347        elif value == "":
348            value = ""
349        else:
350            value = value.replace('/', '-')
351            # We add the hash symbol to avoid automatic date transformation
352            # in Excel and Calc for further processing
353            value += '#'
354        return value
355
356    @classmethod
357    def no_int(self, value):
358        """ Add hash and skip numbers starting with 999999
359        """
360        # We add the hash symbol to avoid automatic number transformation
361        # in Excel and Calc for further processing
362        try:
363            intvalue = int(value)
364            value += '#'
365        except:
366            pass
367        if value.startswith('999999'):
368            return
369        return value
370
371
372    @classmethod
373    def datetime(self, value):
374        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
375        """
376        #print  "IN: ", value
377        if value == "None":
378            value = ""
379        elif value == "":
380            value = ""
381        else:
382            #value = datetime.datetime.strptime(value, '%Y/%m/%d')
383            #value = datetime.datetime.strftime(value, '%Y-%m-%d')
384            pass
385        #print "OUT: ", value
386        return value
387
388    @classmethod
389    def mode(self, value):
390        if value == "transfer_fulltime":
391            return "transfer_ft"
392        if value == "ume_ft":
393            return "utme_ft"
394        return value
395
396    @classmethod
397    def password(self, value):
398        if value == "not set":
399            return ""
400        return value
401
402    @classmethod
403    def nationality(self, value):
404        if value == "nigeria":
405            return "NG"
406        if value == "niger":
407            return "NE"
408        return value
409
410    @classmethod
411    def sittype(self, value):
412        if value == "nabtec":
413            return "nabteb"
414        return value
415
416    @classmethod
417    def company(self, value):
418        if value == "online":
419            return "interswitch"
420        return value
421
422    @classmethod
423    def p_category(self, value):
424        if value == "acceptance":
425            return "clearance"
426        return value
427
428    @classmethod
429    def email(self, value):
430        return value.strip()
431
432    @classmethod
433    def phone(self, value):
434        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
435
436        Dashes and slashes are removed before looking for sequences
437        of numbers.
438        """
439        if not value:
440            return
441        value = value.replace('-', '')
442        value = value.replace('/', '')
443        match = RE_PHONE.match(value)
444        phone = match.groups()[0]
445        value = '-%s' % phone
446        return value + '#'
447
448    @classmethod
449    def result(self, value):
450        try:
451            liste = eval(value)
452        except:
453            return
454        if isinstance(liste,list):
455            return [(i[0].lower(),i[1]) for i in liste]
456        return
457
458def main():
459    input_file = '%s' % sys.argv[1]
460    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
461    reader = csv.DictReader(open(input_file, 'rb'))
462    writer = None
463
464    for num, row in enumerate(reader):
465        if num == 0:
466            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
467            print "FIELDS: "
468            for x, y in enumerate(reader.fieldnames):
469                print x, y
470            header = convert_fieldnames(reader.fieldnames)
471            writer.writerow(header)
472        if row.get('reg_state') == 'student_created':
473            continue
474        if row.get('status') == 'started':
475            # We do not use started payments
476            continue
477        for key, value in row.items():
478            if not key in OPTIONS.keys():
479                continue
480            conv_name = OPTIONS[key]
481            converter = getattr(Converters, conv_name, None)
482            if converter is None:
483                print "WARNING: cannot find converter %s" % conv_name
484                continue
485            row[key] = converter(row[key])
486        try:
487            writer.writerow(row)
488        except:
489            print row['student_id']
490
491    print "Output written to %s" % output_file
492
493
494if __name__ == '__main__':
495    if len(sys.argv) != 2:
496        print 'Usage: %s <filename>' % __file__
497        sys.exit(1)
498    main()
Note: See TracBrowser for help on using the repository browser.