source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 8959

Last change on this file since 8959 was 8945, checked in by Henrik Bettermann, 12 years ago

We do not need a special converter for datetime values. This can be done by the date converter.

  • Property svn:keywords set to Id
File size: 12.6 KB
Line 
1## $Id: fix_import_file.py 8945 2012-07-08 15:10:12Z henrik $
2##
3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
19Fix exports from old SRP portal and other data sources to make
20them importable by current portal.
21
22Usage:
23
24Change into this directory, set the options below (files are assumed
25to be in the same directory) and then run
26
27  python fix_import_file.py <filename>
28
29Errors/warnings will be displayed on the shell, the output will be put
30into the specified output file.
31
32
33The lgas.py module must be copied into the same folder where this script
34is started.
35"""
36import csv
37import datetime
38import re
39import sys
40
41try:
42    from lgas import LGAS
43except:
44    print 'ERROR: lgas.py is missing.'
45    sys.exit(1)
46
47def strip(string):
48    string = string.replace('_', '')
49    string = string.replace('/', '')
50    string = string.replace('-', '')
51    string = string.replace(' ', '')
52    string = string.lower()
53    return string
54
55LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56LGAS_dict = dict(LGAS)
57
58##
59## CONFIGURATION SECTION
60##
61# keys are fieldnames in input file, values are methods of class
62# Converter (see below)
63OPTIONS = {
64    'student_id': 'student_id',
65    'sex': 'gender',
66    'birthday': 'date',
67    'marit_stat': 'marit_stat',
68    'session': 'session',
69    'entry_session': 'session',
70    'current_session': 'session',
71    'session_id': 'session',
72    'entry_mode': 'mode',
73    'reg_state': 'reg_state',
74    'password': 'password',
75    'phone': 'phone',
76    'nationality': 'nationality',
77    'level': 'level',
78    'start_level': 'level',
79    'end_level': 'level',
80    'level_id': 'level',
81    'current_level': 'level',
82    'semester': 'semester',
83    'application_category': 'application_category',
84    'lga': 'lga',
85    'order_id': 'no_int',
86    'uniben': 'former',
87    'nysc_year': 'year',
88    'alr_date': 'date',
89    'fst_sit_date': 'date',
90    'scd_sit_date': 'date',
91    'emp_start': 'date',
92    'emp_end': 'date',
93    'emp_start2': 'date',
94    'emp_end2': 'date',
95    'fst_sit_results': 'result',
96    'scd_sit_results': 'result',
97    'alr_results': 'result',
98    'email': 'email',
99    'fst_sit_type': 'sittype',
100    'scd_sit_type': 'sittype',
101    'resp_pay_reference': 'no_int',
102    'type': 'company',
103    'date': 'date',
104    'category': 'p_category',
105    }
106
107# Mapping input file colnames --> output file colnames
108COLNAME_MAPPING = {
109    # base data
110    'reg_state': 'state',
111    'jamb_reg_no': 'reg_number',
112    'matric_no': 'matric_number',
113    'birthday': 'date_of_birth',
114    'clr_ac_pin': 'clr_code',
115    # clearance
116    'hq_grade': 'hq_degree',
117    'uniben': 'former_matric',
118    'hq_type2': 'hq2_type',
119    'hq_grade2': 'hq2_degree',
120    'hq_school2': 'hq2_school',
121    'hq_matric_no2': 'hq2_matric_no',
122    'hq_session2': 'hq2_session',
123    'hq_disc2': 'hq2_disc',
124    'emp': 'employer',
125    'emp2': 'employer2',
126    'emp_position2': 'emp2_position',
127    'emp_start2': 'emp2_start',
128    'emp_end2': 'emp2_end',
129    'emp_reason2': 'emp2_reason',
130    # study course
131    'study_course': 'certificate',
132    # study level
133    'session': 'level_session',
134    'verdict': 'level_verdict',
135    # course ticket
136    'level_id': 'level',
137    'core_or_elective': 'mandatory',
138    # payment ticket
139    'order_id': 'p_id',
140    'status': 'p_state',
141    'category': 'p_category',
142    'resp_pay_reference': 'r_pay_reference',
143    'resp_desc': 'r_desc',
144    'resp_approved_amount': 'r_amount_approved',
145    'item': 'p_item',
146    'amount': 'amount_auth',
147    'resp_card_num': 'r_card_num',
148    'resp_code': 'r_code',
149    'date': 'creation_date',
150    'surcharge': 'surcharge_1',
151    'session_id': 'p_session',
152    'type': 'r_company',
153    }
154
155# Mapping input state --> output state
156REGSTATE_MAPPING = {
157    'student_created': 'created',
158    'admitted': 'admitted',
159    'objection_raised': 'clearance started',
160    'clearance_pin_entered': 'clearance started',
161    'clearance_requested': 'clearance requested',
162    'cleared_and_validated': 'cleared',
163    'school_fee_paid': 'school fee paid',
164    'returning': 'returning',
165    'courses_registered': 'courses registered',
166    'courses_validated': 'courses validated',
167    }
168
169##
170## END OF CONFIG
171##
172
173# Look for the first sequence of numbers
174RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
175
176def convert_fieldnames(fieldnames):
177    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
178    """
179    header = dict([(name, name) for name in fieldnames])
180    for in_name, out_name in COLNAME_MAPPING.items():
181        if in_name not in header:
182            continue
183        header[in_name] = out_name
184    return header
185
186class Converters():
187    """Converters to turn old-style values into new ones.
188    """
189
190    @classmethod
191    def student_id(self, value):
192        """ 'A123456' --> 'EA123456'
193        """
194        if len(value) == 7:
195            return 'E' + value
196        return value
197
198    @classmethod
199    def reg_state(self, value):
200        """ 'courses_validated' --> 'courses validated'
201        """
202        return REGSTATE_MAPPING.get(value,value)
203
204    @classmethod
205    def level(self, value):
206        """ '000' --> '10'
207        """
208        try:
209            number = int(value)
210        except ValueError:
211            return 9999
212        if number == 0:
213            return 10
214        return number
215
216    @classmethod
217    def semester(self, value):
218        """ '0' --> '9'
219        """
220        try:
221            number = int(value)
222        except ValueError:
223            return 9999
224        if number == 0:
225            return 9
226        return number
227
228    @classmethod
229    def application_category(self, value):
230        """ '' --> 'no'
231        """
232        if value == '':
233            return 'no'
234        return value
235
236    @classmethod
237    def lga(self, value):
238        """ Remove apostrophe
239        """
240        if value == 'akwa_ibom_uru_offong_oruko':
241            return 'akwa_ibom_urue-offong-oruko'
242        if value == 'edo_ohionmwon':
243            return 'edo_orhionmwon'
244
245        if value == 'nassarawa_nassarawa':
246            return 'nassarawa_nassawara'
247
248        if value == 'kogi_mopa-muro-mopi':
249            return 'kogi_mopa-muro'
250
251        if value == 'delta_osimili-north':
252            return 'delta_oshielli-north'
253
254        if value == 'delta_osimili':
255            return 'delta_oshimili'
256
257        if value == 'delta_osimili-south':
258            return 'delta_oshimili-south'
259        try:
260            value = value.replace("'","")
261        except:
262            return ''
263        lower = value.lower()
264        if lower in LGAS_dict.keys():
265            return lower
266        # If real names are given, let's see if a similar value
267        # in LGAS exist.
268        value = LGAS_inverted_stripped.get(strip(lower), value)
269        return value
270
271
272    @classmethod
273    def session(self, value):
274        """ '08' --> '2008'
275        '2008/2009' --> '2008'
276        """
277        if '/' in value:
278            numbers = value.split('/')
279            number = int(numbers[0])
280            if number in range(2000,2015):
281                return number
282            else:
283                return 9999
284        try:
285            number = int(value)
286        except ValueError:
287            #import pdb; pdb.set_trace()
288            return 9999
289        if number < 14:
290            return number + 2000
291        elif number in range(2000,2015):
292            return number
293        else:
294            return 9999
295
296    @classmethod
297    def former(self, value):
298        """ True --> yes
299        '2008/2009' --> '2008'
300        """
301        if value == 'True':
302            return 'yes'
303        return
304
305    @classmethod
306    def year(self, value):
307        """ '0' --> ''
308        """
309        if value == '0':
310            return
311        if value == 'None':
312            return
313        return value
314
315
316    @classmethod
317    def marit_stat(self, value):
318        """ 'True'/'False' --> 'married'/'unmarried'
319        """
320        if value in ('True','married'):
321            value = 'married'
322        elif value in ('False','unmarried'):
323            value = 'unmarried'
324        else:
325            value = ''
326        return value
327
328    @classmethod
329    def gender(self, value):
330        """ 'True'/'False' --> 'f'/'m'
331        """
332        if value.strip() in ('F', 'True','f'):
333            value = 'f'
334        elif value.strip() in ('M', 'False','m'):
335            value = 'm'
336        else:
337            value = ''
338        return value
339
340    @classmethod
341    def date(self, value):
342        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
343        """
344        if value == "None":
345            value = ""
346        elif value == "":
347            value = ""
348        else:
349            value = value.replace('/', '-')
350            # We add the hash symbol to avoid automatic date transformation
351            # in Excel and Calc for further processing
352            value += '#'
353        return value
354
355    @classmethod
356    def no_int(self, value):
357        """ Add hash and skip numbers starting with 999999
358        """
359        # We add the hash symbol to avoid automatic number transformation
360        # in Excel and Calc for further processing
361        try:
362            intvalue = int(value)
363            value += '#'
364        except:
365            pass
366        if value.startswith('999999'):
367            return
368        return value
369
370    @classmethod
371    def mode(self, value):
372        if value == "transfer_fulltime":
373            return "transfer_ft"
374        if value == "ume_ft":
375            return "utme_ft"
376        return value
377
378    @classmethod
379    def password(self, value):
380        if value == "not set":
381            return ""
382        return value
383
384    @classmethod
385    def nationality(self, value):
386        if value == "nigeria":
387            return "NG"
388        if value == "niger":
389            return "NE"
390        return value
391
392    @classmethod
393    def sittype(self, value):
394        if value == "nabtec":
395            return "nabteb"
396        return value
397
398    @classmethod
399    def company(self, value):
400        if value == "online":
401            return "interswitch"
402        return value
403
404    @classmethod
405    def p_category(self, value):
406        if value == "acceptance":
407            return "clearance"
408        return value
409
410    @classmethod
411    def email(self, value):
412        return value.strip()
413
414    @classmethod
415    def phone(self, value):
416        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
417
418        Dashes and slashes are removed before looking for sequences
419        of numbers.
420        """
421        if not value:
422            return
423        value = value.replace('-', '')
424        value = value.replace('/', '')
425        match = RE_PHONE.match(value)
426        phone = match.groups()[0]
427        value = '-%s' % phone
428        return value + '#'
429
430    @classmethod
431    def result(self, value):
432        try:
433            liste = eval(value)
434        except:
435            return
436        if isinstance(liste,list):
437            return [(i[0].lower(),i[1]) for i in liste]
438        return
439
440def main():
441    input_file = '%s' % sys.argv[1]
442    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
443    reader = csv.DictReader(open(input_file, 'rb'))
444    writer = None
445
446    for num, row in enumerate(reader):
447        if num == 0:
448            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
449            print "FIELDS: "
450            for x, y in enumerate(reader.fieldnames):
451                print x, y
452            header = convert_fieldnames(reader.fieldnames)
453            writer.writerow(header)
454        if row.get('reg_state') == 'student_created':
455            continue
456        if row.get('status') == 'started':
457            # We do not use started payments
458            continue
459        for key, value in row.items():
460            if not key in OPTIONS.keys():
461                continue
462            conv_name = OPTIONS[key]
463            converter = getattr(Converters, conv_name, None)
464            if converter is None:
465                print "WARNING: cannot find converter %s" % conv_name
466                continue
467            row[key] = converter(row[key])
468        try:
469            writer.writerow(row)
470        except:
471            print row['student_id']
472
473    print "Output written to %s" % output_file
474
475
476if __name__ == '__main__':
477    if len(sys.argv) != 2:
478        print 'Usage: %s <filename>' % __file__
479        sys.exit(1)
480    main()
Note: See TracBrowser for help on using the repository browser.