source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 8938

Last change on this file since 8938 was 8938, checked in by Henrik Bettermann, 13 years ago

More cell conversions and column title editing.

  • Property svn:keywords set to Id
File size: 11.5 KB
Line 
1## $Id: fix_import_file.py 8938 2012-07-07 22:33:03Z henrik $
2##
3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
19Fix exports from old SRP portal and other data sources to make
20them importable by current portal.
21
22Usage:
23
24Change into this directory, set the options below (files are assumed
25to be in the same directory) and then run
26
27  python fix_import_file.py <filename>
28
29Errors/warnings will be displayed on the shell, the output will be put
30into the specified output file.
31
32
33The lgas.py module must be copied into the same folder where this script
34is started.
35"""
36import csv
37import datetime
38import re
39import sys
40
41try:
42    from lgas import LGAS
43except:
44    print 'ERROR: lgas.py is missing.'
45    sys.exit(1)
46
47def strip(string):
48    string = string.replace('_', '')
49    string = string.replace('/', '')
50    string = string.replace('-', '')
51    string = string.replace(' ', '')
52    string = string.lower()
53    return string
54
55LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56LGAS_dict = dict(LGAS)
57
58##
59## CONFIGURATION SECTION
60##
61# keys are fieldnames in input file, values are methods of class
62# Converter (see below)
63OPTIONS = {
64    'student_id': 'student_id',
65    'sex': 'gender',
66    'birthday': 'date',
67    'request_date': 'datetime',
68    'marit_stat': 'marit_stat',
69    'session': 'session',
70    'entry_session': 'session',
71    'current_session': 'session',
72    'session_id': 'session',
73    'entry_mode': 'mode',
74    'reg_state': 'reg_state',
75    'password': 'password',
76    'phone': 'phone',
77    'nationality': 'nationality',
78    'level': 'level',
79    'start_level': 'level',
80    'end_level': 'level',
81    'level_id': 'level',
82    'current_level': 'level',
83    'semester': 'semester',
84    'application_category': 'application_category',
85    'lga': 'lga',
86    'order_id': 'no_int',
87    'uniben': 'former',
88    'nysc_year': 'year',
89    'alr_date': 'date',
90    'fst_sit_date': 'date',
91    'scd_sit_date': 'date',
92    'emp_start': 'date',
93    'emp_end': 'date',
94    'emp_start2': 'date',
95    'emp_end2': 'date',
96    'fst_sit_results': 'result',
97    'scd_sit_results': 'result',
98    'alr_results': 'result',
99    }
100
101# Mapping input file colnames --> output file colnames
102COLNAME_MAPPING = {
103    # base data
104    'reg_state': 'state',
105    'jamb_reg_no': 'reg_number',
106    'matric_no': 'matric_number',
107    'birthday': 'date_of_birth',
108    'clr_ac_pin': 'clr_code',
109    # study course
110    'study_course': 'certificate',
111    # study level
112    'session': 'level_session',
113    'verdict': 'level_verdict',
114    # course ticket
115    'level_id': 'level',
116    'core_or_elective': 'mandatory',
117    # payment ticket
118    'order_id': 'p_id',
119    'status': 'p_state',
120    'category': 'p_category',
121    'resp_pay_reference': 'r_pay_reference',
122    'resp_desc': 'r_desc',
123    'resp_approved_amount': 'r_amount_approved',
124    'item': 'p_item',
125    'amount': 'amount_auth',
126    'resp_card_num': 'r_card_num',
127    'resp_code': 'r_code',
128    'date': 'creation_date',
129    'surcharge': 'surcharge_1',
130    'session_id': 'p_session',
131    'hq_grade': 'hq_degree',
132    'uniben': 'former_matric',
133    'hq_type2': 'hq2_type',
134    'hq_grade2': 'hq2_degree',
135    'hq_school2': 'hq2_school',
136    'hq_matric_no2': 'hq2_matric_no',
137    'hq_session2': 'hq2_session',
138    'hq_disc2': 'hq2_disc',
139    'emp': 'employer',
140    'emp2': 'employer2',
141    'emp_position2': 'emp2_position',
142    'emp_start2': 'emp2_start',
143    'emp_end2': 'emp2_end',
144    'emp_reason2': 'emp2_reason',
145    }
146
147# Mapping input state --> output state
148REGSTATE_MAPPING = {
149    'student_created': 'created',
150    'admitted': 'admitted',
151    'objection_raised': 'clearance started',
152    'clearance_pin_entered': 'clearance started',
153    'clearance_requested': 'clearance requested',
154    'cleared_and_validated': 'cleared',
155    'school_fee_paid': 'school fee paid',
156    'returning': 'returning',
157    'courses_registered': 'courses registered',
158    'courses_validated': 'courses validated',
159    }
160
161##
162## END OF CONFIG
163##
164
165# Look for the first sequence of numbers
166RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
167
168def convert_fieldnames(fieldnames):
169    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
170    """
171    header = dict([(name, name) for name in fieldnames])
172    for in_name, out_name in COLNAME_MAPPING.items():
173        if in_name not in header:
174            continue
175        header[in_name] = out_name
176    return header
177
178class Converters():
179    """Converters to turn old-style values into new ones.
180    """
181
182    @classmethod
183    def student_id(self, value):
184        """ 'A123456' --> 'EA123456'
185        """
186        if len(value) == 7:
187            return 'E' + value
188        return value
189
190    @classmethod
191    def reg_state(self, value):
192        """ 'courses_validated' --> 'courses validated'
193        """
194        return REGSTATE_MAPPING.get(value,value)
195
196    @classmethod
197    def level(self, value):
198        """ '000' --> '10'
199        """
200        try:
201            number = int(value)
202        except ValueError:
203            return 9999
204        if number == 0:
205            return 10
206        return number
207
208    @classmethod
209    def semester(self, value):
210        """ '0' --> '9'
211        """
212        try:
213            number = int(value)
214        except ValueError:
215            return 9999
216        if number == 0:
217            return 9
218        return number
219
220    @classmethod
221    def application_category(self, value):
222        """ '' --> 'no'
223        """
224        if value == '':
225            return 'no'
226        return value
227
228    @classmethod
229    def lga(self, value):
230        """ Remove apostrophe
231        """
232        if value == 'akwa_ibom_uru_offong_oruko':
233            return 'akwa_ibom_urue-offong-oruko'
234        try:
235            value = value.replace("'","")
236        except:
237            return ''
238        lower = value.lower()
239        if lower in LGAS_dict.keys():
240            return lower
241        # If real names are given, let's see if a similar value
242        # in LGAS exist.
243        value = LGAS_inverted_stripped.get(strip(lower), value)
244        return value
245
246
247    @classmethod
248    def session(self, value):
249        """ '08' --> '2008'
250        '2008/2009' --> '2008'
251        """
252        if '/' in value:
253            numbers = value.split('/')
254            number = int(numbers[0])
255            if number in range(2000,2015):
256                return number
257            else:
258                return 9999
259        try:
260            number = int(value)
261        except ValueError:
262            #import pdb; pdb.set_trace()
263            return 9999
264        if number < 14:
265            return number + 2000
266        elif number in range(2000,2015):
267            return number
268        else:
269            return 9999
270
271    @classmethod
272    def former(self, value):
273        """ True --> yes
274        '2008/2009' --> '2008'
275        """
276        if value == 'True':
277            return 'yes'
278        return
279
280    @classmethod
281    def year(self, value):
282        """ '0' --> ''
283        """
284        if value == '0':
285            return
286        if value == 'None':
287            return
288        return value
289
290
291    @classmethod
292    def marit_stat(self, value):
293        """ 'True'/'False' --> 'married'/'unmarried'
294        """
295        if value in ('True','married'):
296            value = 'married'
297        elif value in ('False','unmarried'):
298            value = 'unmarried'
299        else:
300            value = ''
301        return value
302
303    @classmethod
304    def gender(self, value):
305        """ 'True'/'False' --> 'f'/'m'
306        """
307        if value.strip() in ('F', 'True','f'):
308            value = 'f'
309        elif value.strip() in ('M', 'False','m'):
310            value = 'm'
311        else:
312            value = ''
313        return value
314
315    @classmethod
316    def date(self, value):
317        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
318        """
319        if value == "None":
320            value = ""
321        elif value == "":
322            value = ""
323        else:
324            value = value.replace('/', '-')
325            # We add the hash symbol to avoid automatic date transformation
326            # in Excel and Calc for further processing
327            value += '#'
328        return value
329
330    @classmethod
331    def no_int(self, value):
332        """ Add hash.
333        """
334        # We add the hash symbol to avoid automatic number transformation
335        # in Excel and Calc for further processing
336        value += '#'
337        return value
338
339
340    @classmethod
341    def datetime(self, value):
342        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
343        """
344        #print  "IN: ", value
345        if value == "None":
346            value = ""
347        elif value == "":
348            value = ""
349        else:
350            #value = datetime.datetime.strptime(value, '%Y/%m/%d')
351            #value = datetime.datetime.strftime(value, '%Y-%m-%d')
352            pass
353        #print "OUT: ", value
354        return value
355
356    @classmethod
357    def mode(self, value):
358        if value == "transfer_fulltime":
359            return "transfer_ft"
360        if value == "ume_ft":
361            return "utme_ft"
362        return value
363
364    @classmethod
365    def password(self, value):
366        if value == "not set":
367            return ""
368        return value
369
370    @classmethod
371    def nationality(self, value):
372        if value == "nigeria":
373            return "NG"
374        return value
375
376    @classmethod
377    def phone(self, value):
378        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
379
380        Dashes and slashes are removed before looking for sequences
381        of numbers.
382        """
383        if not value:
384            return
385        value = value.replace('-', '')
386        value = value.replace('/', '')
387        match = RE_PHONE.match(value)
388        phone = match.groups()[0]
389        value = '-%s' % phone
390        return value + '#'
391
392    @classmethod
393    def result(self, value):
394        try:
395            liste = eval(value)
396        except:
397            return
398        if isinstance(liste,list):
399            return [(i[0].lower(),i[1]) for i in liste]
400        return
401
402def main():
403    input_file = '%s' % sys.argv[1]
404    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
405    reader = csv.DictReader(open(input_file, 'rb'))
406    writer = None
407
408    for num, row in enumerate(reader):
409        if num == 0:
410            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
411            print "FIELDS: "
412            for x, y in enumerate(reader.fieldnames):
413                print x, y
414            header = convert_fieldnames(reader.fieldnames)
415            writer.writerow(header)
416        for key, value in row.items():
417            if not key in OPTIONS.keys():
418                continue
419            conv_name = OPTIONS[key]
420            converter = getattr(Converters, conv_name, None)
421            if converter is None:
422                print "WARNING: cannot find converter %s" % conv_name
423                continue
424            row[key] = converter(row[key])
425        try:
426            writer.writerow(row)
427        except:
428            print row['student_id']
429
430    print "Output written to %s" % output_file
431
432
433if __name__ == '__main__':
434    if len(sys.argv) != 2:
435        print 'Usage: %s <filename>' % __file__
436        sys.exit(1)
437    main()
Note: See TracBrowser for help on using the repository browser.