source: main/waeup.sirp/trunk/tools/fix_import_file.py @ 7628

Last change on this file since 7628 was 7628, checked in by Henrik Bettermann, 13 years ago

Add converters for payment files.

  • Property svn:keywords set to Id
File size: 8.5 KB
Line 
1## $Id: fix_import_file.py 7628 2012-02-10 20:45:33Z henrik $
2##
3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
19Fix exports from old SRP portal to make them importable by current portal.
20
21Usage:
22
23Change into this directory, set the options below (files are assumed
24to be in the same directory) and then run
25
26  python fix_import_file.py <filename>
27
28Errors/warnings will be displayed on the shell, the output will be put
29into the specified output file.
30"""
31import csv
32import datetime
33import re
34import sys
35
36##
37## CONFIGURATION SECTION
38##
39# keys are fieldnames in input file, values are methods of class
40# Converter (see below)
41OPTIONS = {
42    'sex': 'gender',
43    'birthday': 'date',
44    'request_date': 'datetime',
45    'marit_stat': 'marit_stat',
46    'session': 'session',
47    'entry_session': 'session',
48    'current_session': 'session',
49    'session_id': 'session',
50    'reg_state': 'reg_state',
51    'password': 'password',
52    'phone': 'phone',
53    'level': 'level',
54    'start_level': 'level',
55    'end_level': 'level',
56    'level_id': 'level',
57    'current_level': 'level',
58    'semester': 'semester',
59    'application_category': 'application_category',
60    'lga': 'lga',
61    'order_id': 'no_int',
62    }
63
64# Mapping input file colnames --> output file colnames
65COLNAME_MAPPING = {
66    # base date
67    'jamb_reg_no': 'reg_number',
68    'birthday': 'date_of_birth',
69    'clr_ac_pin': 'clr_code',
70    # study course
71    'study_course': 'certificate',
72    # study level
73    'session': 'level_session',
74    'verdict': 'level_verdict',
75    # course ticket
76    'level_id': 'level',
77    # payment ticket
78    'order_id': 'p_id',
79    'status': 'p_state',
80    'category': 'p_category',
81    'resp_pay_reference': 'r_pay_reference',
82    'resp_desc': 'r_desc',
83    'resp_approved_amount': 'r_amount_approved',
84    'item': 'p_item',
85    'amount': 'amount_auth',
86    'resp_card_num': 'r_card_num',
87    'resp_code': 'r_code',
88    'date': 'creation_date',
89    'surcharge': 'surcharge_1',
90    'session_id': 'p_session',
91    }
92
93# Mapping input regh_state --> output reg_state
94REGSTATE_MAPPING = {
95    'student_created': 'created',
96    'admitted': 'admitted',
97    'objection_raised': 'clearance started',
98    'clearance_pin_entered': 'clearance started',
99    'clearance_requested': 'clearance requested',
100    'cleared_and_validated': 'cleared',
101    'school_fee_paid': 'school fee paid',
102    'returning': 'returning',
103    'courses_registered': 'courses registered',
104    'courses_validated': 'courses validated',
105    }
106
107##
108## END OF CONFIG
109##
110
111# Look for the first sequence of numbers
112RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
113
114def convert_fieldnames(fieldnames):
115    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
116    """
117    header = dict([(name, name) for name in fieldnames])
118    for in_name, out_name in COLNAME_MAPPING.items():
119        if in_name not in header:
120            continue
121        header[in_name] = out_name
122    return header
123
124class Converters():
125    """Converters to turn old-style values into new ones.
126    """
127    @classmethod
128    def reg_state(self, value):
129        """ 'courses_validated' --> 'courses validated'
130        """
131        return REGSTATE_MAPPING.get(value,value)
132
133    @classmethod
134    def level(self, value):
135        """ '000' --> '10'
136        """
137        try:
138            number = int(value)
139        except ValueError:
140            return 9999
141        if number == 0:
142            return 10
143        return number
144
145    @classmethod
146    def semester(self, value):
147        """ '0' --> '9'
148        """
149        try:
150            number = int(value)
151        except ValueError:
152            return 9999
153        if number == 0:
154            return 9
155        return number
156
157    @classmethod
158    def application_category(self, value):
159        """ '' --> 'no'
160        """
161        if value == '':
162            return 'no'
163        return value
164
165    @classmethod
166    def lga(self, value):
167        """ Remove apostrophe
168        """
169        if value == 'akwa_ibom_uru_offong_oruko':
170            return 'akwa_ibom_urue-offong-oruko'
171        try:
172            value = value.replace("'","")
173            value = value.lower()
174        except:
175            return ''
176        return value
177
178
179    @classmethod
180    def session(self, value):
181        """ '08' --> '2008'
182        """
183        try:
184            number = int(value)
185        except ValueError:
186            #import pdb; pdb.set_trace()
187            return 9999
188        if number < 14:
189            return number + 2000
190        elif number in range(2000,2015):
191            return number
192        else:
193            return 9999
194
195    @classmethod
196    def marit_stat(self, value):
197        """ 'True'/'False' --> 'married'/'unmarried'
198        """
199        if value in ('True','married'):
200            value = 'married'
201        elif value in ('False','unmarried'):
202            value = 'unmarried'
203        else:
204            value = ''
205        return value
206
207    @classmethod
208    def gender(self, value):
209        """ 'True'/'False' --> 'f'/'m'
210        """
211        if value in ('True','f'):
212            value = 'f'
213        elif value in ('False','m'):
214            value = 'm'
215        else:
216            value = ''
217        return value
218
219    @classmethod
220    def date(self, value):
221        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
222        """
223        if value == "None":
224            value = ""
225        elif value == "":
226            value = ""
227        else:
228            value = value.replace('/', '-')
229            # We add the hash symbol to avoid automatic date transformation
230            # in Excel and Calc for further processing
231            value += '#'
232        return value
233
234    @classmethod
235    def no_int(self, value):
236        """ Add hash.
237        """
238        # We add the hash symbol to avoid automatic number transformation
239        # in Excel and Calc for further processing
240        value += '#'
241        return value
242
243
244    @classmethod
245    def datetime(self, value):
246        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
247        """
248        #print  "IN: ", value
249        if value == "None":
250            value = ""
251        elif value == "":
252            value = ""
253        else:
254            #value = datetime.datetime.strptime(value, '%Y/%m/%d')
255            #value = datetime.datetime.strftime(value, '%Y-%m-%d')
256            pass
257        #print "OUT: ", value
258        return value
259
260    @classmethod
261    def password(self, value):
262        if value == "not set":
263            return ""
264        return value
265
266    @classmethod
267    def phone(self, value):
268        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
269
270        Dashes and slashes are removed before looking for sequences
271        of numbers.
272        """
273        value = value.replace('-', '')
274        value = value.replace('/', '')
275        match = RE_PHONE.match(value)
276        phone = match.groups()[0]
277        value = '--%s' % phone
278        return value
279
280
281def main():
282    input_file = '%s' % sys.argv[1]
283    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
284    reader = csv.DictReader(open(input_file, 'rb'))
285    writer = None
286
287    for num, row in enumerate(reader):
288        if num == 0:
289            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
290            print "FIELDS: "
291            for x, y in enumerate(reader.fieldnames):
292                print x, y
293            header = convert_fieldnames(reader.fieldnames)
294            writer.writerow(header)
295        for key, value in row.items():
296            if not key in OPTIONS.keys():
297                continue
298            conv_name = OPTIONS[key]
299            converter = getattr(Converters, conv_name, None)
300            if converter is None:
301                print "WARNING: cannot find converter %s" % conv_name
302                continue
303            row[key] = converter(row[key])
304        try:
305            writer.writerow(row)
306        except:
307            print row['student_id']
308
309    print "Output written to %s" % output_file
310
311
312if __name__ == '__main__':
313    if len(sys.argv) != 2:
314        print 'Usage: %s <filename>' % __file__
315        sys.exit(1)
316    main()
Note: See TracBrowser for help on using the repository browser.