source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 8517

Last change on this file since 8517 was 8478, checked in by Henrik Bettermann, 13 years ago

Prepare script for migration of AAUE data.

  • Property svn:keywords set to Id
File size: 9.4 KB
Line 
1## $Id: fix_import_file.py 8478 2012-05-21 07:17:50Z henrik $
2##
3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
19Fix exports from old SRP portal to make them importable by current portal.
20
21Usage:
22
23Change into this directory, set the options below (files are assumed
24to be in the same directory) and then run
25
26  python fix_import_file.py <filename>
27
28Errors/warnings will be displayed on the shell, the output will be put
29into the specified output file.
30"""
31import csv
32import datetime
33import re
34import sys
35
36##
37## CONFIGURATION SECTION
38##
39# keys are fieldnames in input file, values are methods of class
40# Converter (see below)
41OPTIONS = {
42    'student_id': 'student_id',
43    'sex': 'gender',
44    'birthday': 'date',
45    'request_date': 'datetime',
46    'marit_stat': 'marit_stat',
47    'session': 'session',
48    'entry_session': 'session',
49    'current_session': 'session',
50    'session_id': 'session',
51    'entry_mode': 'mode',
52    'reg_state': 'reg_state',
53    'password': 'password',
54    'phone': 'phone',
55    'nationality': 'nationality',
56    'level': 'level',
57    'start_level': 'level',
58    'end_level': 'level',
59    'level_id': 'level',
60    'current_level': 'level',
61    'semester': 'semester',
62    'application_category': 'application_category',
63    'lga': 'lga',
64    'order_id': 'no_int',
65    }
66
67# Mapping input file colnames --> output file colnames
68COLNAME_MAPPING = {
69    # base data
70    'reg_state': 'state',
71    'jamb_reg_no': 'reg_number',
72    'matric_no': 'matric_number',
73    'birthday': 'date_of_birth',
74    'clr_ac_pin': 'clr_code',
75    # study course
76    'study_course': 'certificate',
77    # study level
78    'session': 'level_session',
79    'verdict': 'level_verdict',
80    # course ticket
81    'level_id': 'level',
82    'core_or_elective': 'mandatory',
83    # payment ticket
84    'order_id': 'p_id',
85    'status': 'p_state',
86    'category': 'p_category',
87    'resp_pay_reference': 'r_pay_reference',
88    'resp_desc': 'r_desc',
89    'resp_approved_amount': 'r_amount_approved',
90    'item': 'p_item',
91    'amount': 'amount_auth',
92    'resp_card_num': 'r_card_num',
93    'resp_code': 'r_code',
94    'date': 'creation_date',
95    'surcharge': 'surcharge_1',
96    'session_id': 'p_session',
97    }
98
99# Mapping input state --> output state
100REGSTATE_MAPPING = {
101    'student_created': 'created',
102    'admitted': 'admitted',
103    'objection_raised': 'clearance started',
104    'clearance_pin_entered': 'clearance started',
105    'clearance_requested': 'clearance requested',
106    'cleared_and_validated': 'cleared',
107    'school_fee_paid': 'school fee paid',
108    'returning': 'returning',
109    'courses_registered': 'courses registered',
110    'courses_validated': 'courses validated',
111    }
112
113##
114## END OF CONFIG
115##
116
117# Look for the first sequence of numbers
118RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
119
120def convert_fieldnames(fieldnames):
121    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
122    """
123    header = dict([(name, name) for name in fieldnames])
124    for in_name, out_name in COLNAME_MAPPING.items():
125        if in_name not in header:
126            continue
127        header[in_name] = out_name
128    return header
129
130class Converters():
131    """Converters to turn old-style values into new ones.
132    """
133
134    @classmethod
135    def student_id(self, value):
136        """ 'A123456' --> 'EA123456'
137        """
138        if len(value) == 7:
139            return 'E' + value
140        return value
141
142    @classmethod
143    def reg_state(self, value):
144        """ 'courses_validated' --> 'courses validated'
145        """
146        return REGSTATE_MAPPING.get(value,value)
147
148    @classmethod
149    def level(self, value):
150        """ '000' --> '10'
151        """
152        try:
153            number = int(value)
154        except ValueError:
155            return 9999
156        if number == 0:
157            return 10
158        return number
159
160    @classmethod
161    def semester(self, value):
162        """ '0' --> '9'
163        """
164        try:
165            number = int(value)
166        except ValueError:
167            return 9999
168        if number == 0:
169            return 9
170        return number
171
172    @classmethod
173    def application_category(self, value):
174        """ '' --> 'no'
175        """
176        if value == '':
177            return 'no'
178        return value
179
180    @classmethod
181    def lga(self, value):
182        """ Remove apostrophe
183        """
184        if value == 'akwa_ibom_uru_offong_oruko':
185            return 'akwa_ibom_urue-offong-oruko'
186        try:
187            value = value.replace("'","")
188            value = value.lower()
189        except:
190            return ''
191        return value
192
193
194    @classmethod
195    def session(self, value):
196        """ '08' --> '2008'
197        '2008/2009' --> '2008'
198        """
199        if '/' in value:
200            numbers = value.split('/')
201            number = int(numbers[0])
202            if number in range(2000,2015):
203                return number
204            else:
205                return 9999
206        try:
207            number = int(value)
208        except ValueError:
209            #import pdb; pdb.set_trace()
210            return 9999
211        if number < 14:
212            return number + 2000
213        elif number in range(2000,2015):
214            return number
215        else:
216            return 9999
217
218    @classmethod
219    def marit_stat(self, value):
220        """ 'True'/'False' --> 'married'/'unmarried'
221        """
222        if value in ('True','married'):
223            value = 'married'
224        elif value in ('False','unmarried'):
225            value = 'unmarried'
226        else:
227            value = ''
228        return value
229
230    @classmethod
231    def gender(self, value):
232        """ 'True'/'False' --> 'f'/'m'
233        """
234        if value in ('True','f'):
235            value = 'f'
236        elif value in ('False','m'):
237            value = 'm'
238        else:
239            value = ''
240        return value
241
242    @classmethod
243    def date(self, value):
244        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
245        """
246        if value == "None":
247            value = ""
248        elif value == "":
249            value = ""
250        else:
251            value = value.replace('/', '-')
252            # We add the hash symbol to avoid automatic date transformation
253            # in Excel and Calc for further processing
254            value += '#'
255        return value
256
257    @classmethod
258    def no_int(self, value):
259        """ Add hash.
260        """
261        # We add the hash symbol to avoid automatic number transformation
262        # in Excel and Calc for further processing
263        value += '#'
264        return value
265
266
267    @classmethod
268    def datetime(self, value):
269        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
270        """
271        #print  "IN: ", value
272        if value == "None":
273            value = ""
274        elif value == "":
275            value = ""
276        else:
277            #value = datetime.datetime.strptime(value, '%Y/%m/%d')
278            #value = datetime.datetime.strftime(value, '%Y-%m-%d')
279            pass
280        #print "OUT: ", value
281        return value
282
283    @classmethod
284    def mode(self, value):
285        if value == "transfer_fulltime":
286            return "transfer_ft"
287        return value
288
289    @classmethod
290    def password(self, value):
291        if value == "not set":
292            return ""
293        return value
294
295    @classmethod
296    def nationality(self, value):
297        if value == "nigeria":
298            return "NG"
299        return value
300
301    @classmethod
302    def phone(self, value):
303        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
304
305        Dashes and slashes are removed before looking for sequences
306        of numbers.
307        """
308        value = value.replace('-', '')
309        value = value.replace('/', '')
310        match = RE_PHONE.match(value)
311        phone = match.groups()[0]
312        value = '-%s' % phone
313        return value
314
315
316def main():
317    input_file = '%s' % sys.argv[1]
318    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
319    reader = csv.DictReader(open(input_file, 'rb'))
320    writer = None
321
322    for num, row in enumerate(reader):
323        if num == 0:
324            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
325            print "FIELDS: "
326            for x, y in enumerate(reader.fieldnames):
327                print x, y
328            header = convert_fieldnames(reader.fieldnames)
329            writer.writerow(header)
330        for key, value in row.items():
331            if not key in OPTIONS.keys():
332                continue
333            conv_name = OPTIONS[key]
334            converter = getattr(Converters, conv_name, None)
335            if converter is None:
336                print "WARNING: cannot find converter %s" % conv_name
337                continue
338            row[key] = converter(row[key])
339        try:
340            writer.writerow(row)
341        except:
342            print row['student_id']
343
344    print "Output written to %s" % output_file
345
346
347if __name__ == '__main__':
348    if len(sys.argv) != 2:
349        print 'Usage: %s <filename>' % __file__
350        sys.exit(1)
351    main()
Note: See TracBrowser for help on using the repository browser.