source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 7923

Last change on this file since 7923 was 7666, checked in by Henrik Bettermann, 13 years ago

setup.py: Add zope.app.undo.

fix_import_file.py: rename core_or_elective

Some minor changes in rst files.

  • Property svn:keywords set to Id
File size: 8.7 KB
Line 
1## $Id: fix_import_file.py 7666 2012-02-19 08:21:39Z henrik $
2##
3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
19Fix exports from old SRP portal to make them importable by current portal.
20
21Usage:
22
23Change into this directory, set the options below (files are assumed
24to be in the same directory) and then run
25
26  python fix_import_file.py <filename>
27
28Errors/warnings will be displayed on the shell, the output will be written
29to ``<filename without extension>_edited.csv``.
30"""
import csv
import datetime
import os
import re
import sys
35
##
## CONFIGURATION SECTION
##
# Keys are fieldnames (column names) of the input file, values are the
# names of methods of class Converters (see below).  The named method is
# called with each cell of that column and its return value replaces the
# cell.  Columns that are not listed here are copied unchanged.
OPTIONS = {
    'sex': 'gender',
    'birthday': 'date',
    'request_date': 'datetime',
    'marit_stat': 'marit_stat',
    'session': 'session',
    'entry_session': 'session',
    'current_session': 'session',
    'session_id': 'session',
    'entry_mode': 'mode',
    'reg_state': 'reg_state',
    'password': 'password',
    'phone': 'phone',
    'level': 'level',
    'start_level': 'level',
    'end_level': 'level',
    'level_id': 'level',
    'current_level': 'level',
    'semester': 'semester',
    'application_category': 'application_category',
    'lga': 'lga',
    'order_id': 'no_int',
    }
64
# Mapping input file colnames --> output file colnames.
# Only the header row is affected; input columns without an entry here
# keep their original name (see convert_fieldnames).
COLNAME_MAPPING = {
    # base data
    'jamb_reg_no': 'reg_number',
    'matric_no': 'matric_number',
    'birthday': 'date_of_birth',
    'clr_ac_pin': 'clr_code',
    # study course
    'study_course': 'certificate',
    # study level
    'session': 'level_session',
    'verdict': 'level_verdict',
    # course ticket
    'level_id': 'level',
    'core_or_elective': 'mandatory',
    # payment ticket
    'order_id': 'p_id',
    'status': 'p_state',
    'category': 'p_category',
    'resp_pay_reference': 'r_pay_reference',
    'resp_desc': 'r_desc',
    'resp_approved_amount': 'r_amount_approved',
    'item': 'p_item',
    'amount': 'amount_auth',
    'resp_card_num': 'r_card_num',
    'resp_code': 'r_code',
    'date': 'creation_date',
    'surcharge': 'surcharge_1',
    'session_id': 'p_session',
    }
95
# Mapping input reg_state --> output reg_state.
# States that are not listed here are passed through unchanged
# (see Converters.reg_state).
REGSTATE_MAPPING = {
    'student_created': 'created',
    'admitted': 'admitted',
    'objection_raised': 'clearance started',
    'clearance_pin_entered': 'clearance started',
    'clearance_requested': 'clearance requested',
    'cleared_and_validated': 'cleared',
    'school_fee_paid': 'school fee paid',
    'returning': 'returning',
    'courses_registered': 'courses registered',
    'courses_validated': 'courses validated',
    }

##
## END OF CONFIG
##
113
# Capture the first run of digits, skipping any leading non-digits.
# Every part of the pattern is optional, so match() always succeeds and
# the group is '' when the value starts without any digit run.
# A raw string is used so that the backslash escapes reach the regex
# engine untouched instead of being (invalid) string escapes.
RE_PHONE = re.compile(r'\D*(\d*)\D*')
116
def convert_fieldnames(fieldnames):
    """Build the header row for the output file.

    Returns a dict that maps every name in `fieldnames` to the name it
    gets in the output file: the COLNAME_MAPPING entry if there is one,
    the unchanged input name otherwise.
    """
    return dict(
        (name, COLNAME_MAPPING.get(name, name)) for name in fieldnames)
126
class Converters(object):
    """Converters to turn old-style values into new ones.

    Every public method is a classmethod that takes one raw cell value
    as read from the input file and returns the value to be written to
    the output file.  The methods are looked up by name through the
    OPTIONS mapping.
    """

    # Marker written for numeric cells that cannot be interpreted.
    _INVALID = 9999

    @staticmethod
    def _to_int(value):
        """Return ``int(value)``, or None if `value` is no integer.

        ``None`` cells (csv.DictReader fills short rows with None) raise
        TypeError rather than ValueError, so both are caught.
        """
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    @classmethod
    def reg_state(cls, value):
        """ 'courses_validated' --> 'courses validated'

        States missing from REGSTATE_MAPPING are returned unchanged.
        """
        return REGSTATE_MAPPING.get(value, value)

    @classmethod
    def level(cls, value):
        """ '000' --> 10

        Returns an integer; values that are not integers give 9999.
        """
        number = cls._to_int(value)
        if number is None:
            return cls._INVALID
        if number == 0:
            return 10
        return number

    @classmethod
    def semester(cls, value):
        """ '0' --> 9

        Returns an integer; values that are not integers give 9999.
        """
        number = cls._to_int(value)
        if number is None:
            return cls._INVALID
        if number == 0:
            return 9
        return number

    @classmethod
    def application_category(cls, value):
        """ '' --> 'no'
        """
        if value == '':
            return 'no'
        return value

    @classmethod
    def lga(cls, value):
        """ Remove apostrophes and convert to lower case.

        One known misspelled key is replaced by its correct form.
        Values that are not strings give ''.
        """
        if value == 'akwa_ibom_uru_offong_oruko':
            return 'akwa_ibom_urue-offong-oruko'
        try:
            return value.replace("'", "").lower()
        except AttributeError:
            # Not a string (e.g. None for a missing cell).
            return ''

    @classmethod
    def session(cls, value):
        """ '08' --> 2008

        Returns an integer: two-digit years 0..13 are moved into the
        2000s, four-digit years 2000..2014 are kept, anything else
        (including negative numbers and non-integers) gives 9999.
        """
        number = cls._to_int(value)
        if number is None:
            return cls._INVALID
        # NOTE(review): '14' is rejected while '2014' is accepted --
        # confirm whether the two-digit limit should be 14 as well.
        if 0 <= number < 14:
            return number + 2000
        if 2000 <= number <= 2014:
            return number
        return cls._INVALID

    @classmethod
    def marit_stat(cls, value):
        """ 'True'/'False' --> 'married'/'unmarried'

        Already converted values are kept, anything else gives ''.
        """
        if value in ('True', 'married'):
            return 'married'
        if value in ('False', 'unmarried'):
            return 'unmarried'
        return ''

    @classmethod
    def gender(cls, value):
        """ 'True'/'False' --> 'f'/'m'

        Already converted values are kept, anything else gives ''.
        """
        if value in ('True', 'f'):
            return 'f'
        if value in ('False', 'm'):
            return 'm'
        return ''

    @classmethod
    def date(cls, value):
        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd#'

        'None' and '' both give ''.
        """
        if value in ("None", ""):
            return ""
        # We add the hash symbol to avoid automatic date transformation
        # in Excel and Calc for further processing
        return value.replace('/', '-') + '#'

    @classmethod
    def no_int(cls, value):
        """ Add hash.
        """
        # We add the hash symbol to avoid automatic number transformation
        # in Excel and Calc for further processing
        return value + '#'

    @classmethod
    def datetime(cls, value):
        """ 'None' --> ''

        All other values are returned unchanged; the timestamp itself
        is not reformatted.
        """
        if value == "None":
            return ""
        return value

    @classmethod
    def mode(cls, value):
        """ 'transfer_fulltime' --> 'transfer_ft'
        """
        if value == "transfer_fulltime":
            return "transfer_ft"
        return value

    @classmethod
    def password(cls, value):
        """ 'not set' --> ''
        """
        if value == "not set":
            return ""
        return value

    @classmethod
    def phone(cls, value):
        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'

        Dashes and slashes are removed before looking for sequences
        of numbers.
        """
        digits_only = value.replace('-', '').replace('/', '')
        # RE_PHONE always matches; the group is the first digit run.
        phone = RE_PHONE.match(digits_only).groups()[0]
        return '--%s' % phone
288
289
def _open_csv(path, mode):
    """Open `path` the way the csv module expects it.

    `mode` is 'r' or 'w'.  Python 2 wants binary files, Python 3 wants
    text files opened with ``newline=''``.
    """
    if sys.version_info[0] < 3:
        return open(path, mode + 'b')
    return open(path, mode, newline='')


def _find_converters(fieldnames):
    """Return a dict mapping configured columns to their converter.

    Only columns of `fieldnames` that have an entry in OPTIONS are
    considered.  A configured converter that does not exist in class
    Converters is reported once and the column is left unconverted.
    """
    converters = {}
    for name in fieldnames:
        if name not in OPTIONS:
            continue
        conv_name = OPTIONS[name]
        converter = getattr(Converters, conv_name, None)
        if converter is None:
            print("WARNING: cannot find converter %s" % conv_name)
            continue
        converters[name] = converter
    return converters


def main():
    """Convert the CSV file given as first command line argument.

    Every cell of a column listed in OPTIONS is run through its
    converter, the header row is renamed by convert_fieldnames and the
    result is written to ``<input without extension>_edited.csv``.
    Field names, warnings and the output file name are printed.
    Rows that cannot be written are reported and skipped.
    """
    input_file = sys.argv[1]
    # splitext() removes only the final extension, so './students.csv'
    # gives './students_edited.csv' (splitting at the first dot would
    # give '_edited.csv').
    output_file = '%s_edited.csv' % os.path.splitext(input_file)[0]

    with _open_csv(input_file, 'r') as infile:
        reader = csv.DictReader(infile)
        fieldnames = reader.fieldnames
        if not fieldnames:
            print("WARNING: no header row found in %s" % input_file)
            return

        print("FIELDS: ")
        for index, name in enumerate(fieldnames):
            print("%s %s" % (index, name))

        # Resolve the converters once instead of for every cell.
        converters = _find_converters(fieldnames)

        with _open_csv(output_file, 'w') as outfile:
            writer = csv.DictWriter(outfile, fieldnames)
            writer.writerow(convert_fieldnames(fieldnames))
            for row in reader:
                for key, converter in converters.items():
                    row[key] = converter(row[key])
                try:
                    writer.writerow(row)
                except Exception as err:
                    # Best effort: report the broken row and carry on.
                    # get() is used because the input need not have a
                    # student_id column.
                    print("WARNING: skipped row of student %s: %s" % (
                        row.get('student_id'), err))

    print("Output written to %s" % output_file)
319
320
if __name__ == '__main__':
    # Exactly one argument, the input file name, is expected.
    if len(sys.argv) == 2:
        main()
    else:
        print('Usage: %s <filename>' % __file__)
        sys.exit(1)
Note: See TracBrowser for help on using the repository browser.