source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 8889

Last change on this file since 8889 was 8785, checked in by Henrik Bettermann, 13 years ago

Improve gender conversion.

  • Property svn:keywords set to Id
File size: 10.1 KB
Line 
1## $Id: fix_import_file.py 8785 2012-06-24 08:34:52Z henrik $
2##
3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
19Fix exports from old SRP portal and other data sources to make
20them importable by current portal.
21
22Usage:
23
24Change into this directory, set the options below (files are assumed
25to be in the same directory) and then run
26
27  python fix_import_file.py <filename>
28
29Errors/warnings will be displayed on the shell, the output will be put
30into the specified output file.
31
32
33The lgas.py module must be copied into the same folder where this script
34is started.
35"""
36import csv
37import datetime
38import re
39import sys
40
41try:
42    from lgas import LGAS
43except:
44    print 'ERROR: lgas.py is missing.'
45    sys.exit(1)
46
47def strip(string):
48    string = string.replace('_', '')
49    string = string.replace('/', '')
50    string = string.replace('-', '')
51    string = string.replace(' ', '')
52    string = string.lower()
53    return string
54
55LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56LGAS_dict = dict(LGAS)
57
58##
59## CONFIGURATION SECTION
60##
61# keys are fieldnames in input file, values are methods of class
62# Converter (see below)
63OPTIONS = {
64    'student_id': 'student_id',
65    'sex': 'gender',
66    'birthday': 'date',
67    'request_date': 'datetime',
68    'marit_stat': 'marit_stat',
69    'session': 'session',
70    'entry_session': 'session',
71    'current_session': 'session',
72    'session_id': 'session',
73    'entry_mode': 'mode',
74    'reg_state': 'reg_state',
75    'password': 'password',
76    'phone': 'phone',
77    'nationality': 'nationality',
78    'level': 'level',
79    'start_level': 'level',
80    'end_level': 'level',
81    'level_id': 'level',
82    'current_level': 'level',
83    'semester': 'semester',
84    'application_category': 'application_category',
85    'lga': 'lga',
86    'order_id': 'no_int',
87    }
88
89# Mapping input file colnames --> output file colnames
90COLNAME_MAPPING = {
91    # base data
92    'reg_state': 'state',
93    'jamb_reg_no': 'reg_number',
94    'matric_no': 'matric_number',
95    'birthday': 'date_of_birth',
96    'clr_ac_pin': 'clr_code',
97    # study course
98    'study_course': 'certificate',
99    # study level
100    'session': 'level_session',
101    'verdict': 'level_verdict',
102    # course ticket
103    'level_id': 'level',
104    'core_or_elective': 'mandatory',
105    # payment ticket
106    'order_id': 'p_id',
107    'status': 'p_state',
108    'category': 'p_category',
109    'resp_pay_reference': 'r_pay_reference',
110    'resp_desc': 'r_desc',
111    'resp_approved_amount': 'r_amount_approved',
112    'item': 'p_item',
113    'amount': 'amount_auth',
114    'resp_card_num': 'r_card_num',
115    'resp_code': 'r_code',
116    'date': 'creation_date',
117    'surcharge': 'surcharge_1',
118    'session_id': 'p_session',
119    }
120
121# Mapping input state --> output state
122REGSTATE_MAPPING = {
123    'student_created': 'created',
124    'admitted': 'admitted',
125    'objection_raised': 'clearance started',
126    'clearance_pin_entered': 'clearance started',
127    'clearance_requested': 'clearance requested',
128    'cleared_and_validated': 'cleared',
129    'school_fee_paid': 'school fee paid',
130    'returning': 'returning',
131    'courses_registered': 'courses registered',
132    'courses_validated': 'courses validated',
133    }
134
135##
136## END OF CONFIG
137##
138
139# Look for the first sequence of numbers
140RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
141
142def convert_fieldnames(fieldnames):
143    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
144    """
145    header = dict([(name, name) for name in fieldnames])
146    for in_name, out_name in COLNAME_MAPPING.items():
147        if in_name not in header:
148            continue
149        header[in_name] = out_name
150    return header
151
152class Converters():
153    """Converters to turn old-style values into new ones.
154    """
155
156    @classmethod
157    def student_id(self, value):
158        """ 'A123456' --> 'EA123456'
159        """
160        if len(value) == 7:
161            return 'E' + value
162        return value
163
164    @classmethod
165    def reg_state(self, value):
166        """ 'courses_validated' --> 'courses validated'
167        """
168        return REGSTATE_MAPPING.get(value,value)
169
170    @classmethod
171    def level(self, value):
172        """ '000' --> '10'
173        """
174        try:
175            number = int(value)
176        except ValueError:
177            return 9999
178        if number == 0:
179            return 10
180        return number
181
182    @classmethod
183    def semester(self, value):
184        """ '0' --> '9'
185        """
186        try:
187            number = int(value)
188        except ValueError:
189            return 9999
190        if number == 0:
191            return 9
192        return number
193
194    @classmethod
195    def application_category(self, value):
196        """ '' --> 'no'
197        """
198        if value == '':
199            return 'no'
200        return value
201
202    @classmethod
203    def lga(self, value):
204        """ Remove apostrophe
205        """
206        if value == 'akwa_ibom_uru_offong_oruko':
207            return 'akwa_ibom_urue-offong-oruko'
208        try:
209            value = value.replace("'","")
210        except:
211            return ''
212        lower = value.lower()
213        if lower in LGAS_dict.keys():
214            return lower
215        # If real names are given, let's see if a similar value
216        # in LGAS exist.
217        value = LGAS_inverted_stripped.get(strip(lower), value)
218        return value
219
220
221    @classmethod
222    def session(self, value):
223        """ '08' --> '2008'
224        '2008/2009' --> '2008'
225        """
226        if '/' in value:
227            numbers = value.split('/')
228            number = int(numbers[0])
229            if number in range(2000,2015):
230                return number
231            else:
232                return 9999
233        try:
234            number = int(value)
235        except ValueError:
236            #import pdb; pdb.set_trace()
237            return 9999
238        if number < 14:
239            return number + 2000
240        elif number in range(2000,2015):
241            return number
242        else:
243            return 9999
244
245    @classmethod
246    def marit_stat(self, value):
247        """ 'True'/'False' --> 'married'/'unmarried'
248        """
249        if value in ('True','married'):
250            value = 'married'
251        elif value in ('False','unmarried'):
252            value = 'unmarried'
253        else:
254            value = ''
255        return value
256
257    @classmethod
258    def gender(self, value):
259        """ 'True'/'False' --> 'f'/'m'
260        """
261        if value.strip() in ('F', 'True','f'):
262            value = 'f'
263        elif value.strip() in ('M', 'False','m'):
264            value = 'm'
265        else:
266            value = ''
267        return value
268
269    @classmethod
270    def date(self, value):
271        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
272        """
273        if value == "None":
274            value = ""
275        elif value == "":
276            value = ""
277        else:
278            value = value.replace('/', '-')
279            # We add the hash symbol to avoid automatic date transformation
280            # in Excel and Calc for further processing
281            value += '#'
282        return value
283
284    @classmethod
285    def no_int(self, value):
286        """ Add hash.
287        """
288        # We add the hash symbol to avoid automatic number transformation
289        # in Excel and Calc for further processing
290        value += '#'
291        return value
292
293
294    @classmethod
295    def datetime(self, value):
296        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
297        """
298        #print  "IN: ", value
299        if value == "None":
300            value = ""
301        elif value == "":
302            value = ""
303        else:
304            #value = datetime.datetime.strptime(value, '%Y/%m/%d')
305            #value = datetime.datetime.strftime(value, '%Y-%m-%d')
306            pass
307        #print "OUT: ", value
308        return value
309
310    @classmethod
311    def mode(self, value):
312        if value == "transfer_fulltime":
313            return "transfer_ft"
314        if value == "ume_ft":
315            return "utme_ft"
316        return value
317
318    @classmethod
319    def password(self, value):
320        if value == "not set":
321            return ""
322        return value
323
324    @classmethod
325    def nationality(self, value):
326        if value == "nigeria":
327            return "NG"
328        return value
329
330    @classmethod
331    def phone(self, value):
332        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
333
334        Dashes and slashes are removed before looking for sequences
335        of numbers.
336        """
337        value = value.replace('-', '')
338        value = value.replace('/', '')
339        match = RE_PHONE.match(value)
340        phone = match.groups()[0]
341        value = '-%s' % phone
342        return value
343
344
345def main():
346    input_file = '%s' % sys.argv[1]
347    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
348    reader = csv.DictReader(open(input_file, 'rb'))
349    writer = None
350
351    for num, row in enumerate(reader):
352        if num == 0:
353            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
354            print "FIELDS: "
355            for x, y in enumerate(reader.fieldnames):
356                print x, y
357            header = convert_fieldnames(reader.fieldnames)
358            writer.writerow(header)
359        for key, value in row.items():
360            if not key in OPTIONS.keys():
361                continue
362            conv_name = OPTIONS[key]
363            converter = getattr(Converters, conv_name, None)
364            if converter is None:
365                print "WARNING: cannot find converter %s" % conv_name
366                continue
367            row[key] = converter(row[key])
368        try:
369            writer.writerow(row)
370        except:
371            print row['student_id']
372
373    print "Output written to %s" % output_file
374
375
376if __name__ == '__main__':
377    if len(sys.argv) != 2:
378        print 'Usage: %s <filename>' % __file__
379        sys.exit(1)
380    main()
Note: See TracBrowser for help on using the repository browser.