source: main/waeup.kofa/trunk/tools/fix_import_file.py @ 8889

Last change on this file since 8889 was 8785, checked in by Henrik Bettermann, 13 years ago

Improve gender conversion.

  • Property svn:keywords set to Id
File size: 10.1 KB
RevLine 
[7512]1## $Id: fix_import_file.py 8785 2012-06-24 08:34:52Z henrik $
2##
[7518]3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
[7512]4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
[8781]19Fix exports from old SRP portal and other data sources to make
20them importable by current portal.
[7512]21
22Usage:
23
24Change into this directory, set the options below (files are assumed
25to be in the same directory) and then run
26
[7518]27  python fix_import_file.py <filename>
[7512]28
29Errors/warnings will be displayed on the shell, the output will be put
30into the specified output file.
[8781]31
32
33The lgas.py module must be copied into the same folder where this script
34is started.
[7512]35"""
[7573]36import csv
37import datetime
[7575]38import re
[7573]39import sys
[7512]40
[8781]41try:
42    from lgas import LGAS
43except:
44    print 'ERROR: lgas.py is missing.'
45    sys.exit(1)
46
47def strip(string):
48    string = string.replace('_', '')
49    string = string.replace('/', '')
50    string = string.replace('-', '')
51    string = string.replace(' ', '')
52    string = string.lower()
53    return string
54
55LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56LGAS_dict = dict(LGAS)
57
[7512]58##
59## CONFIGURATION SECTION
60##
61# keys are fieldnames in input file, values are methods of class
62# Converter (see below)
63OPTIONS = {
[8478]64    'student_id': 'student_id',
[7512]65    'sex': 'gender',
66    'birthday': 'date',
67    'request_date': 'datetime',
[7514]68    'marit_stat': 'marit_stat',
[7537]69    'session': 'session',
[7514]70    'entry_session': 'session',
71    'current_session': 'session',
[7602]72    'session_id': 'session',
[7632]73    'entry_mode': 'mode',
[7526]74    'reg_state': 'reg_state',
75    'password': 'password',
[7575]76    'phone': 'phone',
[8478]77    'nationality': 'nationality',
[7602]78    'level': 'level',
79    'start_level': 'level',
80    'end_level': 'level',
81    'level_id': 'level',
82    'current_level': 'level',
83    'semester': 'semester',
84    'application_category': 'application_category',
[7610]85    'lga': 'lga',
[7628]86    'order_id': 'no_int',
[7512]87    }
[7516]88
89# Mapping input file colnames --> output file colnames
90COLNAME_MAPPING = {
[7666]91    # base data
[8478]92    'reg_state': 'state',
[7518]93    'jamb_reg_no': 'reg_number',
[7645]94    'matric_no': 'matric_number',
[7516]95    'birthday': 'date_of_birth',
[7526]96    'clr_ac_pin': 'clr_code',
[7550]97    # study course
[7530]98    'study_course': 'certificate',
[7550]99    # study level
[7537]100    'session': 'level_session',
101    'verdict': 'level_verdict',
[7550]102    # course ticket
103    'level_id': 'level',
[7666]104    'core_or_elective': 'mandatory',
[7628]105    # payment ticket
106    'order_id': 'p_id',
107    'status': 'p_state',
108    'category': 'p_category',
109    'resp_pay_reference': 'r_pay_reference',
110    'resp_desc': 'r_desc',
111    'resp_approved_amount': 'r_amount_approved',
112    'item': 'p_item',
113    'amount': 'amount_auth',
114    'resp_card_num': 'r_card_num',
115    'resp_code': 'r_code',
116    'date': 'creation_date',
117    'surcharge': 'surcharge_1',
118    'session_id': 'p_session',
[7516]119    }
[7526]120
[8478]121# Mapping input state --> output state
[7526]122REGSTATE_MAPPING = {
123    'student_created': 'created',
124    'admitted': 'admitted',
[7610]125    'objection_raised': 'clearance started',
[7526]126    'clearance_pin_entered': 'clearance started',
127    'clearance_requested': 'clearance requested',
128    'cleared_and_validated': 'cleared',
129    'school_fee_paid': 'school fee paid',
130    'returning': 'returning',
131    'courses_registered': 'courses registered',
132    'courses_validated': 'courses validated',
133    }
134
[7512]135##
136## END OF CONFIG
137##
138
[7575]139# Look for the first sequence of numbers
140RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
141
[7516]142def convert_fieldnames(fieldnames):
143    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
144    """
145    header = dict([(name, name) for name in fieldnames])
146    for in_name, out_name in COLNAME_MAPPING.items():
147        if in_name not in header:
148            continue
149        header[in_name] = out_name
[7514]150    return header
151
[7512]152class Converters():
153    """Converters to turn old-style values into new ones.
154    """
[8478]155
[7512]156    @classmethod
[8478]157    def student_id(self, value):
158        """ 'A123456' --> 'EA123456'
159        """
160        if len(value) == 7:
161            return 'E' + value
162        return value
163
164    @classmethod
[7526]165    def reg_state(self, value):
166        """ 'courses_validated' --> 'courses validated'
167        """
168        return REGSTATE_MAPPING.get(value,value)
169
170    @classmethod
[7602]171    def level(self, value):
172        """ '000' --> '10'
173        """
174        try:
175            number = int(value)
176        except ValueError:
177            return 9999
178        if number == 0:
179            return 10
180        return number
181
182    @classmethod
183    def semester(self, value):
184        """ '0' --> '9'
185        """
186        try:
187            number = int(value)
188        except ValueError:
189            return 9999
190        if number == 0:
191            return 9
192        return number
193
194    @classmethod
195    def application_category(self, value):
196        """ '' --> 'no'
197        """
198        if value == '':
199            return 'no'
200        return value
201
[7610]202    @classmethod
203    def lga(self, value):
204        """ Remove apostrophe
205        """
206        if value == 'akwa_ibom_uru_offong_oruko':
207            return 'akwa_ibom_urue-offong-oruko'
208        try:
209            value = value.replace("'","")
210        except:
211            return ''
[8784]212        lower = value.lower()
213        if lower in LGAS_dict.keys():
214            return lower
[8781]215        # If real names are given, let's see if a similar value
216        # in LGAS exist.
[8784]217        value = LGAS_inverted_stripped.get(strip(lower), value)
[7610]218        return value
[7602]219
[7610]220
[7602]221    @classmethod
[7514]222    def session(self, value):
223        """ '08' --> '2008'
[8478]224        '2008/2009' --> '2008'
[7514]225        """
[8478]226        if '/' in value:
227            numbers = value.split('/')
228            number = int(numbers[0])
229            if number in range(2000,2015):
230                return number
231            else:
232                return 9999
[7514]233        try:
234            number = int(value)
235        except ValueError:
[7602]236            #import pdb; pdb.set_trace()
[7514]237            return 9999
238        if number < 14:
239            return number + 2000
240        elif number in range(2000,2015):
241            return number
242        else:
243            return 9999
244
245    @classmethod
246    def marit_stat(self, value):
247        """ 'True'/'False' --> 'married'/'unmarried'
248        """
[7610]249        if value in ('True','married'):
[7514]250            value = 'married'
[7610]251        elif value in ('False','unmarried'):
[7514]252            value = 'unmarried'
253        else:
254            value = ''
255        return value
256
257    @classmethod
258    def gender(self, value):
[7526]259        """ 'True'/'False' --> 'f'/'m'
[7514]260        """
[8785]261        if value.strip() in ('F', 'True','f'):
[7526]262            value = 'f'
[8785]263        elif value.strip() in ('M', 'False','m'):
[7526]264            value = 'm'
[7514]265        else:
266            value = ''
267        return value
268
269    @classmethod
[7512]270    def date(self, value):
271        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
272        """
273        if value == "None":
274            value = ""
275        elif value == "":
276            value = ""
277        else:
278            value = value.replace('/', '-')
[7514]279            # We add the hash symbol to avoid automatic date transformation
280            # in Excel and Calc for further processing
281            value += '#'
[7512]282        return value
283
284    @classmethod
[7628]285    def no_int(self, value):
286        """ Add hash.
287        """
288        # We add the hash symbol to avoid automatic number transformation
289        # in Excel and Calc for further processing
290        value += '#'
291        return value
292
293
294    @classmethod
[7512]295    def datetime(self, value):
296        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
297        """
298        #print  "IN: ", value
299        if value == "None":
300            value = ""
301        elif value == "":
302            value = ""
303        else:
304            #value = datetime.datetime.strptime(value, '%Y/%m/%d')
305            #value = datetime.datetime.strftime(value, '%Y-%m-%d')
306            pass
307        #print "OUT: ", value
308        return value
309
[7526]310    @classmethod
[7632]311    def mode(self, value):
312        if value == "transfer_fulltime":
313            return "transfer_ft"
[8522]314        if value == "ume_ft":
315            return "utme_ft"
[7632]316        return value
317
318    @classmethod
[7526]319    def password(self, value):
320        if value == "not set":
321            return ""
322        return value
323
[7575]324    @classmethod
[8478]325    def nationality(self, value):
326        if value == "nigeria":
327            return "NG"
328        return value
329
330    @classmethod
[7575]331    def phone(self, value):
332        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
[7526]333
[7575]334        Dashes and slashes are removed before looking for sequences
335        of numbers.
336        """
337        value = value.replace('-', '')
338        value = value.replace('/', '')
339        match = RE_PHONE.match(value)
340        phone = match.groups()[0]
[8478]341        value = '-%s' % phone
[7575]342        return value
343
344
[7572]345def main():
[7573]346    input_file = '%s' % sys.argv[1]
347    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
348    reader = csv.DictReader(open(input_file, 'rb'))
[7572]349    writer = None
[7512]350
[7572]351    for num, row in enumerate(reader):
352        if num == 0:
[7573]353            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
[7572]354            print "FIELDS: "
355            for x, y in enumerate(reader.fieldnames):
356                print x, y
357            header = convert_fieldnames(reader.fieldnames)
358            writer.writerow(header)
359        for key, value in row.items():
360            if not key in OPTIONS.keys():
361                continue
362            conv_name = OPTIONS[key]
363            converter = getattr(Converters, conv_name, None)
364            if converter is None:
365                print "WARNING: cannot find converter %s" % conv_name
366                continue
367            row[key] = converter(row[key])
[7602]368        try:
369            writer.writerow(row)
370        except:
371            print row['student_id']
[7512]372
[7573]373    print "Output written to %s" % output_file
[7572]374
375
376if __name__ == '__main__':
377    if len(sys.argv) != 2:
378        print 'Usage: %s <filename>' % __file__
379        sys.exit(1)
380    main()
Note: See TracBrowser for help on using the repository browser.