source: main/waeup.sirp/trunk/tools/fix_import_file.py @ 7618

Last change on this file since 7618 was 7610, checked in by Henrik Bettermann, 13 years ago

Add lga converter.

  • Property svn:keywords set to Id
File size: 7.8 KB
RevLine 
[7512]1## $Id: fix_import_file.py 7610 2012-02-08 13:46:35Z henrik $
2##
[7518]3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
[7512]4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
[7518]19Fix exports from old SRP portal to make them importable by current portal.
[7512]20
21Usage:
22
23Change into this directory, set the options below (files are assumed
24to be in the same directory) and then run
25
[7518]26  python fix_import_file.py <filename>
[7512]27
28Errors/warnings will be displayed on the shell, the output will be written
29to a new file named after the input file with the suffix '_edited.csv'.
30"""
[7573]31import csv
32import datetime
[7575]33import re
[7573]34import sys
[7512]35
36##
37## CONFIGURATION SECTION
38##
# Keys are fieldnames (column names) of the input file, values are names
# of classmethods of class Converters (see below). main() looks up each
# converter by name with getattr() and applies it to every value of that
# column. Values of columns not listed here are left untouched.
OPTIONS = {
    'sex': 'gender',
    'birthday': 'date',
    'request_date': 'datetime',
    'marit_stat': 'marit_stat',
    'session': 'session',
    'entry_session': 'session',
    'current_session': 'session',
    'session_id': 'session',
    'reg_state': 'reg_state',
    'password': 'password',
    'phone': 'phone',
    'level': 'level',
    'start_level': 'level',
    'end_level': 'level',
    'level_id': 'level',
    'current_level': 'level',
    'semester': 'semester',
    'application_category': 'application_category',
    'lga': 'lga',
    }
[7516]62
# Mapping input file colnames --> output file colnames.
# Input columns without an entry here keep their original name
# (see convert_fieldnames below).
COLNAME_MAPPING = {
    # base data
    'jamb_reg_no': 'reg_number',
    'birthday': 'date_of_birth',
    'clr_ac_pin': 'clr_code',
    # study course
    'study_course': 'certificate',
    # study level
    'session': 'level_session',
    'verdict': 'level_verdict',
    # course ticket
    'level_id': 'level',
    }
[7526]77
# Mapping input reg_state --> output reg_state.
# Used by Converters.reg_state; states without an entry here are passed
# through unchanged.
REGSTATE_MAPPING = {
    'student_created': 'created',
    'admitted': 'admitted',
    'objection_raised': 'clearance started',
    'clearance_pin_entered': 'clearance started',
    'clearance_requested': 'clearance requested',
    'cleared_and_validated': 'cleared',
    'school_fee_paid': 'school fee paid',
    'returning': 'returning',
    'courses_registered': 'courses registered',
    'courses_validated': 'courses validated',
    }
91
[7512]92##
93## END OF CONFIG
94##
95
# Look for the first sequence of numbers: skip leading non-digits, capture
# the following run of digits (possibly empty) and consume trailing
# non-digits. Because every part is optional the pattern matches any string.
# A raw string is used so that ``\d`` reaches the regex engine as written
# instead of being treated as an (invalid) string escape sequence.
RE_PHONE = re.compile(r'[^\d]*(\d*)[^\d]*')
98
def convert_fieldnames(fieldnames):
    """Map each input fieldname to the column name used in the output.

    Returns a dict keyed by the given input fieldnames. The value is the
    replacement name from COLNAME_MAPPING if one is defined there, else
    the fieldname itself.
    """
    return dict(
        (name, COLNAME_MAPPING.get(name, name)) for name in fieldnames)
108
class Converters(object):
    """Converters to turn old-style values into new ones.

    Each converter is a classmethod which takes the raw value of a single
    CSV cell and returns the value to be written to the output file. The
    converters are looked up by name through the OPTIONS mapping. A
    missing cell (``None``) is accepted by every converter.
    """

    @classmethod
    def reg_state(cls, value):
        """ 'courses_validated' --> 'courses validated'

        States without an entry in REGSTATE_MAPPING are returned unchanged.
        """
        return REGSTATE_MAPPING.get(value, value)

    @classmethod
    def level(cls, value):
        """ '000' --> 10

        Returns the level as an integer, with level 0 turned into 10.
        Values which cannot be read as an integer (including ``None``)
        give 9999.
        """
        try:
            number = int(value)
        except (TypeError, ValueError):
            # TypeError covers None, ValueError non-numeric strings.
            return 9999
        if number == 0:
            return 10
        return number

    @classmethod
    def semester(cls, value):
        """ '0' --> 9

        Returns the semester as an integer, with semester 0 turned into 9.
        Values which cannot be read as an integer (including ``None``)
        give 9999.
        """
        try:
            number = int(value)
        except (TypeError, ValueError):
            return 9999
        if number == 0:
            return 9
        return number

    @classmethod
    def application_category(cls, value):
        """ '' --> 'no'

        Any other value is returned unchanged.
        """
        if value == '':
            return 'no'
        return value

    @classmethod
    def lga(cls, value):
        """Remove apostrophes and lowercase the value.

        The old id 'akwa_ibom_uru_offong_oruko' is replaced by
        'akwa_ibom_urue-offong-oruko'. Values which are not strings
        (e.g. ``None``) give ''.
        """
        if value == 'akwa_ibom_uru_offong_oruko':
            return 'akwa_ibom_urue-offong-oruko'
        try:
            return value.replace("'", "").lower()
        except (AttributeError, TypeError):
            # Not a (text) string, nothing sensible to convert.
            return ''

    @classmethod
    def session(cls, value):
        """ '08' --> 2008

        Numbers from 0 to 13 are taken as two-digit years of this century,
        numbers from 2000 to 2014 are returned as they are. Everything
        else (other numbers, non-numeric values, ``None``) gives 9999.
        """
        try:
            number = int(value)
        except (TypeError, ValueError):
            return 9999
        # The lower bound keeps negative numbers from becoming years
        # before 2000.
        if 0 <= number < 14:
            return number + 2000
        if 2000 <= number < 2015:
            return number
        return 9999

    @classmethod
    def marit_stat(cls, value):
        """ 'True'/'False' --> 'married'/'unmarried'

        Already converted values are kept, anything else gives ''.
        """
        if value in ('True', 'married'):
            return 'married'
        if value in ('False', 'unmarried'):
            return 'unmarried'
        return ''

    @classmethod
    def gender(cls, value):
        """ 'True'/'False' --> 'f'/'m'

        Already converted values are kept, anything else gives ''.
        """
        if value in ('True', 'f'):
            return 'f'
        if value in ('False', 'm'):
            return 'm'
        return ''

    @classmethod
    def date(cls, value):
        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd#'

        Empty values, the string 'None' and ``None`` give ''.
        """
        if value in (None, "None", ""):
            return ""
        # We add the hash symbol to avoid automatic date transformation
        # in Excel and Calc for further processing
        return value.replace('/', '-') + '#'

    @classmethod
    def datetime(cls, value):
        """ 'None' --> ''

        All other values are returned unchanged; the format of the
        timestamp is not touched.
        """
        if value == "None":
            return ""
        return value

    @classmethod
    def password(cls, value):
        """ 'not set' --> ''

        Any other value is returned unchanged.
        """
        if value == "not set":
            return ""
        return value

    @classmethod
    def phone(cls, value):
        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'

        Dashes and slashes are removed before looking for sequences
        of numbers. Only the first sequence of numbers is kept. An empty
        or missing (``None``) value gives '--'.
        """
        cleaned = (value or '').replace('-', '').replace('/', '')
        # RE_PHONE matches any string, so the match object is never None.
        phone = RE_PHONE.match(cleaned).groups()[0]
        return '--%s' % phone
254
255
def main():
    """Convert the CSV file named by ``sys.argv[1]``.

    The column names of the input file are printed to the shell, then
    every row is run through the converters configured in OPTIONS and
    written to ``<input name without extension>_edited.csv``. The header
    row of the output file holds the column names as renamed by
    convert_fieldnames().

    Rows which cannot be written are reported on the shell and skipped.
    If the input file has no header row nothing is written.
    """
    import os.path  # only needed for building the output file name

    input_file = sys.argv[1]
    # splitext() only strips the extension; splitting at the first dot
    # would mangle paths like './students.csv'.
    output_file = '%s_edited.csv' % os.path.splitext(input_file)[0]

    # The Python 2 csv module expects files opened in binary mode.
    with open(input_file, 'rb') as infile:
        reader = csv.DictReader(infile)
        fieldnames = reader.fieldnames
        if not fieldnames:
            print("WARNING: no header row found in %s" % input_file)
            return

        print("FIELDS: ")
        for pos, name in enumerate(fieldnames):
            print("%s %s" % (pos, name))

        # Resolve the converter of each column once instead of once per
        # cell; a missing converter is reported a single time.
        converters = {}
        for name in fieldnames:
            conv_name = OPTIONS.get(name)
            if conv_name is None:
                continue
            converter = getattr(Converters, conv_name, None)
            if converter is None:
                print("WARNING: cannot find converter %s" % conv_name)
                continue
            converters[name] = converter

        with open(output_file, 'wb') as outfile:
            writer = csv.DictWriter(outfile, fieldnames)
            writer.writerow(convert_fieldnames(fieldnames))
            for num, row in enumerate(reader):
                for key, converter in converters.items():
                    row[key] = converter(row[key])
                try:
                    writer.writerow(row)
                except (ValueError, csv.Error):
                    # E.g. a row with more cells than the header has
                    # columns. Report it and go on with the next row.
                    print("WARNING: cannot write row %d (student_id: %s)" % (
                        num + 1, row.get('student_id')))

    print("Output written to %s" % output_file)
[7572]285
286
if __name__ == '__main__':
    # Exactly one command line argument, the input file, is expected.
    if len(sys.argv) == 2:
        main()
    else:
        print('Usage: %s <filename>' % __file__)
        sys.exit(1)
Note: See TracBrowser for help on using the repository browser.