source: main/waeup.sirp/trunk/tools/fix_import_file.py @ 7663

Last change on this file since 7663 was 7645, checked in by Henrik Bettermann, 13 years ago

Change matric_no column name.

  • Property svn:keywords set to Id
File size: 8.7 KB
RevLine 
[7512]1## $Id: fix_import_file.py 7645 2012-02-14 09:37:40Z henrik $
2##
[7518]3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
[7512]4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
[7518]19Fix exports from old SRP portal to make them importable by current portal.
[7512]20
21Usage:
22
23Change into this directory, set the options below (files are assumed
24to be in the same directory) and then run
25
[7518]26  python fix_import_file.py <filename>
[7512]27
28Errors/warnings will be displayed on the shell, the output will be put
29into the specified output file.
30"""
[7573]31import csv
32import datetime
[7575]33import re
[7573]34import sys
[7512]35
36##
37## CONFIGURATION SECTION
38##
39# keys are fieldnames in input file, values are methods of class
40# Converter (see below)
41OPTIONS = {
42    'sex': 'gender',
43    'birthday': 'date',
44    'request_date': 'datetime',
[7514]45    'marit_stat': 'marit_stat',
[7537]46    'session': 'session',
[7514]47    'entry_session': 'session',
48    'current_session': 'session',
[7602]49    'session_id': 'session',
[7632]50    'entry_mode': 'mode',
[7526]51    'reg_state': 'reg_state',
52    'password': 'password',
[7575]53    'phone': 'phone',
[7602]54    'level': 'level',
55    'start_level': 'level',
56    'end_level': 'level',
57    'level_id': 'level',
58    'current_level': 'level',
59    'semester': 'semester',
60    'application_category': 'application_category',
[7610]61    'lga': 'lga',
[7628]62    'order_id': 'no_int',
[7512]63    }
[7516]64
65# Mapping input file colnames --> output file colnames
66COLNAME_MAPPING = {
[7550]67    # base date
[7518]68    'jamb_reg_no': 'reg_number',
[7645]69    'matric_no': 'matric_number',
[7516]70    'birthday': 'date_of_birth',
[7526]71    'clr_ac_pin': 'clr_code',
[7550]72    # study course
[7530]73    'study_course': 'certificate',
[7550]74    # study level
[7537]75    'session': 'level_session',
76    'verdict': 'level_verdict',
[7550]77    # course ticket
78    'level_id': 'level',
[7628]79    # payment ticket
80    'order_id': 'p_id',
81    'status': 'p_state',
82    'category': 'p_category',
83    'resp_pay_reference': 'r_pay_reference',
84    'resp_desc': 'r_desc',
85    'resp_approved_amount': 'r_amount_approved',
86    'item': 'p_item',
87    'amount': 'amount_auth',
88    'resp_card_num': 'r_card_num',
89    'resp_code': 'r_code',
90    'date': 'creation_date',
91    'surcharge': 'surcharge_1',
92    'session_id': 'p_session',
[7516]93    }
[7526]94
95# Mapping input regh_state --> output reg_state
96REGSTATE_MAPPING = {
97    'student_created': 'created',
98    'admitted': 'admitted',
[7610]99    'objection_raised': 'clearance started',
[7526]100    'clearance_pin_entered': 'clearance started',
101    'clearance_requested': 'clearance requested',
102    'cleared_and_validated': 'cleared',
103    'school_fee_paid': 'school fee paid',
104    'returning': 'returning',
105    'courses_registered': 'courses registered',
106    'courses_validated': 'courses validated',
107    }
108
[7512]109##
110## END OF CONFIG
111##
112
[7575]113# Look for the first sequence of numbers
114RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
115
[7516]116def convert_fieldnames(fieldnames):
117    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
118    """
119    header = dict([(name, name) for name in fieldnames])
120    for in_name, out_name in COLNAME_MAPPING.items():
121        if in_name not in header:
122            continue
123        header[in_name] = out_name
[7514]124    return header
125
[7512]126class Converters():
127    """Converters to turn old-style values into new ones.
128    """
129    @classmethod
[7526]130    def reg_state(self, value):
131        """ 'courses_validated' --> 'courses validated'
132        """
133        return REGSTATE_MAPPING.get(value,value)
134
135    @classmethod
[7602]136    def level(self, value):
137        """ '000' --> '10'
138        """
139        try:
140            number = int(value)
141        except ValueError:
142            return 9999
143        if number == 0:
144            return 10
145        return number
146
147    @classmethod
148    def semester(self, value):
149        """ '0' --> '9'
150        """
151        try:
152            number = int(value)
153        except ValueError:
154            return 9999
155        if number == 0:
156            return 9
157        return number
158
159    @classmethod
160    def application_category(self, value):
161        """ '' --> 'no'
162        """
163        if value == '':
164            return 'no'
165        return value
166
[7610]167    @classmethod
168    def lga(self, value):
169        """ Remove apostrophe
170        """
171        if value == 'akwa_ibom_uru_offong_oruko':
172            return 'akwa_ibom_urue-offong-oruko'
173        try:
174            value = value.replace("'","")
175            value = value.lower()
176        except:
177            return ''
178        return value
[7602]179
[7610]180
[7602]181    @classmethod
[7514]182    def session(self, value):
183        """ '08' --> '2008'
184        """
185        try:
186            number = int(value)
187        except ValueError:
[7602]188            #import pdb; pdb.set_trace()
[7514]189            return 9999
190        if number < 14:
191            return number + 2000
192        elif number in range(2000,2015):
193            return number
194        else:
195            return 9999
196
197    @classmethod
198    def marit_stat(self, value):
199        """ 'True'/'False' --> 'married'/'unmarried'
200        """
[7610]201        if value in ('True','married'):
[7514]202            value = 'married'
[7610]203        elif value in ('False','unmarried'):
[7514]204            value = 'unmarried'
205        else:
206            value = ''
207        return value
208
209    @classmethod
210    def gender(self, value):
[7526]211        """ 'True'/'False' --> 'f'/'m'
[7514]212        """
[7610]213        if value in ('True','f'):
[7526]214            value = 'f'
[7610]215        elif value in ('False','m'):
[7526]216            value = 'm'
[7514]217        else:
218            value = ''
219        return value
220
221    @classmethod
[7512]222    def date(self, value):
223        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
224        """
225        if value == "None":
226            value = ""
227        elif value == "":
228            value = ""
229        else:
230            value = value.replace('/', '-')
[7514]231            # We add the hash symbol to avoid automatic date transformation
232            # in Excel and Calc for further processing
233            value += '#'
[7512]234        return value
235
236    @classmethod
[7628]237    def no_int(self, value):
238        """ Add hash.
239        """
240        # We add the hash symbol to avoid automatic number transformation
241        # in Excel and Calc for further processing
242        value += '#'
243        return value
244
245
246    @classmethod
[7512]247    def datetime(self, value):
248        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
249        """
250        #print  "IN: ", value
251        if value == "None":
252            value = ""
253        elif value == "":
254            value = ""
255        else:
256            #value = datetime.datetime.strptime(value, '%Y/%m/%d')
257            #value = datetime.datetime.strftime(value, '%Y-%m-%d')
258            pass
259        #print "OUT: ", value
260        return value
261
[7526]262    @classmethod
[7632]263    def mode(self, value):
264        if value == "transfer_fulltime":
265            return "transfer_ft"
266        return value
267
268    @classmethod
[7526]269    def password(self, value):
270        if value == "not set":
271            return ""
272        return value
273
[7575]274    @classmethod
275    def phone(self, value):
276        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
[7526]277
[7575]278        Dashes and slashes are removed before looking for sequences
279        of numbers.
280        """
281        value = value.replace('-', '')
282        value = value.replace('/', '')
283        match = RE_PHONE.match(value)
284        phone = match.groups()[0]
285        value = '--%s' % phone
286        return value
287
288
[7572]289def main():
[7573]290    input_file = '%s' % sys.argv[1]
291    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
292    reader = csv.DictReader(open(input_file, 'rb'))
[7572]293    writer = None
[7512]294
[7572]295    for num, row in enumerate(reader):
296        if num == 0:
[7573]297            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
[7572]298            print "FIELDS: "
299            for x, y in enumerate(reader.fieldnames):
300                print x, y
301            header = convert_fieldnames(reader.fieldnames)
302            writer.writerow(header)
303        for key, value in row.items():
304            if not key in OPTIONS.keys():
305                continue
306            conv_name = OPTIONS[key]
307            converter = getattr(Converters, conv_name, None)
308            if converter is None:
309                print "WARNING: cannot find converter %s" % conv_name
310                continue
311            row[key] = converter(row[key])
[7602]312        try:
313            writer.writerow(row)
314        except:
315            print row['student_id']
[7512]316
[7573]317    print "Output written to %s" % output_file
[7572]318
319
320if __name__ == '__main__':
321    if len(sys.argv) != 2:
322        print 'Usage: %s <filename>' % __file__
323        sys.exit(1)
324    main()
Note: See TracBrowser for help on using the repository browser.