source: main/waeup.kofa/branches/uli-zc-async/tools/fix_import_file.py @ 9957

Last change on this file since 9957 was 9211, checked in by uli, 12 years ago

Rollback r9209. Looks like multiple merges from trunk confuse svn when merging back into trunk.

  • Property svn:keywords set to Id
File size: 10.1 KB
RevLine 
[7512]1## $Id: fix_import_file.py 9211 2012-09-21 08:19:35Z uli $
2##
[7518]3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
[7512]4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
[8781]19Fix exports from old SRP portal and other data sources to make
20them importable by current portal.
[7512]21
22Usage:
23
24Change into this directory, set the options below (files are assumed
25to be in the same directory) and then run
26
[7518]27  python fix_import_file.py <filename>
[7512]28
29Errors/warnings will be displayed on the shell, the output will be put
30into the specified output file.
[8781]31
32
33The lgas.py module must be copied into the same folder where this script
34is started.
[7512]35"""
[7573]36import csv
37import datetime
[7575]38import re
[7573]39import sys
[7512]40
[8781]41try:
42    from lgas import LGAS
43except:
44    print 'ERROR: lgas.py is missing.'
45    sys.exit(1)
46
47def strip(string):
48    string = string.replace('_', '')
49    string = string.replace('/', '')
50    string = string.replace('-', '')
51    string = string.replace(' ', '')
52    string = string.lower()
53    return string
54
55LGAS_inverted_stripped = dict([(strip(i[1]), i[0]) for i in LGAS])
56LGAS_dict = dict(LGAS)
57
[7512]58##
59## CONFIGURATION SECTION
60##
61# keys are fieldnames in input file, values are methods of class
62# Converter (see below)
63OPTIONS = {
[8478]64    'student_id': 'student_id',
[7512]65    'sex': 'gender',
66    'birthday': 'date',
[9211]67    'request_date': 'datetime',
[7514]68    'marit_stat': 'marit_stat',
[7537]69    'session': 'session',
[7514]70    'entry_session': 'session',
71    'current_session': 'session',
[7602]72    'session_id': 'session',
[7632]73    'entry_mode': 'mode',
[7526]74    'reg_state': 'reg_state',
75    'password': 'password',
[7575]76    'phone': 'phone',
[8478]77    'nationality': 'nationality',
[7602]78    'level': 'level',
79    'start_level': 'level',
80    'end_level': 'level',
81    'level_id': 'level',
82    'current_level': 'level',
83    'semester': 'semester',
84    'application_category': 'application_category',
[7610]85    'lga': 'lga',
[7628]86    'order_id': 'no_int',
[7512]87    }
[7516]88
89# Mapping input file colnames --> output file colnames
90COLNAME_MAPPING = {
[7666]91    # base data
[8478]92    'reg_state': 'state',
[7518]93    'jamb_reg_no': 'reg_number',
[7645]94    'matric_no': 'matric_number',
[7516]95    'birthday': 'date_of_birth',
[7526]96    'clr_ac_pin': 'clr_code',
[7550]97    # study course
[7530]98    'study_course': 'certificate',
[7550]99    # study level
[7537]100    'session': 'level_session',
101    'verdict': 'level_verdict',
[7550]102    # course ticket
103    'level_id': 'level',
[7666]104    'core_or_elective': 'mandatory',
[7628]105    # payment ticket
106    'order_id': 'p_id',
107    'status': 'p_state',
108    'category': 'p_category',
109    'resp_pay_reference': 'r_pay_reference',
110    'resp_desc': 'r_desc',
111    'resp_approved_amount': 'r_amount_approved',
112    'item': 'p_item',
113    'amount': 'amount_auth',
114    'resp_card_num': 'r_card_num',
115    'resp_code': 'r_code',
116    'date': 'creation_date',
117    'surcharge': 'surcharge_1',
118    'session_id': 'p_session',
[7516]119    }
[7526]120
[8478]121# Mapping input state --> output state
[7526]122REGSTATE_MAPPING = {
123    'student_created': 'created',
124    'admitted': 'admitted',
[7610]125    'objection_raised': 'clearance started',
[7526]126    'clearance_pin_entered': 'clearance started',
127    'clearance_requested': 'clearance requested',
128    'cleared_and_validated': 'cleared',
129    'school_fee_paid': 'school fee paid',
130    'returning': 'returning',
131    'courses_registered': 'courses registered',
132    'courses_validated': 'courses validated',
133    }
134
[7512]135##
136## END OF CONFIG
137##
138
[7575]139# Look for the first sequence of numbers
140RE_PHONE = re.compile('[^\d]*(\d*)[^\d]*')
141
[7516]142def convert_fieldnames(fieldnames):
143    """Replace input fieldnames by fieldnames of COLNAME_MAPPING.
144    """
[9211]145    header = dict([(name, name) for name in fieldnames])
[7516]146    for in_name, out_name in COLNAME_MAPPING.items():
[9211]147        if in_name not in header:
[7516]148            continue
[9211]149        header[in_name] = out_name
[7514]150    return header
151
[7512]152class Converters():
153    """Converters to turn old-style values into new ones.
154    """
[8478]155
[7512]156    @classmethod
[9211]157    def student_id(self, value):
[8478]158        """ 'A123456' --> 'EA123456'
159        """
160        if len(value) == 7:
[9211]161            return 'E' + value
[8478]162        return value
163
164    @classmethod
[9211]165    def reg_state(self, value):
[7526]166        """ 'courses_validated' --> 'courses validated'
167        """
168        return REGSTATE_MAPPING.get(value,value)
169
170    @classmethod
[9211]171    def level(self, value):
[7602]172        """ '000' --> '10'
173        """
174        try:
175            number = int(value)
176        except ValueError:
177            return 9999
178        if number == 0:
179            return 10
180        return number
181
182    @classmethod
[9211]183    def semester(self, value):
[7602]184        """ '0' --> '9'
185        """
186        try:
187            number = int(value)
188        except ValueError:
189            return 9999
190        if number == 0:
191            return 9
192        return number
193
194    @classmethod
[9211]195    def application_category(self, value):
[7602]196        """ '' --> 'no'
197        """
198        if value == '':
199            return 'no'
200        return value
201
[7610]202    @classmethod
[9211]203    def lga(self, value):
[7610]204        """ Remove apostrophe
205        """
206        if value == 'akwa_ibom_uru_offong_oruko':
207            return 'akwa_ibom_urue-offong-oruko'
208        try:
209            value = value.replace("'","")
210        except:
211            return ''
[8784]212        lower = value.lower()
213        if lower in LGAS_dict.keys():
214            return lower
[8781]215        # If real names are given, let's see if a similar value
216        # in LGAS exist.
[8784]217        value = LGAS_inverted_stripped.get(strip(lower), value)
[7610]218        return value
[7602]219
[7610]220
[7602]221    @classmethod
[9211]222    def session(self, value):
[7514]223        """ '08' --> '2008'
[8478]224        '2008/2009' --> '2008'
[7514]225        """
[8478]226        if '/' in value:
227            numbers = value.split('/')
228            number = int(numbers[0])
229            if number in range(2000,2015):
230                return number
231            else:
232                return 9999
[7514]233        try:
234            number = int(value)
235        except ValueError:
[7602]236            #import pdb; pdb.set_trace()
[7514]237            return 9999
238        if number < 14:
239            return number + 2000
240        elif number in range(2000,2015):
241            return number
242        else:
243            return 9999
244
245    @classmethod
[9211]246    def marit_stat(self, value):
[7514]247        """ 'True'/'False' --> 'married'/'unmarried'
248        """
[7610]249        if value in ('True','married'):
[7514]250            value = 'married'
[7610]251        elif value in ('False','unmarried'):
[7514]252            value = 'unmarried'
253        else:
254            value = ''
255        return value
256
257    @classmethod
[9211]258    def gender(self, value):
[7526]259        """ 'True'/'False' --> 'f'/'m'
[7514]260        """
[8785]261        if value.strip() in ('F', 'True','f'):
[7526]262            value = 'f'
[8785]263        elif value.strip() in ('M', 'False','m'):
[7526]264            value = 'm'
[7514]265        else:
266            value = ''
267        return value
268
269    @classmethod
[9211]270    def date(self, value):
[7512]271        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
272        """
273        if value == "None":
274            value = ""
275        elif value == "":
276            value = ""
277        else:
278            value = value.replace('/', '-')
[7514]279            # We add the hash symbol to avoid automatic date transformation
280            # in Excel and Calc for further processing
281            value += '#'
[7512]282        return value
283
284    @classmethod
[9211]285    def no_int(self, value):
286        """ Add hash.
[7628]287        """
288        # We add the hash symbol to avoid automatic number transformation
289        # in Excel and Calc for further processing
[9211]290        value += '#'
291        return value
292
293
294    @classmethod
295    def datetime(self, value):
296        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd'
297        """
298        #print  "IN: ", value
299        if value == "None":
300            value = ""
301        elif value == "":
302            value = ""
303        else:
304            #value = datetime.datetime.strptime(value, '%Y/%m/%d')
305            #value = datetime.datetime.strftime(value, '%Y-%m-%d')
[7512]306            pass
[9211]307        #print "OUT: ", value
[7512]308        return value
309
[7526]310    @classmethod
[9211]311    def mode(self, value):
[7632]312        if value == "transfer_fulltime":
313            return "transfer_ft"
[8522]314        if value == "ume_ft":
315            return "utme_ft"
[7632]316        return value
317
318    @classmethod
[9211]319    def password(self, value):
[7526]320        if value == "not set":
321            return ""
322        return value
323
[7575]324    @classmethod
[9211]325    def nationality(self, value):
[8478]326        if value == "nigeria":
327            return "NG"
328        return value
329
330    @classmethod
[9211]331    def phone(self, value):
[7575]332        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'
[7526]333
[7575]334        Dashes and slashes are removed before looking for sequences
335        of numbers.
336        """
337        value = value.replace('-', '')
338        value = value.replace('/', '')
339        match = RE_PHONE.match(value)
340        phone = match.groups()[0]
[9211]341        value = '-%s' % phone
342        return value
[7575]343
344
[7572]345def main():
[7573]346    input_file = '%s' % sys.argv[1]
347    output_file = '%s_edited.csv' % sys.argv[1].split('.')[0]
348    reader = csv.DictReader(open(input_file, 'rb'))
[7572]349    writer = None
[7512]350
[7572]351    for num, row in enumerate(reader):
352        if num == 0:
[7573]353            writer = csv.DictWriter(open(output_file, 'wb'), reader.fieldnames)
[7572]354            print "FIELDS: "
355            for x, y in enumerate(reader.fieldnames):
356                print x, y
357            header = convert_fieldnames(reader.fieldnames)
358            writer.writerow(header)
359        for key, value in row.items():
360            if not key in OPTIONS.keys():
361                continue
362            conv_name = OPTIONS[key]
363            converter = getattr(Converters, conv_name, None)
364            if converter is None:
365                print "WARNING: cannot find converter %s" % conv_name
366                continue
[9211]367            row[key] = converter(row[key])
[7602]368        try:
369            writer.writerow(row)
370        except:
371            print row['student_id']
[7512]372
[7573]373    print "Output written to %s" % output_file
[7572]374
375
376if __name__ == '__main__':
377    if len(sys.argv) != 2:
378        print 'Usage: %s <filename>' % __file__
379        sys.exit(1)
380    main()
Note: See TracBrowser for help on using the repository browser.