source: main/waeup.sirp/trunk/tools/fix_import_file.py @ 7585

Last change on this file since 7585 was 7575, checked in by uli, 13 years ago

First silly phone-number converter.

  • Property svn:keywords set to Id
File size: 6.4 KB
RevLine 
## $Id: fix_import_file.py 7575 2012-02-02 23:03:46Z uli $
##
## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##
"""
Fix exports from old SRP portal to make them importable by current portal.

Usage:

Change into this directory, set the options below (files are assumed
to be in the same directory) and then run

  python fix_import_file.py <filename>

Errors/warnings will be displayed on the shell, the output will be
written to a new file whose name is derived from the input filename
(``<name>_edited.csv``).
"""
import csv
import datetime
import os
import re
import sys
[7512]35
##
## CONFIGURATION SECTION
##
# Per-column value conversion. Each key is a column name of the input
# file; each value is the name of the classmethod of class Converters
# (see below) that is applied to every cell of that column.
OPTIONS = {
    'sex': 'gender',
    'birthday': 'date',
    'request_date': 'datetime',
    'marit_stat': 'marit_stat',
    'session': 'session',
    'entry_session': 'session',
    'current_session': 'session',
    'reg_state': 'reg_state',
    'password': 'password',
    'phone': 'phone',
}
[7516]53
# Column renaming: input file column name --> output file column name.
# Columns not listed here keep their name.
COLNAME_MAPPING = {
    # base data
    'jamb_reg_no': 'reg_number',
    'birthday': 'date_of_birth',
    'clr_ac_pin': 'clr_code',
    # study course
    'study_course': 'certificate',
    # study level
    'session': 'level_session',
    'verdict': 'level_verdict',
    # course ticket
    'level_id': 'level',
}
[7526]68
# Registration state renaming: input reg_state --> output reg_state.
# States not listed here are passed through unchanged by
# Converters.reg_state.
REGSTATE_MAPPING = {
    'student_created': 'created',
    'admitted': 'admitted',
    'clearance_pin_entered': 'clearance started',
    'clearance_requested': 'clearance requested',
    'cleared_and_validated': 'cleared',
    'school_fee_paid': 'school fee paid',
    'returning': 'returning',
    'courses_registered': 'courses registered',
    'courses_validated': 'courses validated',
}
81
##
## END OF CONFIG
##
85
# Capture the first run of digits in a string, skipping any non-digit
# prefix. Every part of the pattern is optional, so match() always
# succeeds; the group is simply empty when the string holds no digits.
# Raw string: '\d' in a plain string literal is an invalid escape
# sequence that newer Python versions warn about.
RE_PHONE = re.compile(r'[^\d]*(\d*)[^\d]*')
88
def convert_fieldnames(fieldnames):
    """Build the header row for the output file.

    Returns a dict mapping every name in `fieldnames` to its output
    column name: the replacement given in COLNAME_MAPPING if there is
    one, the unchanged input name otherwise.
    """
    return dict(
        (column, COLNAME_MAPPING.get(column, column))
        for column in fieldnames)
98
class Converters(object):
    """Converters to turn old-style values into new ones.

    Each converter is a classmethod that takes the raw cell value of an
    input row and returns the value to be written to the output file.
    Converters are looked up by name via the OPTIONS mapping.
    """

    @classmethod
    def reg_state(cls, value):
        """ 'courses_validated' --> 'courses validated'

        Values without an entry in REGSTATE_MAPPING are returned
        unchanged.
        """
        return REGSTATE_MAPPING.get(value, value)

    @classmethod
    def session(cls, value):
        """ '08' --> 2008

        Two-digit values 0..13 are expanded to 2000..2013, values in
        2000..2014 are kept as they are. Anything else (non-numeric,
        missing, negative or out-of-range values) yields the marker
        value 9999. The result is always an int.
        """
        try:
            number = int(value)
        except (TypeError, ValueError):
            # TypeError covers a missing cell (None)
            return 9999
        # The lower bound keeps negative numbers from being turned into
        # years before 2000.
        if 0 <= number < 14:
            return number + 2000
        if 2000 <= number < 2015:
            return number
        return 9999

    @classmethod
    def marit_stat(cls, value):
        """ 'True'/'False' --> 'married'/'unmarried'

        Any other value yields the empty string.
        """
        return {'True': 'married', 'False': 'unmarried'}.get(value, '')

    @classmethod
    def gender(cls, value):
        """ 'True'/'False' --> 'f'/'m'

        Any other value yields the empty string.
        """
        return {'True': 'f', 'False': 'm'}.get(value, '')

    @classmethod
    def date(cls, value):
        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd#'

        The string 'None', the empty string and a missing cell (None)
        yield the empty string.
        """
        if value is None or value in ("None", ""):
            return ""
        # We add the hash symbol to avoid automatic date transformation
        # in Excel and Calc for further processing
        return value.replace('/', '-') + '#'

    @classmethod
    def datetime(cls, value):
        """ 'None' --> ''

        All other values are returned unchanged; the date format is
        currently not converted.
        """
        if value == "None":
            return ""
        return value

    @classmethod
    def password(cls, value):
        """ 'not set' --> ''

        All other values are returned unchanged.
        """
        if value == "not set":
            return ""
        return value

    @classmethod
    def phone(cls, value):
        """ '<num-seq1>-<num-seq2> asd' -> '--<num-seq1><num-seq2>'

        Dashes and slashes are removed before looking for sequences
        of numbers. Only the first sequence of digits is kept; if
        there is none (or the cell is missing) the result is '--'.
        """
        # A missing cell (None) is handled like an empty string.
        digits_only = (value or '').replace('-', '').replace('/', '')
        # RE_PHONE matches any string, so the match object is never None.
        return '--%s' % RE_PHONE.match(digits_only).groups()[0]
197
198
def main():
    """Convert the CSV file named on the command line.

    Reads the file given as ``sys.argv[1]``, renames the columns
    according to COLNAME_MAPPING, runs the cells of all columns listed
    in OPTIONS through the respective converter of class Converters and
    writes the result to ``<input name without extension>_edited.csv``.

    The column names found and any warnings are printed to the shell.
    If the input file has no header row, nothing is written.
    """
    input_file = sys.argv[1]
    # splitext() only strips the extension, so dots in directory names
    # or in the file name itself do not truncate the output path.
    output_file = '%s_edited.csv' % os.path.splitext(input_file)[0]

    # Files are opened in binary mode, as the Python 2 csv module
    # expects; the with-blocks make sure both are flushed and closed.
    with open(input_file, 'rb') as infile:
        reader = csv.DictReader(infile)
        fieldnames = reader.fieldnames
        if not fieldnames:
            print("WARNING: no header row found in %s" % input_file)
            return

        print("FIELDS: ")
        for x, y in enumerate(fieldnames):
            print("%s %s" % (x, y))

        with open(output_file, 'wb') as outfile:
            writer = csv.DictWriter(outfile, fieldnames)
            # The header is written even if no data rows follow.
            writer.writerow(convert_fieldnames(fieldnames))
            for row in reader:
                # list() gives a snapshot of the keys, so cells can be
                # replaced while looping.
                for key in list(row):
                    conv_name = OPTIONS.get(key)
                    if conv_name is None:
                        continue
                    converter = getattr(Converters, conv_name, None)
                    if converter is None:
                        print("WARNING: cannot find converter %s" % conv_name)
                        continue
                    row[key] = converter(row[key])
                writer.writerow(row)

    print("Output written to %s" % output_file)
[7572]225
226
if __name__ == '__main__':
    # Exactly one command line argument, the input file, is expected.
    if len(sys.argv) == 2:
        main()
    else:
        print('Usage: %s <filename>' % __file__)
        sys.exit(1)
Note: See TracBrowser for help on using the repository browser.