source: main/waeup.sirp/trunk/tools/fix_import_file.py @ 7523

Last change on this file since 7523 was 7518, checked in by Henrik Bettermann, 13 years ago

Filename is now an argument of the script.

  • Property svn:keywords set to Id
File size: 4.8 KB
Line 
1## $Id: fix_import_file.py 7518 2012-01-27 06:54:13Z henrik $
2##
3## Copyright (C) 2012 Uli Fouquet & Henrik Bettermann
4## This program is free software; you can redistribute it and/or modify
5## it under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 2 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program; if not, write to the Free Software
16## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17##
18"""
19Fix exports from old SRP portal to make them importable by current portal.
20
21Usage:
22
23Change into this directory, set the options below (files are assumed
24to be in the same directory) and then run
25
26  python fix_import_file.py <filename>
27
28Errors/warnings will be displayed on the shell; the converted data is
29written to '<filename without extension>_edited.csv'.
30"""
31
import os
import sys
33
34
# The script expects exactly one command line argument: the name of the
# file to fix.  Anything else prints the usage line and aborts with exit
# status 1.
argument_count = len(sys.argv) - 1
if argument_count != 1:
    print('Usage: python fix_import_file.py <filename>')
    sys.exit(1)
38
39##
40## CONFIGURATION SECTION
41##
42# file with input data
# Path of the CSV file to read, as given on the command line.
INPUT_FILE = sys.argv[1]

# Path of the CSV file written with the modified output: the input path
# with its extension replaced by '_edited.csv'.  os.path.splitext() only
# strips the extension of the last path component, so inputs such as
# './data.csv', '../data.csv' or 'export.2012.csv' keep their directory
# and the rest of their base name.
OUTPUT_FILE = '%s_edited.csv' % os.path.splitext(sys.argv[1])[0]

# Keys are fieldnames in the input file, values are the names of
# classmethods of class Converters (see below) applied to that column.
OPTIONS = {
    'sex': 'gender',
    'birthday': 'date',
    'request_date': 'datetime',
    'marit_stat': 'marit_stat',
    'entry_session': 'session',
    'current_session': 'session',
    }

# Mapping input file colnames --> output file colnames.  Columns not
# listed here keep their name.
COLNAME_MAPPING = {
    'jamb_reg_no': 'reg_number',
    'birthday': 'date_of_birth',
    }
64##
65## END OF CONFIG
66##
67
68import csv
69import datetime
70import sys
71
def convert_fieldnames(fieldnames, mapping=None):
    """Map each input fieldname to the column name used in the output.

    `fieldnames` is an iterable of the input file's column names.
    `mapping` is a dict of input name --> output name; when omitted the
    module-level COLNAME_MAPPING is used.  Entries of `mapping` whose key
    is not one of `fieldnames` are ignored.

    Returns a dict with one key per input fieldname; the value is the
    mapped name, or the fieldname itself if it is not mapped.
    """
    if mapping is None:
        mapping = COLNAME_MAPPING
    return dict((name, mapping.get(name, name)) for name in fieldnames)
81
class Converters(object):
    """Converters to turn old-style values into new ones.

    Each converter is a classmethod that takes the raw cell value and
    returns the value to be written to the output file.  Values are
    normally strings; ``None`` (which csv.DictReader supplies for cells
    missing from a short row) is accepted and treated like an empty cell.
    """

    @classmethod
    def session(cls, value):
        """Expand a session year to four digits: '08' --> 2008.

        Numbers 0..13 are taken as two-digit years and returned as
        2000..2013; numbers 2000..2014 are returned unchanged.  Anything
        else (non-numeric, ``None``, negative or out of range) yields the
        marker value 9999.  The result is always an int.
        """
        try:
            number = int(value)
        except (TypeError, ValueError):
            # TypeError covers None, ValueError non-numeric strings.
            return 9999
        if 0 <= number < 14:
            return number + 2000
        if 2000 <= number < 2015:
            return number
        return 9999

    @classmethod
    def marit_stat(cls, value):
        """ 'True'/'False' --> 'married'/'unmarried'

        Any other value yields the empty string.
        """
        return {'True': 'married', 'False': 'unmarried'}.get(value, '')

    @classmethod
    def gender(cls, value):
        """ 'True'/'False' --> 'female'/'male'

        Any other value yields the empty string.
        """
        return {'True': 'female', 'False': 'male'}.get(value, '')

    @classmethod
    def date(cls, value):
        """ 'yyyy/mm/dd' --> 'yyyy-mm-dd#'

        'None', the empty string and ``None`` yield the empty string.
        """
        if value is None or value in ('', 'None'):
            return ''
        # We add the hash symbol to avoid automatic date transformation
        # in Excel and Calc for further processing
        return value.replace('/', '-') + '#'

    @classmethod
    def datetime(cls, value):
        """ 'None' --> ''; every other string is passed through unchanged.

        No reformatting of the date/time text takes place here.  ``None``
        yields the empty string as well.
        """
        if value is None or value == 'None':
            return ''
        return value
154
# Read INPUT_FILE row by row, apply the converters configured in OPTIONS
# and write the result to OUTPUT_FILE under the (possibly renamed) header.
#
# Both files are opened in binary mode, as the Python 2 csv module
# expects.  The ``with`` blocks make sure they are closed (and the output
# flushed) even if a converter or the writer raises.
with open(INPUT_FILE, 'rb') as in_file:
    reader = csv.DictReader(in_file)
    with open(OUTPUT_FILE, 'wb') as out_file:
        writer = None
        # fieldnames is None for a completely empty input file; in that
        # case the output file is created but stays empty.
        fieldnames = reader.fieldnames
        if fieldnames:
            writer = csv.DictWriter(out_file, fieldnames)
            print("FIELDS: ")
            for x, y in enumerate(fieldnames):
                print("%s %s" % (x, y))
            # The header row goes out before any data, so an input that
            # has a header but no data rows still yields a valid file.
            header = convert_fieldnames(fieldnames)
            writer.writerow(header)
            for row in reader:
                for key, conv_name in OPTIONS.items():
                    if key not in row:
                        continue
                    converter = getattr(Converters, conv_name, None)
                    if converter is None:
                        print("WARNING: cannot find converter %s" % conv_name)
                        continue
                    row[key] = converter(row[key])
                writer.writerow(row)

print("Output written to %s" % OUTPUT_FILE)
Note: See TracBrowser for help on using the repository browser.