1 | """Support for CSV files. |
---|
2 | |
---|
3 | See README.txt and interfaces.py to learn more. |
---|
4 | """ |
---|
5 | |
---|
6 | import csv |
---|
7 | import grok |
---|
8 | from waeup.csvfile.interfaces import ICSVFile |
---|
9 | |
---|
class CSVFile(grok.Adapter):
    """A CSV file abstraction.

    Adapts a path (a string) to `ICSVFile`.  The header row of the file
    is checked against `required_fields` as soon as the wrapper is
    created, so an instance only exists for files that provide every
    required column.
    """
    grok.context(basestring)
    grok.implements(ICSVFile)

    # Column names that must be present in the header row of the file.
    required_fields = []
    # Path of the wrapped CSV file; set per instance in __init__.
    path = None

    def __init__(self, context):
        """Remember the path and validate the header of the file.

        `context` is the path of the CSV file.

        Raises `TypeError` if a required column is missing; errors from
        opening the file propagate unchanged.
        """
        self.path = context
        self.checkFields()

    def checkFields(self):
        """Make sure every entry of `required_fields` is a column header.

        Raises `TypeError` naming the missing columns otherwise.
        """
        headers = self.getHeaderFields()
        missing = [x for x in self.required_fields
                   if x not in headers]
        if missing:
            raise TypeError('Missing columns in CSV file: %s' % missing)

    def getDictReader(self):
        """Return a fresh `csv.DictReader` positioned at the file start."""
        # NOTE(review): the file object is handed to the reader and never
        # closed explicitly; it is only released when the reader is
        # garbage collected.
        return csv.DictReader(open(self.path, 'rb'))

    def getData(self):
        """Yield every data row, passed through `processRow`."""
        for row in self.getDictReader():
            yield self.processRow(row)

    def processRow(self, row):
        """Hook for post-processing a row; the default returns it as is."""
        return row

    def getHeaderFields(self):
        """Return the sorted list of column names found in the header.

        A file without any data rows still reports its header; a file
        without a header row yields an empty list.
        """
        reader = self.getDictReader()
        try:
            # Older `csv` versions only fill in `fieldnames` once a row
            # has been requested, so keep asking for one.
            reader.next()
        except StopIteration:
            # No data rows (or no content at all): this must not leak
            # out of the constructor as a StopIteration.
            pass
        return sorted(reader.fieldnames or [])
---|
49 | |
---|
50 | |
---|
# Registry of the CSV wrapper classes that getCSVFile() chooses from.
csvwrappers = [CSVFile]
---|
53 | |
---|
def getScore(path, wrapperclass):
    """Compute a score for a certain wrapper related to a certain CSV
    file.

    `wrapperclass` is called with `path` to build a wrapper.

    Returns -1 if the wrapper cannot be built or if not all of its
    `required_fields` appear in the header of the file, 0 if the wrapper
    requires no fields at all, and the number of required fields
    otherwise.
    """
    try:
        wrapper = wrapperclass(path)
    except Exception:
        # An ordinary failure means this wrapper cannot handle the file.
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        return -1
    required = wrapper.required_fields
    if not required:
        return 0
    available_headers = wrapper.getHeaderFields()
    score = len([x for x in required if x in available_headers])
    if score < len(required):
        return -1
    return score
---|
71 | |
---|
72 | |
---|
def getCSVFile(path):
    """Return the best matching wrapper instance for the CSV file at
    `path`, or None if no registered wrapper accepts the file.

    Every class in `csvwrappers` is rated with `getScore`; the highest
    score wins and, on equal scores, the class registered later wins.
    """
    chosen = None
    top_score = -1
    for candidate in csvwrappers:
        points = getScore(path, candidate)
        # -1 marks a wrapper that cannot handle the file at all.
        if points != -1 and points >= top_score:
            top_score = points
            chosen = candidate
    if chosen is None:
        return None
    return chosen(path)
---|
87 | |
---|