Ignore:
Timestamp:
23 Mar 2013, 10:48:31 (12 years ago)
Author:
uli
Message:

Make logfile parsing unicode-aware.

Location:
main/waeup.kofa/trunk/src/waeup/kofa
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • main/waeup.kofa/trunk/src/waeup/kofa/datacenter.py

    r9769 r10045  
    2121tools for importing/exporting CSV data.
    2222"""
     23import codecs
     24import fnmatch
     25import grok
    2326import os
    24 import fnmatch
    2527import re
    2628import shutil
    27 import grok
    2829from datetime import datetime
    2930from zope.component import getUtility
     
    291292        Matches are found per line only (no multiline matches).
    292293
     294        Result lines are returned as unicode instances decoded from
     295        UTF-8 encoding. This means that logfiles must provide UTF-8
     296        encoding for umlauts etc. if these should be rendered
     297        properly. The returned unicode lines can be fed to page
     298        templates even if they contain non-ASCII characters.
     299
    293300        This method raises ValueError if some basic condition is not
    294301        met, for instance if the given query string is not a valid
     
    312319            if not os.path.isfile(path):
    313320                continue
    314             for line in open(path, 'rb'):
     321            for line in codecs.open(path, 'rb', 'utf-8'):
    315322                if not re_query.search(line):
    316323                    continue
     
    386393        if self._parsed:
    387394            return
    388         for line in open(self.context, 'rb'):
     395        for line in codecs.open(self.context, 'rb', 'utf-8'):
    389396            line = line.strip()
    390397            if not ':' in line:
  • main/waeup.kofa/trunk/src/waeup/kofa/tests/test_datacenter.py

    r9217 r10045  
     1# -*- coding: utf-8 -*-
    12## Tests for datacenter
    23##
     
    4243                fp.write('Msg %d\n' % (n + 1))
    4344                fp.write('Other Line %d\n' % (n + 1))
     45            fp.write('A Message with Ümläüt')
    4446            fp.close()
    4547            path = self.logpath + '.%d' % m
     
    5759        open(self.logpath, 'wb').write('Message 1\n')
    5860        result = list(datacenter.queryLogfiles('myapp.log', 'Message'))
    59         self.assertEqual(result, ['Message 1\n'])
     61        self.assertEqual(result, [u'Message 1\n'])
    6062        return
    6163
     
    6870        result = list(datacenter.queryLogfiles('myapp.log', 'Msg'))
    6971        # entry of logfile .10 comes after entry of logfile .2
    70         self.assertEqual(result, ['Msg 1\n', 'Msg 2\n', 'Msg 3\n'])
     72        self.assertEqual(result, [u'Msg 1\n', u'Msg 2\n', u'Msg 3\n'])
    7173        return
    7274
     
    7981        result = list(datacenter.queryLogfiles('myapp.log', 'Msg'))
    8082        # Msg 2 and 3 won't show up in results.
    81         self.assertEqual(result, ['Msg 1\n'])
     83        self.assertEqual(result, [u'Msg 1\n'])
    8284        return
    8385
     
    116118            'myapp.log', 'Msg', start=25))
    117119        self.assertEqual(len(result), 25)
    118         self.assertEqual(result[0], 'Msg 26\n')
     120        self.assertEqual(result[0], u'Msg 26\n')
    119121        return
    120122
     
    126128            'myapp.log', 'Msg', start=25, limit=10))
    127129        self.assertEqual(len(result), 10)
    128         self.assertEqual(result[0], 'Msg 26\n')
    129         self.assertEqual(result[-1], 'Msg 35\n')
     130        self.assertEqual(result[0], u'Msg 26\n')
     131        self.assertEqual(result[-1], u'Msg 35\n')
    130132        return
    131133
     
    137139            'myapp.log', 'Msg', start=45, limit=10))
    138140        self.assertEqual(len(result), 5)
    139         self.assertEqual(result[0], 'Msg 46\n')
    140         self.assertEqual(result[-1], 'Msg 50\n')
     141        self.assertEqual(result[0], u'Msg 46\n')
     142        self.assertEqual(result[-1], u'Msg 50\n')
    141143        return
    142144
     
    148150            'myapp.log', 'Msg', start=45, limit=10))
    149151        self.assertEqual(len(result), 10)
    150         self.assertEqual(result[0], 'Msg 46\n')
    151         self.assertEqual(result[-1], 'Msg 55\n')
     152        self.assertEqual(result[0], u'Msg 46\n')
     153        self.assertEqual(result[-1], u'Msg 55\n')
    152154        return
    153155
     
    158160        result = list(datacenter.queryLogfiles('myapp.log', 'sg 1\n'))
    159161        self.assertEqual(len(result), 1)
    160         self.assertEqual(result[0], 'Msg 1\n')
     162        self.assertEqual(result[0], u'Msg 1\n')
     163        return
     164
     165    def test_query_logfiles_umlauts(self):
     166        # we return results as unicode decoded from utf-8
     167        datacenter = DataCenter()
     168        self.fill_logfile()
     169        result = list(datacenter.queryLogfiles(
     170            'myapp.log', u'Ümläüt'))
     171        self.assertTrue(isinstance(result[0], unicode))
     172        self.assertEqual(result, [u'A Message with Ümläüt'])
    161173        return
    162174
Note: See TracChangeset for help on using the changeset viewer.