Changeset 8466


Ignore:
Timestamp:
17 May 2012, 16:30:02 (13 years ago)
Author:
uli
Message:

Add helper functions to determine the file format of media files.

Location:
main/waeup.kofa/trunk/src/waeup/kofa/utils
Files:
3 added
2 edited

Legend:

Unmodified
Added
Removed
  • main/waeup.kofa/trunk/src/waeup/kofa/utils/helpers.py

    r8370 r8466  
    1919"""
    2020import datetime
     21import imghdr
    2122import os
    2223import pytz
     
    588589        dt = pytz.utc.localize(dt)
    589590    return tz.normalize(dt.tzinfo.normalize(dt).astimezone(tz))
     591
     592def get_fileformat(path, bytestream=None):
     593    """Try to determine the file format of a given media file.
     594
     595    Although checks done here are not done very thoroughly, they make
     596    no assumptions about the filetype by looking at its filename
     597    extension or similar. Instead they check header data to comply
     598    with common known rules (Magic Words).
     599
     600    If bytestream is not `None` the `path` is ignored.
     601
     602    Returns filetype as string (something like ``'jpg'``) if
     603    file-format can be recognized, ``None`` else.
     604
     605    Tested recognized filetypes currently are `jpg`, `png`, and `pdf`.
     606
     607    More filetypes (though untested in waeup.kofa) are automatically
     608    recognized because we deploy the stdlib `imghdr` library. See this
     609    module's docs for a complete list of filetypes recognized.
     610    """
     611    if path is None and bytestream is None:
     612        return None
     613
     614    img_type = None
     615    if bytestream is not None:
     616        img_type = imghdr.what(path, bytestream)
     617    else:
     618        img_type = imghdr.what(path)
     619    for name, replacement in (('jpeg', 'jpg'), ('tiff', 'tif')):
     620        if img_type == name:
     621            img_type = replacement
     622    return img_type
     623
     624def check_pdf(bytestream, file):
     625    """Tell whether a file or bytestream is a PDF file.
     626
     627    Works as a test/plugin for the stdlib `imghdr` library.
     628    """
     629    if file is not None:
     630        file.seek(0)
     631        bytestream = file.read(4)
     632        file.seek(0)
     633
     634    if bytestream.startswith('%PDF'):
     635        return 'pdf'
     636    return None
     637
     638# register check_pdf as header check function with `imghdr`
     639if check_pdf not in imghdr.tests:
     640    imghdr.tests.append(check_pdf)
  • main/waeup.kofa/trunk/src/waeup/kofa/utils/tests/test_helpers.py

    r8369 r8466  
    317317        return
    318318
     319class GetFileFormatTestCase(unittest.TestCase):
     320    # Tests for the get_fileformat helper.
     321
     322    def setUp(self):
     323        self.valid_jpg_path = os.path.join(
     324            os.path.dirname(__file__), 'sample_jpg_valid.jpg')
     325        self.valid_jpg = open(self.valid_jpg_path, 'rb').read()
     326        self.valid_png_path = os.path.join(
     327            os.path.dirname(__file__), 'sample_png_valid.png')
     328        self.valid_png = open(self.valid_png_path, 'rb').read()
     329        self.valid_pdf_path = os.path.join(
     330            os.path.dirname(__file__), 'sample_pdf_valid.pdf')
     331        self.valid_pdf = open(self.valid_pdf_path, 'rb').read()
     332        return
     333
     334    def test_none(self):
     335        # ``None`` is not a file and not a valid file format
     336        self.assertEqual(helpers.get_fileformat(None), None)
     337        return
     338
     339    def test_path_and_bytestream(self):
     340        # get_fileformat accepts bytestreams and paths as arg.
     341        self.assertEqual(
     342            helpers.get_fileformat(None, self.valid_jpg), 'jpg')
     343        self.assertEqual(
     344            helpers.get_fileformat(self.valid_jpg_path), 'jpg')
     345        # path is ignored when giving a bytestream
     346        self.assertEqual(
     347            helpers.get_fileformat('blah', self.valid_jpg), 'jpg')
     348        return
     349
     350    def test_jpg(self):
     351        # we recognize jpeg images.
     352        self.assertEqual(
     353            helpers.get_fileformat(self.valid_jpg_path), 'jpg')
     354        self.assertEqual(
     355            helpers.get_fileformat(None, self.valid_jpg), 'jpg')
     356        return
     357
     358    def test_png(self):
     359        # we recognize png images.
     360        self.assertEqual(
     361            helpers.get_fileformat(self.valid_png_path), 'png')
     362        self.assertEqual(
     363            helpers.get_fileformat(None, self.valid_png), 'png')
     364        return
     365
     366    def test_pdf(self):
     367        # we recognize pdf documents.
     368        self.assertEqual(
     369            helpers.get_fileformat(self.valid_pdf_path), 'pdf')
     370        self.assertEqual(
     371            helpers.get_fileformat(None, self.valid_pdf), 'pdf')
     372        return
     373
    319374def test_suite():
    320375    suite = unittest.TestSuite()
     
    330385        IfaceNamesTestCase,
    331386        DateTimeHelpersTestCase,
     387        GetFileFormatTestCase,
    332388        ]:
    333389        suite.addTests(
Note: See TracChangeset for help on using the changeset viewer.