diff options
author | André Erdmann <dywi@mailerd.de> | 2014-02-22 15:26:44 +0100 |
---|---|---|
committer | André Erdmann <dywi@mailerd.de> | 2014-02-22 15:35:32 +0100 |
commit | 580f55bc730343381d0d6f596867a6601f821d28 (patch) | |
tree | c75208aa3d6cf0fc35fc2b99b8af84ac02de5384 | |
parent | roverlay/depres, rule reader: properly count files (diff) | |
download | R_overlay-580f55bc730343381d0d6f596867a6601f821d28.tar.gz R_overlay-580f55bc730343381d0d6f596867a6601f821d28.tar.bz2 R_overlay-580f55bc730343381d0d6f596867a6601f821d28.zip |
roverlay/util/fileio: support xz compression
* roverlay/util/fileio:
** read/write_text_file(): add support for xz
** read_text_file(): catch compression-related errors only while reading
the first line (which is used to probe the compression format)
** read_text_file(): make sure to close the file(!)
** TextFile: add get_default_compression()
* move SUPPORTED_COMPRESSION dict to roverlay/util/compression
* roverlay/config/entrymap: use roverlay/util/compression
* roverlay/db/distmap: add get_default_compression() (bzip2 if supported, else none)
-rw-r--r-- | Makefile | 9 | ||||
-rw-r--r-- | roverlay/config/entrymap.py | 14 | ||||
-rw-r--r-- | roverlay/db/distmap.py | 5 | ||||
-rw-r--r-- | roverlay/util/compression.py | 63 | ||||
-rw-r--r-- | roverlay/util/fileio.py | 131 |
5 files changed, 159 insertions, 63 deletions
@@ -28,9 +28,10 @@ ROVERLAY_MAIN := ./roverlay.py PYVER := PYTHON := python$(PYVER) -PYDOC_SH = ./bin/build/do_pydoc.sh +PYDOC_SH := ./bin/build/do_pydoc.sh +X_COMPRESS := bzip2 -RST_HTML = ./bin/build/roverlay_rst2html.sh +RST_HTML := ./bin/build/roverlay_rst2html.sh SRC_DOCDIR := ./doc @@ -130,8 +131,8 @@ dist: distclean release compress-config: $(BUILDDIR) @install -d $(BUILDDIR)/config cp -vLr -p --no-preserve=ownership config/simple-deprules.d $(BUILDDIR)/config/ - find $(BUILDDIR)/config/simple-deprules.d/ -type f -print0 | xargs -0 -n 5 --verbose bzip2 - bzip2 -k -c config/license.map > $(BUILDDIR)/config/license.map + find $(BUILDDIR)/config/simple-deprules.d/ -type f -print0 | xargs -0 -n 5 --verbose $(X_COMPRESS) + $(X_COMPRESS) -c config/license.map > $(BUILDDIR)/config/license.map install-roverlay: ./roverlay.py install -T -D -- ./roverlay.py $(BINDIR)/roverlay diff --git a/roverlay/config/entrymap.py b/roverlay/config/entrymap.py index eb24b39..b53d5a6 100644 --- a/roverlay/config/entrymap.py +++ b/roverlay/config/entrymap.py @@ -47,6 +47,8 @@ known dict keys are 'path', 'description'/'desc' and 'value_type': __all__ = [ 'CONFIG_ENTRY_MAP', 'prune_description', ] +import roverlay.util.compression + fs_file = 'fs_file' fs_abslist = 'list:fs_abs' yesno = 'yesno' @@ -60,6 +62,10 @@ is_yesno = { 'value_type' : 'yesno' } CAPSLOCK = ( 'CAPSLOCK', ) LOG_LEVEL = ( "DEBUG", "INFO", "WARN", "WARNING", "ERROR", "CRITICAL" ) +COMP_FORMATS = [ "default", "none" ] + sorted ( + roverlay.util.compression.get_all_compression_formats() + ##roverlay.util.compression.get_supported_compression_formats() +) is_log_level = { 'choices' : LOG_LEVEL, 'flags' : CAPSLOCK } @@ -324,10 +330,10 @@ CONFIG_ENTRY_MAP = dict ( ), overlay_distmap_compression = dict ( - description = 'distmap compression format (none, bzip2 or gzip)', - choices = frozenset ({ - 'none', 'default', 'bz2', 'bzip2', 'gz', 'gzip' - }), + description = 'distmap compression format ({})'.format ( + ', '.join ( 
COMP_FORMATS ) + ), + choices = COMP_FORMATS, ), overlay_distmap_file = dict ( diff --git a/roverlay/db/distmap.py b/roverlay/db/distmap.py index 8ed3a16..818d5d0 100644 --- a/roverlay/db/distmap.py +++ b/roverlay/db/distmap.py @@ -647,6 +647,11 @@ class FileDistMap ( roverlay.util.fileio.TextFile, _DistMapBase ): # file format (reserved for future usage) FILE_FORMAT = '0' + @classmethod + def get_default_compression ( cls ): + return "bzip2" if cls.check_compression_supported ( "bzip2" ) else None + # --- end of get_default_compression (...) --- + def __init__ ( self, distmap_file, distmap_compression=None, ignore_missing=False ): diff --git a/roverlay/util/compression.py b/roverlay/util/compression.py new file mode 100644 index 0000000..172b916 --- /dev/null +++ b/roverlay/util/compression.py @@ -0,0 +1,63 @@ +# R overlay -- util, compression +# -*- coding: utf-8 -*- +# Copyright (C) 2012-2014 André Erdmann <dywi@mailerd.de> +# Distributed under the terms of the GNU General Public License; +# either version 2 of the License, or (at your option) any later version. + +__all__ = [ + 'COMP_GZIP', 'COMP_BZIP2', 'COMP_XZ', + 'get_all_compression_formats', 'get_supported_compression_formats', + 'check_compression_supported', 'get_compress_open', +] + +import gzip +import bz2 + +try: + import lzma +except ImportError: + # python < 3.3 without backported lzma + _HAVE_LZMA_MODULE = False + # COULDFIX: compat hack, always catch IOError before LZMAError! 
+ LZMAError = IOError +else: + _HAVE_LZMA_MODULE = True + LZMAError = lzma.LZMAError + + +COMP_GZIP = 1 +COMP_BZIP2 = 2 +COMP_XZ = 3 + +SUPPORTED_COMPRESSION = { + 'gzip' : gzip.GzipFile, + 'gz' : gzip.GzipFile, + COMP_GZIP : gzip.GzipFile, + 'bzip2' : bz2.BZ2File, + 'bz2' : bz2.BZ2File, + COMP_BZIP2 : bz2.BZ2File, +} + +if _HAVE_LZMA_MODULE: + SUPPORTED_COMPRESSION ['xz'] = lzma.LZMAFile + SUPPORTED_COMPRESSION [COMP_XZ] = lzma.LZMAFile +# -- end if _HAVE_LZMA_MODULE + +def get_all_compression_formats(): + return [ 'gzip', 'gz', 'bzip2', 'bz2', 'xz' ] +# --- end of get_all_compression_formats (...) --- + +def get_supported_compression_formats(): + return [ k for k in SUPPORTED_COMPRESSION if isinstance ( k, str ) ] +# --- end of get_supported_compression_formats (...) --- + +def check_compression_supported ( compression ): + return compression in SUPPORTED_COMPRESSION +# --- end of check_compression_supported (...) --- + +def get_compress_open ( compression, *args ): + if args: + return SUPPORTED_COMPRESSION.get ( compression, *args ) + else: + return SUPPORTED_COMPRESSION [compression] +# --- end of get_compress_open (...) --- diff --git a/roverlay/util/fileio.py b/roverlay/util/fileio.py index 372cdd6..dc96119 100644 --- a/roverlay/util/fileio.py +++ b/roverlay/util/fileio.py @@ -1,11 +1,9 @@ # R overlay -- util, file read operations # -*- coding: utf-8 -*- -# Copyright (C) 2012 André Erdmann <dywi@mailerd.de> +# Copyright (C) 2012-2014 André Erdmann <dywi@mailerd.de> # Distributed under the terms of the GNU General Public License; # either version 2 of the License, or (at your option) any later version. 
-import gzip -import bz2 import mimetypes import sys import os.path @@ -14,25 +12,17 @@ import errno import roverlay.util.common import roverlay.util.objects +import roverlay.util.compression import roverlay.strutil from roverlay.strutil import bytes_try_decode +from roverlay.util.compression import \ + COMP_XZ, COMP_BZIP2, COMP_GZIP, LZMAError, \ + get_compress_open, check_compression_supported _MIME = mimetypes.MimeTypes() - guess_filetype = _MIME.guess_type -COMP_GZIP = 1 -COMP_BZIP2 = 2 - -SUPPORTED_COMPRESSION = { - 'gzip' : gzip.GzipFile, - 'gz' : gzip.GzipFile, - COMP_GZIP : gzip.GzipFile, - 'bzip2' : bz2.BZ2File, - 'bz2' : bz2.BZ2File, - COMP_BZIP2 : bz2.BZ2File, -} def strip_newline ( s ): return s.rstrip ( '\n' ) @@ -61,59 +51,75 @@ def read_text_file ( filepath, preparse=None, try_harder=True ): be detected (defaults to True) """ - ftype = guess_filetype ( filepath ) - compress_open = SUPPORTED_COMPRESSION.get ( ftype[1], None ) + compress_open = get_compress_open ( ftype[1], None ) if compress_open is not None: with compress_open ( filepath, mode='r' ) as CH: for line in read_compressed_file_handle ( CH, preparse ): yield line + return + elif try_harder: # guess_filetype detects file extensions only # # try known compression formats # - for comp in ( COMP_BZIP2, COMP_GZIP ): + for comp in ( COMP_BZIP2, COMP_XZ, COMP_GZIP ): CH = None - try: - CH = SUPPORTED_COMPRESSION [comp] ( filepath, mode='r' ) - for line in read_compressed_file_handle ( CH, preparse ): - yield line - CH.close() - except IOError as ioerr: - if CH: + copen = get_compress_open ( comp, None ) + if copen is not None: + try: + CH = copen ( filepath, mode='r' ) + creader = read_compressed_file_handle ( CH, preparse ) + # safely read first line only + line = next ( creader ) + + except StopIteration: + # empty file (?) 
CH.close() - if ioerr.errno is not None: + return + + except IOError as ioerr: + # failed to open (gzip, bzip2) + if CH: CH.close() + CH = None + if ioerr.errno is not None: + raise + + except LZMAError as err: + # failed to open (xz) + if CH: CH.close() + CH = None + + except: + if CH: CH.close() raise - else: - break - else: - with open ( filepath, 'rt' ) as FH: - if preparse is None: - for line in FH.readlines(): - yield line - elif preparse is True: - for line in FH.readlines(): - yield strip_newline ( line ) + else: - for line in FH.readlines(): - yield preparse ( line ) + # read remaining lines + for line in creader: + yield line + CH.close() + return + # -- end try + # -- end if # -- end for <comp> - else: - with open ( filepath, 'rt' ) as FH: - if preparse is None: - for line in FH.readlines(): - yield line - elif preparse is True: - for line in FH.readlines(): - yield strip_newline ( line ) - else: - for line in FH.readlines(): - yield preparse ( line ) - # -- end if <compress_open?, try_harder?> + # -- end if <try to read filepath as compressed file> + + # file doesn't seem to be compressed (or not supported) + with open ( filepath, 'rt' ) as FH: + if preparse is None: + for line in FH.readlines(): + yield line + elif preparse is True: + for line in FH.readlines(): + yield strip_newline ( line ) + else: + for line in FH.readlines(): + yield preparse ( line ) # --- end of read_text_file (...) 
--- def write_text_file ( @@ -121,9 +127,7 @@ def write_text_file ( append_newlines=True, append_newline_eof=False, create_dir=True, newline='\n' ): - compress_open = ( - SUPPORTED_COMPRESSION [compression] if compression else None - ) + compress_open = get_compress_open ( compression ) if compression else None if create_dir: roverlay.util.common.dodir_for_file ( filepath ) @@ -156,6 +160,16 @@ class TextFile ( roverlay.util.objects.PersistentContent ): READ_PREPARSE = True READ_TRY_HARDER = True + @classmethod + def get_default_compression ( cls ): + return None + # --- end of get_default_compression (...) --- + + @classmethod + def check_compression_supported ( cls, compression ): + return check_compression_supported ( compression ) + # --- end of check_compression_supported (...) --- + def __init__ ( self, filepath, compression=None ): super ( TextFile, self ).__init__() @@ -197,9 +211,16 @@ class TextFile ( roverlay.util.objects.PersistentContent ): # --- end of set_filepath (...) --- def set_compression ( self, compression ): - if not compression or compression in { 'default', 'none' }: + if not compression or compression == 'none': self._compression = None - elif compression in SUPPORTED_COMPRESSION: + elif compression == 'default': + if __debug__: + comp = self.get_default_compression() + assert self.check_compression_supported ( comp ) + self._compression = comp + else: + self._compression = self.get_default_compression() + elif self.check_compression_supported ( compression ): self._compression = compression else: raise ValueError ( |