about summary refs log tree commit diff
diff options
context:
space:
mode:
author André Erdmann <dywi@mailerd.de> 2014-02-22 15:26:44 +0100
committer André Erdmann <dywi@mailerd.de> 2014-02-22 15:35:32 +0100
commit 580f55bc730343381d0d6f596867a6601f821d28 (patch)
tree c75208aa3d6cf0fc35fc2b99b8af84ac02de5384
parent roverlay/depres, rule reader: properly count files (diff)
download R_overlay-580f55bc730343381d0d6f596867a6601f821d28.tar.gz
R_overlay-580f55bc730343381d0d6f596867a6601f821d28.tar.bz2
R_overlay-580f55bc730343381d0d6f596867a6601f821d28.zip
roverlay/util/fileio: support xz compression
* roverlay/util/fileio:
** read/write_text_file(): add support for xz
** read_text_file(): catch compression-related errors for the first text line only
** read_text_file(): make sure to close the file(!)
** TextFile: add get_default_compression()
* move SUPPORTED_COMPRESSION dict to roverlay/util/compression
* roverlay/config/entrymap: use roverlay/util/compression
* roverlay/db/distmap: get_default_compression()
-rw-r--r-- Makefile 9
-rw-r--r-- roverlay/config/entrymap.py 14
-rw-r--r-- roverlay/db/distmap.py 5
-rw-r--r-- roverlay/util/compression.py 63
-rw-r--r-- roverlay/util/fileio.py 131
5 files changed, 159 insertions, 63 deletions
diff --git a/Makefile b/Makefile
index 4f2c60e..495f6c8 100644
--- a/Makefile
+++ b/Makefile
@@ -28,9 +28,10 @@ ROVERLAY_MAIN := ./roverlay.py
PYVER :=
PYTHON := python$(PYVER)
-PYDOC_SH = ./bin/build/do_pydoc.sh
+PYDOC_SH := ./bin/build/do_pydoc.sh
+X_COMPRESS := bzip2
-RST_HTML = ./bin/build/roverlay_rst2html.sh
+RST_HTML := ./bin/build/roverlay_rst2html.sh
SRC_DOCDIR := ./doc
@@ -130,8 +131,8 @@ dist: distclean release
compress-config: $(BUILDDIR)
@install -d $(BUILDDIR)/config
cp -vLr -p --no-preserve=ownership config/simple-deprules.d $(BUILDDIR)/config/
- find $(BUILDDIR)/config/simple-deprules.d/ -type f -print0 | xargs -0 -n 5 --verbose bzip2
- bzip2 -k -c config/license.map > $(BUILDDIR)/config/license.map
+ find $(BUILDDIR)/config/simple-deprules.d/ -type f -print0 | xargs -0 -n 5 --verbose $(X_COMPRESS)
+ $(X_COMPRESS) -c config/license.map > $(BUILDDIR)/config/license.map
install-roverlay: ./roverlay.py
install -T -D -- ./roverlay.py $(BINDIR)/roverlay
diff --git a/roverlay/config/entrymap.py b/roverlay/config/entrymap.py
index eb24b39..b53d5a6 100644
--- a/roverlay/config/entrymap.py
+++ b/roverlay/config/entrymap.py
@@ -47,6 +47,8 @@ known dict keys are 'path', 'description'/'desc' and 'value_type':
__all__ = [ 'CONFIG_ENTRY_MAP', 'prune_description', ]
+import roverlay.util.compression
+
fs_file = 'fs_file'
fs_abslist = 'list:fs_abs'
yesno = 'yesno'
@@ -60,6 +62,10 @@ is_yesno = { 'value_type' : 'yesno' }
CAPSLOCK = ( 'CAPSLOCK', )
LOG_LEVEL = ( "DEBUG", "INFO", "WARN", "WARNING", "ERROR", "CRITICAL" )
+COMP_FORMATS = [ "default", "none" ] + sorted (
+ roverlay.util.compression.get_all_compression_formats()
+ ##roverlay.util.compression.get_supported_compression_formats()
+)
is_log_level = { 'choices' : LOG_LEVEL, 'flags' : CAPSLOCK }
@@ -324,10 +330,10 @@ CONFIG_ENTRY_MAP = dict (
),
overlay_distmap_compression = dict (
- description = 'distmap compression format (none, bzip2 or gzip)',
- choices = frozenset ({
- 'none', 'default', 'bz2', 'bzip2', 'gz', 'gzip'
- }),
+ description = 'distmap compression format ({})'.format (
+ ', '.join ( COMP_FORMATS )
+ ),
+ choices = COMP_FORMATS,
),
overlay_distmap_file = dict (
diff --git a/roverlay/db/distmap.py b/roverlay/db/distmap.py
index 8ed3a16..818d5d0 100644
--- a/roverlay/db/distmap.py
+++ b/roverlay/db/distmap.py
@@ -647,6 +647,11 @@ class FileDistMap ( roverlay.util.fileio.TextFile, _DistMapBase ):
# file format (reserved for future usage)
FILE_FORMAT = '0'
+ @classmethod
+ def get_default_compression ( cls ):
+ return "bzip2" if cls.check_compression_supported ( "bzip2" ) else None
+ # --- end of get_default_compression (...) ---
+
def __init__ (
self, distmap_file, distmap_compression=None, ignore_missing=False
):
diff --git a/roverlay/util/compression.py b/roverlay/util/compression.py
new file mode 100644
index 0000000..172b916
--- /dev/null
+++ b/roverlay/util/compression.py
@@ -0,0 +1,63 @@
+# R overlay -- util, compression
+# -*- coding: utf-8 -*-
+# Copyright (C) 2012-2014 André Erdmann <dywi@mailerd.de>
+# Distributed under the terms of the GNU General Public License;
+# either version 2 of the License, or (at your option) any later version.
+
+__all__ = [
+ 'COMP_GZIP', 'COMP_BZIP2', 'COMP_XZ',
+ 'get_all_compression_formats', 'get_supported_compression_formats',
+ 'check_compression_supported', 'get_compress_open',
+]
+
+import gzip
+import bz2
+
+try:
+ import lzma
+except ImportError:
+ # python < 3.3 without backported lzma
+ _HAVE_LZMA_MODULE = False
+ # COULDFIX: compat hack, always catch IOError before LZMAError!
+ LZMAError = IOError
+else:
+ _HAVE_LZMA_MODULE = True
+ LZMAError = lzma.LZMAError
+
+
+COMP_GZIP = 1
+COMP_BZIP2 = 2
+COMP_XZ = 3
+
+SUPPORTED_COMPRESSION = {
+ 'gzip' : gzip.GzipFile,
+ 'gz' : gzip.GzipFile,
+ COMP_GZIP : gzip.GzipFile,
+ 'bzip2' : bz2.BZ2File,
+ 'bz2' : bz2.BZ2File,
+ COMP_BZIP2 : bz2.BZ2File,
+}
+
+if _HAVE_LZMA_MODULE:
+ SUPPORTED_COMPRESSION ['xz'] = lzma.LZMAFile
+ SUPPORTED_COMPRESSION [COMP_XZ] = lzma.LZMAFile
+# -- end if _HAVE_LZMA_MODULE
+
+def get_all_compression_formats():
+ return [ 'gzip', 'gz', 'bzip2', 'bz2', 'xz' ]
+# --- end of get_all_compression_formats (...) ---
+
+def get_supported_compression_formats():
+ return [ k for k in SUPPORTED_COMPRESSION if isinstance ( k, str ) ]
+# --- end of get_supported_compression_formats (...) ---
+
+def check_compression_supported ( compression ):
+ return compression in SUPPORTED_COMPRESSION
+# --- end of check_compression_supported (...) ---
+
+def get_compress_open ( compression, *args ):
+ if args:
+ return SUPPORTED_COMPRESSION.get ( compression, *args )
+ else:
+ return SUPPORTED_COMPRESSION [compression]
+# --- end of get_compress_open (...) ---
diff --git a/roverlay/util/fileio.py b/roverlay/util/fileio.py
index 372cdd6..dc96119 100644
--- a/roverlay/util/fileio.py
+++ b/roverlay/util/fileio.py
@@ -1,11 +1,9 @@
# R overlay -- util, file read operations
# -*- coding: utf-8 -*-
-# Copyright (C) 2012 André Erdmann <dywi@mailerd.de>
+# Copyright (C) 2012-2014 André Erdmann <dywi@mailerd.de>
# Distributed under the terms of the GNU General Public License;
# either version 2 of the License, or (at your option) any later version.
-import gzip
-import bz2
import mimetypes
import sys
import os.path
@@ -14,25 +12,17 @@ import errno
import roverlay.util.common
import roverlay.util.objects
+import roverlay.util.compression
import roverlay.strutil
from roverlay.strutil import bytes_try_decode
+from roverlay.util.compression import \
+ COMP_XZ, COMP_BZIP2, COMP_GZIP, LZMAError, \
+ get_compress_open, check_compression_supported
_MIME = mimetypes.MimeTypes()
-
guess_filetype = _MIME.guess_type
-COMP_GZIP = 1
-COMP_BZIP2 = 2
-
-SUPPORTED_COMPRESSION = {
- 'gzip' : gzip.GzipFile,
- 'gz' : gzip.GzipFile,
- COMP_GZIP : gzip.GzipFile,
- 'bzip2' : bz2.BZ2File,
- 'bz2' : bz2.BZ2File,
- COMP_BZIP2 : bz2.BZ2File,
-}
def strip_newline ( s ):
return s.rstrip ( '\n' )
@@ -61,59 +51,75 @@ def read_text_file ( filepath, preparse=None, try_harder=True ):
be detected (defaults to True)
"""
-
ftype = guess_filetype ( filepath )
- compress_open = SUPPORTED_COMPRESSION.get ( ftype[1], None )
+ compress_open = get_compress_open ( ftype[1], None )
if compress_open is not None:
with compress_open ( filepath, mode='r' ) as CH:
for line in read_compressed_file_handle ( CH, preparse ):
yield line
+ return
+
elif try_harder:
# guess_filetype detects file extensions only
#
# try known compression formats
#
- for comp in ( COMP_BZIP2, COMP_GZIP ):
+ for comp in ( COMP_BZIP2, COMP_XZ, COMP_GZIP ):
CH = None
- try:
- CH = SUPPORTED_COMPRESSION [comp] ( filepath, mode='r' )
- for line in read_compressed_file_handle ( CH, preparse ):
- yield line
- CH.close()
- except IOError as ioerr:
- if CH:
+ copen = get_compress_open ( comp, None )
+ if copen is not None:
+ try:
+ CH = copen ( filepath, mode='r' )
+ creader = read_compressed_file_handle ( CH, preparse )
+ # safely read first line only
+ line = next ( creader )
+
+ except StopIteration:
+ # empty file (?)
CH.close()
- if ioerr.errno is not None:
+ return
+
+ except IOError as ioerr:
+ # failed to open (gzip, bzip2)
+ if CH: CH.close()
+ CH = None
+ if ioerr.errno is not None:
+ raise
+
+ except LZMAError as err:
+ # failed to open (xz)
+ if CH: CH.close()
+ CH = None
+
+ except:
+ if CH: CH.close()
raise
- else:
- break
- else:
- with open ( filepath, 'rt' ) as FH:
- if preparse is None:
- for line in FH.readlines():
- yield line
- elif preparse is True:
- for line in FH.readlines():
- yield strip_newline ( line )
+
else:
- for line in FH.readlines():
- yield preparse ( line )
+ # read remaining lines
+ for line in creader:
+ yield line
+ CH.close()
+ return
+ # -- end try
+ # -- end if
# -- end for <comp>
- else:
- with open ( filepath, 'rt' ) as FH:
- if preparse is None:
- for line in FH.readlines():
- yield line
- elif preparse is True:
- for line in FH.readlines():
- yield strip_newline ( line )
- else:
- for line in FH.readlines():
- yield preparse ( line )
- # -- end if <compress_open?, try_harder?>
+ # -- end if <try to read filepath as compressed file>
+
+ # file doesn't seem to be compressed (or not supported)
+ with open ( filepath, 'rt' ) as FH:
+ if preparse is None:
+ for line in FH.readlines():
+ yield line
+ elif preparse is True:
+ for line in FH.readlines():
+ yield strip_newline ( line )
+ else:
+ for line in FH.readlines():
+ yield preparse ( line )
# --- end of read_text_file (...) ---
def write_text_file (
@@ -121,9 +127,7 @@ def write_text_file (
append_newlines=True, append_newline_eof=False, create_dir=True,
newline='\n'
):
- compress_open = (
- SUPPORTED_COMPRESSION [compression] if compression else None
- )
+ compress_open = get_compress_open ( compression ) if compression else None
if create_dir:
roverlay.util.common.dodir_for_file ( filepath )
@@ -156,6 +160,16 @@ class TextFile ( roverlay.util.objects.PersistentContent ):
READ_PREPARSE = True
READ_TRY_HARDER = True
+ @classmethod
+ def get_default_compression ( cls ):
+ return None
+ # --- end of get_default_compression (...) ---
+
+ @classmethod
+ def check_compression_supported ( cls, compression ):
+ return check_compression_supported ( compression )
+ # --- end of check_compression_supported (...) ---
+
def __init__ ( self, filepath, compression=None ):
super ( TextFile, self ).__init__()
@@ -197,9 +211,16 @@ class TextFile ( roverlay.util.objects.PersistentContent ):
# --- end of set_filepath (...) ---
def set_compression ( self, compression ):
- if not compression or compression in { 'default', 'none' }:
+ if not compression or compression == 'none':
self._compression = None
- elif compression in SUPPORTED_COMPRESSION:
+ elif compression == 'default':
+ if __debug__:
+ comp = self.get_default_compression()
+ assert self.check_compression_supported ( comp )
+ self._compression = comp
+ else:
+ self._compression = self.get_default_compression()
+ elif self.check_compression_supported ( compression ):
self._compression = compression
else:
raise ValueError (