roverlay/strutil.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177

# R overlay -- roverlay package, strutil
# -*- coding: utf-8 -*-
# Copyright (C) 2012 André Erdmann <dywi@mailerd.de>
# Distributed under the terms of the GNU General Public License;
# either version 2 of the License, or (at your option) any later version.

"""provides utility functions for string manipulation"""

# names exported by a star-import of this module
__all__ = [ 'ascii_filter', 'bytes_try_decode', 'fix_ebuild_name',
   'pipe_lines', 'shorten_str', 'unquote', 'unquote_all', 'foreach_str',
   'str_to_bool',
]

import re

# encodings that bytes_try_decode() tries, in this order, if the caller
# does not pass its own
# NOTE(review): iso8859_15 presumably decodes any byte sequence, which would
# make the entries listed after it unreachable -- confirm
_DEFAULT_ENCODINGS = ( 'utf-8', 'ascii', 'iso8859_15', 'utf-16', 'latin_1' )

# matches a run of one or more '.' / ':' chars; fix_ebuild_name() substitutes
# each such run by _EBUILD_NAME_ILLEGAL_CHARS_REPLACE_BY
_EBUILD_NAME_ILLEGAL_CHARS            = re.compile ( "[.:]{1,}" )
_EBUILD_NAME_ILLEGAL_CHARS_REPLACE_BY = '_'


def fix_ebuild_name ( name ):
   """Returns a copy of an ebuild name in which each run of illegal chars
   (as matched by _EBUILD_NAME_ILLEGAL_CHARS) has been substituted by
   the replacement string _EBUILD_NAME_ILLEGAL_CHARS_REPLACE_BY.

   arguments:
   * name -- ebuild name to be fixed

   returns: name without illegal chars
   """
   illegal_chars = _EBUILD_NAME_ILLEGAL_CHARS
   replacement   = _EBUILD_NAME_ILLEGAL_CHARS_REPLACE_BY
   return illegal_chars.sub ( replacement, name )
# --- end of fix_ebuild_name (...) ---

def ascii_filter ( _str, additional_filter=None ):
   """Returns a string consisting of the ascii chars (code point < 128)
   of _str, optionally restricted further by a caller-supplied filter.

   arguments:
   * _str              -- string to be filtered
   * additional_filter -- optional function that gets called with each
                          ascii char of _str (never with non-ascii chars);
                          a char is kept only if the function returns a
                          true value. Defaults to None (keep all ascii chars).

   returns: filtered string
   """
   # lazily chained generators; the ascii check always runs first
   kept = ( char for char in _str if ord ( char ) < 128 )
   if additional_filter is not None:
      kept = ( char for char in kept if additional_filter ( char ) )
   return ''.join ( kept )
# --- end of ascii_filter (...) ---

def shorten_str ( s, maxlen, replace_end=None ):
   """Cuts a string down to at most maxlen chars. A string that already
   fits (len(s) <= maxlen) is returned as-is.
   The tail of a shortened string can optionally be substituted by another
   string, provided that this string is not longer than maxlen chars.

   arguments:
   * s           -- string to be shortened
   * maxlen      -- max. length of the resulting string
   * replace_end -- optional; string that replaces the end of a shortened
                    string (e.g. "abcdefghijk", 6, " (s)" => "ab (s)").
                    Ignored if it is longer than maxlen.

   returns: shortened string
   """
   suffix = None
   keep   = maxlen

   if replace_end is not None:
      room = maxlen - len ( replace_end )
      if room >= 0:
         # the suffix fits, reserve space for it
         suffix, keep = replace_end, room

   if len ( s ) > maxlen:
      if suffix is None:
         return s[:keep]
      return s[:keep] + suffix

   return s
# --- end of shorten_str (...) ---

def pipe_lines ( _pipe, use_filter=False, filter_func=None ):
   """Returns text lines read from a pipe.

   The input is decoded via its decode() method and then split at
   newline chars ('\\n').

   arguments:
   * _pipe       -- data read from a pipe; has to provide a decode() method
                    (e.g. a bytes object)
   * use_filter  -- whether to use a filter or not. Defaults to False.
   * filter_func -- filter function to use; a line is kept if the function
                    returns a true value for it. This can also be 'None',
                    in which case empty lines get dropped.

   returns: list of text lines
   """
   lines = _pipe.decode().split ( '\n' )
   if use_filter:
      # filter() is lazy in python3; build a list so that the return type
      # is the same as in the unfiltered case (supports len(), indexing
      # and repeated iteration)
      return list ( filter ( filter_func, lines ) )
   else:
      return lines
# --- end of pipe_lines (...) ---

def unquote ( _str, keep_going=False ):
   """Strips enclosing quotes (a matching pair of either " or ' at both
   ends) from a string. Strings with less than two chars and strings that
   are not enclosed by quotes are returned unmodified.

   arguments:
   * _str       -- string to be unquoted
   * keep_going -- whether to remove all layers of enclosing quotes
                   ("'"a"'" -> a) or just the outermost one.
                   Defaults to False (outermost only).

   returns: unquoted string
   """
   quote_chars = '\"\''
   result      = _str

   while (
      len ( result ) >= 2
      and result [0] == result [-1]
      and result [0] in quote_chars
   ):
      result = result[1:-1]
      if not keep_going:
         break

   return result
# --- end of unquote (...) ---

def unquote_all ( s ):
   """Strips all layers of enclosing quotes from a string.
   Shorthand for unquote() with keep_going set to True.

   arguments:
   * s -- string to be unquoted

   returns: unquoted string
   """
   return unquote ( s, True )
# --- end of unquote_all (...) ---

def bytes_try_decode (
   byte_str,
   encodings=_DEFAULT_ENCODINGS,
   charwise_only=False,
   force_decode=False
):
   """Tries to decode a bytes object to str whose encoding is unknown
   but predictable (with charwise conversion as last resort).
   Returns byte_str if byte_str is already a str and force_decode is False,
   else a decoded str.

   The encodings are tried in the given order and the result of the first
   one that succeeds is returned. If none succeeds (or if there is nothing
   to try), each item of byte_str is converted with chr().

   arguments:
   * byte_str      -- bytes object to decode
   * encodings     -- encodings to try (None, str or list/iterable of str)
   * charwise_only -- do charwise conversion only
   * force_decode  -- decode byte_str even if it's already a str

   returns: decoded str (or byte_str, see above)
   """
   if isinstance ( byte_str, str ) and not force_decode:
      # nothing to do
      return byte_str

   if not charwise_only and encodings:
      if isinstance ( encodings, str ):
         # a single encoding has been given
         try_enc = ( encodings, )
      else:
         try_enc = encodings

      for enc in try_enc:
         try:
            ret = byte_str.decode ( enc )
         except Exception:
            # best-effort: any failure (undecodable data, unknown encoding,
            # ...) just means "try the next one". Not a bare except, as
            # that would also swallow KeyboardInterrupt / SystemExit.
            continue

         if ret is not None:
            return ret
         # first "successful" decode ends the search, even without a result
         break

   # last resort: charwise conversion
   return ''.join ( chr ( c ) for c in byte_str )
# --- end of bytes_try_decode() ---

def foreach_str ( func, _str ):
   """Applies a function to a single string or to each item of an iterable.
   Items are converted with str() before func gets called.

   arguments:
   * func -- function that accepts one str argument
   * _str -- a str, an iterable (any non-str object with an __iter__
             attribute) or any other object, which is then handled like
             a single str

   returns: list of func's results if _str is an iterable,
            else func's result for str ( _str )
   """
   if not isinstance ( _str, str ) and hasattr ( _str, '__iter__' ):
      results = list()
      for item in _str:
         results.append ( func ( str ( item ) ) )
      return results

   return func ( str ( _str ) )
# --- end of foreach_str (...) ---

def str_to_bool ( s, empty_return=None, nofail=False ):
   """Converts a string to a bool. The comparison is case-insensitive.

   arguments:
   * s            -- string to be converted
   * empty_return -- value that is returned if s is empty (or otherwise
                     evaluates to False). Defaults to None.
   * nofail       -- whether to return None (True) or to raise a ValueError
                     (False) if s is not a known word. Defaults to False.

   returns: True for y, yes, 1, true, enabled, on;
            False for n, no, 0, false, disabled, off

   raises: ValueError if s is not a known word and nofail is not set
   """
   if not s:
      return empty_return

   word_map = {
      'y'        : True,
      'yes'      : True,
      '1'        : True,
      'true'     : True,
      'enabled'  : True,
      'on'       : True,
      'n'        : False,
      'no'       : False,
      '0'        : False,
      'false'    : False,
      'disabled' : False,
      'off'      : False,
   }

   # the map contains bools only, so None always means "unknown word"
   value = word_map.get ( s.lower() )
   if value is not None:
      return value

   if nofail:
      return None

   raise ValueError(s)
# --- end of str_to_bool (...) ---