copy an optimization from CPython: when the search string of str.replace and str.split doesn't occur in the string, don't create a copy but just reuse self.
author: Carl Friedrich Bolz-Tereick <cfbolz@gmx.de> 2021-02-25 09:53:25 +0100
committer: Carl Friedrich Bolz-Tereick <cfbolz@gmx.de> 2021-02-25 09:53:25 +0100
commit: 1714320d105280624ab1b0ea3274668377b80ad4 (patch)
tree: 5e3c411f2c93a64de2bc2a86d142d3cdf3073ee8
parent: more upstream syncing - move inttypes.h into pyport.h (issue 3407) (diff)
download: pypy-1714320d105280624ab1b0ea3274668377b80ad4.tar.gz
pypy-1714320d105280624ab1b0ea3274668377b80ad4.tar.bz2
pypy-1714320d105280624ab1b0ea3274668377b80ad4.zip
6 files changed, 48 insertions, 3 deletions
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
index 6315c5d6cf..2316f6e513 100644
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -690,15 +690,33 @@ class W_BytesObject(W_AbstractBytesObject):
                 self_as_unicode._utf8.find(w_sub._utf8) >= 0)
         return self._StringMethods_descr_contains(space, w_sub)
 
-    _StringMethods_descr_replace = descr_replace
     @unwrap_spec(count=int)
     def descr_replace(self, space, w_old, w_new, count=-1):
+        from rpython.rlib.rstring import replace
         old_is_unicode = space.isinstance_w(w_old, space.w_unicode)
         new_is_unicode = space.isinstance_w(w_new, space.w_unicode)
         if old_is_unicode or new_is_unicode:
             self_as_uni = unicode_from_encoded_object(space, self, None, None)
             return self_as_uni.descr_replace(space, w_old, w_new, count)
-        return self._StringMethods_descr_replace(space, w_old, w_new, count)
+
+        # almost copy of StringMethods.descr_replace :-(
+        input = self._value
+
+        sub = self._op_val(space, w_old)
+        by = self._op_val(space, w_new)
+        # the following two lines are for being bug-to-bug compatible
+        # with CPython: see issue #2448
+        if count >= 0 and len(input) == 0:
+            return self._empty()
+        try:
+            res = replace(input, sub, by, count)
+        except OverflowError:
+            raise oefmt(space.w_OverflowError, "replace string is too long")
+        # difference: reuse self if no replacement was done
+        if type(self) is W_BytesObject and res is input:
+            return self
+
+        return self._new(res)
 
     _StringMethods_descr_join = descr_join
     def descr_join(self, space, w_list):
diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py
index cc15f97d54..2feca7ab5e 100644
--- a/pypy/objspace/std/test/test_bytesobject.py
+++ b/pypy/objspace/std/test/test_bytesobject.py
@@ -342,6 +342,10 @@ class AppTestBytesObject:
         assert 'one'.replace(buffer('o'), buffer('n'), 1) == 'nne'
         assert 'one'.replace(buffer('o'), buffer('n')) == 'nne'
 
+    def test_replace_no_occurrence(self):
+        x = b"xyz"
+        assert x.replace(b"a", b"b") is x
+
     def test_strip(self):
         s = " a b "
         assert s.strip() == "a b"
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
index 51faff763d..6b1c7315da 100644
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -1303,3 +1303,7 @@ class AppTestUnicodeString:
 
     def test_newlist_utf8_non_ascii(self):
         'ä'.split("\n")[0] # does not crash
+
+    def test_replace_no_occurrence(self):
+        x = u"xyz"
+        assert x.replace(u"a", u"b") is x
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
index 1dcd415912..1b7a8d07b4 100644
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -880,6 +880,8 @@ class W_UnicodeObject(W_Root):
                                               count, isutf8=True)
         except OverflowError:
             raise oefmt(space.w_OverflowError, "replace string is too long")
+        if type(self) is W_UnicodeObject and replacements == 0:
+            return self
 
         newlength = self._length + replacements * (w_by._length - w_sub._length)
         return W_UnicodeObject(res, newlength)
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
index 29e1495381..efb9b41cfd 100644
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -101,9 +101,13 @@ def _split_by(value, by, maxsplit):
 
     start = 0
     if bylen == 1:
-        # fast path: uses str.rfind(character) and str.count(character)
+        # fast path: uses str.find(character) and str.count(character)
         by = by[0]    # annotator hack: string -> char
         cnt = count(value, by, 0, len(value))
+        if cnt == 0:
+            if isinstance(value, str):
+                return [value]
+            return [value[0:len(value)]]
         if 0 <= maxsplit < cnt:
             cnt = maxsplit
         res = newlist_hint(cnt + 1)
@@ -250,6 +254,8 @@ def replace_count(input, sub, by, maxsplit=-1, isutf8=False):
         # First compute the exact result size
         if sub:
             cnt = count(input, sub, 0, len(input))
+            if isinstance(input, str) and cnt == 0:
+                return input, 0
         else:
             assert isutf8
             from rpython.rlib import rutf8
diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py
index 18b5103e54..b8b0cd8482 100644
--- a/rpython/rlib/test/test_rstring.py
+++ b/rpython/rlib/test/test_rstring.py
@@ -27,6 +27,11 @@ def test_split():
     check_split('endcase test', 'test', res=['endcase ', ''])
     py.test.raises(ValueError, split, 'abc', '')
 
+def test_split_no_occurrence():
+    x = "abc"
+    assert x.split("d")[0] is x
+    assert x.rsplit("d")[0] is x
+
 def test_split_None():
     assert split("") == []
     assert split(' a\ta\na b') == ['a', 'a', 'a', 'b']
@@ -164,6 +169,12 @@ def test_unicode_replace_overflow():
     with py.test.raises(OverflowError):
         replace(s, u"a", s, len(s) - 10)
 
+def test_replace_no_occurrence():
+    s = "xyz"
+    assert replace(s, "a", "b") is s
+    s = "xyz"
+    assert replace(s, "abc", "b") is s
+
 def test_startswith():
     def check_startswith(value, sub, *args, **kwargs):
         result = kwargs['res']
author	Carl Friedrich Bolz-Tereick <cfbolz@gmx.de>	2021-02-25 09:53:25 +0100
committer	Carl Friedrich Bolz-Tereick <cfbolz@gmx.de>	2021-02-25 09:53:25 +0100
commit	1714320d105280624ab1b0ea3274668377b80ad4 (patch)
tree	5e3c411f2c93a64de2bc2a86d142d3cdf3073ee8
parent	more upstream syncing - move inttypes.h into pyport.h (issue 3407) (diff)
download	pypy-1714320d105280624ab1b0ea3274668377b80ad4.tar.gz pypy-1714320d105280624ab1b0ea3274668377b80ad4.tar.bz2 pypy-1714320d105280624ab1b0ea3274668377b80ad4.zip