Use the new absl::StringResizeAndOverwrite() in string escaping utilities PiperOrigin-RevId: 823159114 Change-Id: I94ccf42464a5fee6233b15dc8d44503b0b3f9db2

commit: 09c29fe055fe6ac91f249e941ee6986ccf023932 [log] [tgz]
author: Derek Mauro <dmauro@google.com> Thu Oct 23 13:09:30 2025 -0700
committer: Copybara-Service <copybara-worker@google.com> Thu Oct 23 13:10:11 2025 -0700
tree: 207c57fd8fddae957edf283a8462712a6d39f399
parent: 4099f7429ae468f1e874f903fd601fecce063943 [diff]
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
index 1ecc069..105133f 100644
--- a/absl/strings/BUILD.bazel
+++ b/absl/strings/BUILD.bazel

@@ -135,6 +135,7 @@
     copts = ABSL_DEFAULT_COPTS,
     linkopts = ABSL_DEFAULT_LINKOPTS,
     deps = [
+        ":resize_and_overwrite",
         "//absl/base:config",
         "//absl/base:core_headers",
         "//absl/base:endian",

diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt
index da44ef7..ccb7233 100644
--- a/absl/strings/CMakeLists.txt
+++ b/absl/strings/CMakeLists.txt

@@ -139,6 +139,7 @@
     absl::core_headers
     absl::endian
     absl::raw_logging_internal
+    absl::strings_resize_and_overwrite
     absl::type_traits
 )
 

diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc
index e551c66..308c472 100644
--- a/absl/strings/escaping.cc
+++ b/absl/strings/escaping.cc

@@ -28,6 +28,7 @@
 #include "absl/base/internal/endian.h"
 #include "absl/base/internal/raw_logging.h"
 #include "absl/base/internal/unaligned_access.h"
+#include "absl/base/macros.h"
 #include "absl/base/nullability.h"
 #include "absl/strings/ascii.h"
 #include "absl/strings/charset.h"
@@ -35,6 +36,7 @@
 #include "absl/strings/internal/resize_uninitialized.h"
 #include "absl/strings/internal/utf8.h"
 #include "absl/strings/numbers.h"
+#include "absl/strings/resize_and_overwrite.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/string_view.h"
 
@@ -76,30 +78,31 @@
 //
 //    Unescapes C escape sequences and is the reverse of CEscape().
 //
-//    If `src` is valid, stores the unescaped string `dst`, and returns
-//    true. Otherwise returns false and optionally stores the error
-//    description in `error`. Set `error` to nullptr to disable error
-//    reporting.
+//    If `src` is valid, stores the unescaped string in `dst` and the length of
+//    unescaped string in `dst_size`, and returns true. Otherwise returns false
+//    and optionally stores the error description in `error`. Set `error` to
+//    nullptr to disable error reporting.
 //
-//    `src` and `dst` may use the same underlying buffer.
+//    `src` and `dst` may use the same underlying buffer (but keep in mind
+//    that if this returns an error, it will leave both `src` and `dst` in
+//    an unspecified state because they are using the same underlying buffer.)
+//    `dst` must have at least as much space as `src`.
 // ----------------------------------------------------------------------
 
 bool CUnescapeInternal(absl::string_view src, bool leave_nulls_escaped,
-                       std::string* absl_nonnull dst,
+                       char* absl_nonnull dst, size_t* absl_nonnull dst_size,
                        std::string* absl_nullable error) {
-  strings_internal::STLStringResizeUninitialized(dst, src.size());
-
   absl::string_view::size_type p = 0;  // Current src position.
-  std::string::size_type d = 0;        // Current dst position.
+  size_t d = 0;                        // Current dst position.
 
   // When unescaping in-place, skip any prefix that does not have escaping.
-  if (src.data() == dst->data()) {
+  if (src.data() == dst) {
     while (p < src.size() && src[p] != '\\') p++, d++;
   }
 
   while (p < src.size()) {
     if (src[p] != '\\') {
-      (*dst)[d++] = src[p++];
+      dst[d++] = src[p++];
     } else {
       if (++p >= src.size()) {  // skip past the '\\'
         if (error != nullptr) {
@@ -108,17 +111,19 @@
         return false;
       }
       switch (src[p]) {
-        case 'a':  (*dst)[d++] = '\a';  break;
-        case 'b':  (*dst)[d++] = '\b';  break;
-        case 'f':  (*dst)[d++] = '\f';  break;
-        case 'n':  (*dst)[d++] = '\n';  break;
-        case 'r':  (*dst)[d++] = '\r';  break;
-        case 't':  (*dst)[d++] = '\t';  break;
-        case 'v':  (*dst)[d++] = '\v';  break;
-        case '\\': (*dst)[d++] = '\\';  break;
-        case '?':  (*dst)[d++] = '\?';  break;
-        case '\'': (*dst)[d++] = '\'';  break;
-        case '"':  (*dst)[d++] = '\"';  break;
+          // clang-format off
+        case 'a':  dst[d++] = '\a';  break;
+        case 'b':  dst[d++] = '\b';  break;
+        case 'f':  dst[d++] = '\f';  break;
+        case 'n':  dst[d++] = '\n';  break;
+        case 'r':  dst[d++] = '\r';  break;
+        case 't':  dst[d++] = '\t';  break;
+        case 'v':  dst[d++] = '\v';  break;
+        case '\\': dst[d++] = '\\';  break;
+        case '?':  dst[d++] = '\?';  break;
+        case '\'': dst[d++] = '\'';  break;
+        case '"':  dst[d++] = '\"';  break;
+        // clang-format on
         case '0':
         case '1':
         case '2':
@@ -145,13 +150,13 @@
           }
           if ((ch == 0) && leave_nulls_escaped) {
             // Copy the escape sequence for the null character
-            (*dst)[d++] = '\\';
+            dst[d++] = '\\';
             while (octal_start <= p) {
-              (*dst)[d++] = src[octal_start++];
+              dst[d++] = src[octal_start++];
             }
             break;
           }
-          (*dst)[d++] = static_cast<char>(ch);
+          dst[d++] = static_cast<char>(ch);
           break;
         }
         case 'x':
@@ -185,13 +190,13 @@
           }
           if ((ch == 0) && leave_nulls_escaped) {
             // Copy the escape sequence for the null character
-            (*dst)[d++] = '\\';
+            dst[d++] = '\\';
             while (hex_start <= p) {
-              (*dst)[d++] = src[hex_start++];
+              dst[d++] = src[hex_start++];
             }
             break;
           }
-          (*dst)[d++] = static_cast<char>(ch);
+          dst[d++] = static_cast<char>(ch);
           break;
         }
         case 'u': {
@@ -218,16 +223,16 @@
           }
           if ((rune == 0) && leave_nulls_escaped) {
             // Copy the escape sequence for the null character
-            (*dst)[d++] = '\\';
+            dst[d++] = '\\';
             while (hex_start <= p) {
-              (*dst)[d++] = src[hex_start++];
+              dst[d++] = src[hex_start++];
             }
             break;
           }
           if (IsSurrogate(rune, src.substr(hex_start, 5), error)) {
             return false;
           }
-          d += strings_internal::EncodeUTF8Char(dst->data() + d, rune);
+          d += strings_internal::EncodeUTF8Char(dst + d, rune);
           break;
         }
         case 'U': {
@@ -267,17 +272,17 @@
           }
           if ((rune == 0) && leave_nulls_escaped) {
             // Copy the escape sequence for the null character
-            (*dst)[d++] = '\\';
+            dst[d++] = '\\';
             // U00000000
             while (hex_start <= p) {
-              (*dst)[d++] = src[hex_start++];
+              dst[d++] = src[hex_start++];
             }
             break;
           }
           if (IsSurrogate(rune, src.substr(hex_start, 9), error)) {
             return false;
           }
-          d += strings_internal::EncodeUTF8Char(dst->data() + d, rune);
+          d += strings_internal::EncodeUTF8Char(dst + d, rune);
           break;
         }
         default: {
@@ -291,7 +296,7 @@
     }
   }
 
-  dst->erase(d);
+  *dst_size = d;
   return true;
 }
 
@@ -807,23 +812,18 @@
   // 4 characters.  Any leftover chars are added directly for good measure.
   const size_t dest_len = 3 * (slen / 4) + (slen % 4);
 
-  strings_internal::STLStringResizeUninitialized(dest, dest_len);
-
-  // We are getting the destination buffer by getting the beginning of the
-  // string and converting it into a char *.
-  size_t len;
-  const bool ok =
-      Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len);
-  if (!ok) {
-    dest->clear();
-    return false;
-  }
-
-  // could be shorter if there was padding
-  assert(len <= dest_len);
-  dest->erase(len);
-
-  return true;
+  bool ok;
+  StringResizeAndOverwrite(
+      *dest, dest_len, [src, slen, unbase64, &ok](char* buf, size_t buf_size) {
+        size_t len;
+        ok = Base64UnescapeInternal(src, slen, buf, buf_size, unbase64, &len);
+        if (!ok) {
+          len = 0;
+        }
+        assert(len <= buf_size);  // Could be shorter if there was padding.
+        return len;
+      });
+  return ok;
 }
 
 /* clang-format off */
@@ -878,15 +878,11 @@
   }
 }
 
-// This is a templated function so that T can be either a char* or a
-// std::string.
-template <typename T>
-void BytesToHexStringInternal(const unsigned char* absl_nullable src, T dest,
-                              size_t num) {
-  auto dest_ptr = &dest[0];
-  for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) {
+void BytesToHexStringInternal(const unsigned char* absl_nullable src,
+                              char* dest, size_t num) {
+  for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest += 2) {
     const char* hex_p = &numbers_internal::kHexTable[*src_ptr * 2];
-    std::copy(hex_p, hex_p + 2, dest_ptr);
+    std::copy(hex_p, hex_p + 2, dest);
   }
 }
 
@@ -899,7 +895,32 @@
 // ----------------------------------------------------------------------
 bool CUnescape(absl::string_view source, std::string* absl_nonnull dest,
                std::string* absl_nullable error) {
-  return CUnescapeInternal(source, kUnescapeNulls, dest, error);
+  bool success;
+
+  // `CUnescape()` allows for in-place unescaping, which means `source` may
+  // alias `*dest`.  However, absl::StringResizeAndOverwrite() invalidates
+  // invalidates all iterators, pointers, and references into the string,
+  // regardless whether reallocation occurs. Therefore we need to avoid calling
+  // absl::StringResizeAndOverwrite() when `source.data() ==
+  // dest->data()`. Comparing the sizes is sufficient to cover this case.
+  if (dest->size() >= source.size()) {
+    size_t dest_size = 0;
+    success = CUnescapeInternal(source, kUnescapeNulls, dest->data(),
+                                &dest_size, error);
+    ABSL_ASSERT(dest_size <= dest->size());
+    dest->erase(dest_size);
+  } else {
+    StringResizeAndOverwrite(
+        *dest, source.size(),
+        [source, error, &success](char* buf, size_t buf_size) {
+          size_t dest_size = 0;
+          success =
+              CUnescapeInternal(source, kUnescapeNulls, buf, &dest_size, error);
+          ABSL_ASSERT(dest_size <= buf_size);
+          return dest_size;
+        });
+  }
+  return success;
 }
 
 std::string CEscape(absl::string_view src) {
@@ -966,19 +987,23 @@
     return false;
   }
 
-  absl::strings_internal::STLStringResizeUninitialized(&output, num_bytes);
-  auto hex_p = hex.cbegin();
-  for (std::string::iterator bin_p = output.begin(); bin_p != output.end();
-       ++bin_p) {
-    int h1 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)];
-    int h2 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)];
-    if (h1 == -1 || h2 == -1) {
-      output.resize(static_cast<size_t>(bin_p - output.begin()));
-      return false;
-    }
-    *bin_p = static_cast<char>((h1 << 4) + h2);
-  }
+  StringResizeAndOverwrite(
+      output, num_bytes, [hex](char* buf, size_t buf_size) {
+        auto hex_p = hex.cbegin();
+        for (size_t i = 0; i < buf_size; ++i) {
+          int h1 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)];
+          int h2 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)];
+          if (h1 == -1 || h2 == -1) {
+            return size_t{0};
+          }
+          buf[i] = static_cast<char>((h1 << 4) + h2);
+        }
+        return buf_size;
+      });
 
+  if (output.size() != num_bytes) {
+    return false;
+  }
   *bytes = std::move(output);
   return true;
 }
@@ -986,16 +1011,22 @@
 std::string HexStringToBytes(absl::string_view from) {
   std::string result;
   const auto num = from.size() / 2;
-  strings_internal::STLStringResizeUninitialized(&result, num);
-  absl::HexStringToBytesInternal<std::string&>(from.data(), result, num);
+  StringResizeAndOverwrite(result, num, [from](char* buf, size_t buf_size) {
+    absl::HexStringToBytesInternal<char*>(from.data(), buf, buf_size);
+    return buf_size;
+  });
   return result;
 }
 
 std::string BytesToHexString(absl::string_view from) {
   std::string result;
-  strings_internal::STLStringResizeUninitialized(&result, 2 * from.size());
-  absl::BytesToHexStringInternal<std::string&>(
-      reinterpret_cast<const unsigned char*>(from.data()), result, from.size());
+  StringResizeAndOverwrite(
+      result, 2 * from.size(), [from](char* buf, size_t buf_size) {
+        absl::BytesToHexStringInternal(
+            reinterpret_cast<const unsigned char*>(from.data()), buf,
+            from.size());
+        return buf_size;
+      });
   return result;
 }
 

diff --git a/absl/strings/internal/escaping.h b/absl/strings/internal/escaping.h
index 2186f77..b71fb7e 100644
--- a/absl/strings/internal/escaping.h
+++ b/absl/strings/internal/escaping.h

@@ -17,7 +17,7 @@
 
 #include <cassert>
 
-#include "absl/strings/internal/resize_uninitialized.h"
+#include "absl/strings/resize_and_overwrite.h"
 
 namespace absl {
 ABSL_NAMESPACE_BEGIN
@@ -42,12 +42,14 @@
                           bool do_padding, const char* base64_chars) {
   const size_t calc_escaped_size =
       CalculateBase64EscapedLenInternal(szsrc, do_padding);
-  STLStringResizeUninitialized(dest, calc_escaped_size);
-
-  const size_t escaped_len = Base64EscapeInternal(
-      src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding);
-  assert(calc_escaped_size == escaped_len);
-  dest->erase(escaped_len);
+  StringResizeAndOverwrite(
+      *dest, calc_escaped_size,
+      [src, szsrc, base64_chars, do_padding](char* buf, size_t buf_size) {
+        const size_t escaped_len = Base64EscapeInternal(
+            src, szsrc, buf, buf_size, base64_chars, do_padding);
+        assert(escaped_len == buf_size);
+        return escaped_len;
+      });
 }
 
 }  // namespace strings_internal
commit	09c29fe055fe6ac91f249e941ee6986ccf023932	[log] [tgz]
author	Derek Mauro <dmauro@google.com>	Thu Oct 23 13:09:30 2025 -0700
committer	Copybara-Service <copybara-worker@google.com>	Thu Oct 23 13:10:11 2025 -0700
tree	207c57fd8fddae957edf283a8462712a6d39f399
parent	4099f7429ae468f1e874f903fd601fecce063943 [diff]