Use the new absl::StringResizeAndOverwrite() in string escaping utilities
PiperOrigin-RevId: 823159114
Change-Id: I94ccf42464a5fee6233b15dc8d44503b0b3f9db2
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
index 1ecc069..105133f 100644
--- a/absl/strings/BUILD.bazel
+++ b/absl/strings/BUILD.bazel
@@ -135,6 +135,7 @@
copts = ABSL_DEFAULT_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
deps = [
+ ":resize_and_overwrite",
"//absl/base:config",
"//absl/base:core_headers",
"//absl/base:endian",
diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt
index da44ef7..ccb7233 100644
--- a/absl/strings/CMakeLists.txt
+++ b/absl/strings/CMakeLists.txt
@@ -139,6 +139,7 @@
absl::core_headers
absl::endian
absl::raw_logging_internal
+ absl::strings_resize_and_overwrite
absl::type_traits
)
diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc
index e551c66..308c472 100644
--- a/absl/strings/escaping.cc
+++ b/absl/strings/escaping.cc
@@ -28,6 +28,7 @@
#include "absl/base/internal/endian.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/internal/unaligned_access.h"
+#include "absl/base/macros.h"
#include "absl/base/nullability.h"
#include "absl/strings/ascii.h"
#include "absl/strings/charset.h"
@@ -35,6 +36,7 @@
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/internal/utf8.h"
#include "absl/strings/numbers.h"
+#include "absl/strings/resize_and_overwrite.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
@@ -76,30 +78,31 @@
//
// Unescapes C escape sequences and is the reverse of CEscape().
//
-// If `src` is valid, stores the unescaped string `dst`, and returns
-// true. Otherwise returns false and optionally stores the error
-// description in `error`. Set `error` to nullptr to disable error
-// reporting.
+// If `src` is valid, stores the unescaped string in `dst` and the length of
+// the unescaped string in `dst_size`, and returns true. Otherwise returns false
+// and optionally stores the error description in `error`. Set `error` to
+// nullptr to disable error reporting.
//
-// `src` and `dst` may use the same underlying buffer.
+// `src` and `dst` may use the same underlying buffer (but keep in mind
+// that if this returns an error, it will leave both `src` and `dst` in
+// an unspecified state because they are using the same underlying buffer.)
+// `dst` must have at least as much space as `src`.
// ----------------------------------------------------------------------
bool CUnescapeInternal(absl::string_view src, bool leave_nulls_escaped,
- std::string* absl_nonnull dst,
+ char* absl_nonnull dst, size_t* absl_nonnull dst_size,
std::string* absl_nullable error) {
- strings_internal::STLStringResizeUninitialized(dst, src.size());
-
absl::string_view::size_type p = 0; // Current src position.
- std::string::size_type d = 0; // Current dst position.
+ size_t d = 0; // Current dst position.
// When unescaping in-place, skip any prefix that does not have escaping.
- if (src.data() == dst->data()) {
+ if (src.data() == dst) {
while (p < src.size() && src[p] != '\\') p++, d++;
}
while (p < src.size()) {
if (src[p] != '\\') {
- (*dst)[d++] = src[p++];
+ dst[d++] = src[p++];
} else {
if (++p >= src.size()) { // skip past the '\\'
if (error != nullptr) {
@@ -108,17 +111,19 @@
return false;
}
switch (src[p]) {
- case 'a': (*dst)[d++] = '\a'; break;
- case 'b': (*dst)[d++] = '\b'; break;
- case 'f': (*dst)[d++] = '\f'; break;
- case 'n': (*dst)[d++] = '\n'; break;
- case 'r': (*dst)[d++] = '\r'; break;
- case 't': (*dst)[d++] = '\t'; break;
- case 'v': (*dst)[d++] = '\v'; break;
- case '\\': (*dst)[d++] = '\\'; break;
- case '?': (*dst)[d++] = '\?'; break;
- case '\'': (*dst)[d++] = '\''; break;
- case '"': (*dst)[d++] = '\"'; break;
+ // clang-format off
+ case 'a': dst[d++] = '\a'; break;
+ case 'b': dst[d++] = '\b'; break;
+ case 'f': dst[d++] = '\f'; break;
+ case 'n': dst[d++] = '\n'; break;
+ case 'r': dst[d++] = '\r'; break;
+ case 't': dst[d++] = '\t'; break;
+ case 'v': dst[d++] = '\v'; break;
+ case '\\': dst[d++] = '\\'; break;
+ case '?': dst[d++] = '\?'; break;
+ case '\'': dst[d++] = '\''; break;
+ case '"': dst[d++] = '\"'; break;
+ // clang-format on
case '0':
case '1':
case '2':
@@ -145,13 +150,13 @@
}
if ((ch == 0) && leave_nulls_escaped) {
// Copy the escape sequence for the null character
- (*dst)[d++] = '\\';
+ dst[d++] = '\\';
while (octal_start <= p) {
- (*dst)[d++] = src[octal_start++];
+ dst[d++] = src[octal_start++];
}
break;
}
- (*dst)[d++] = static_cast<char>(ch);
+ dst[d++] = static_cast<char>(ch);
break;
}
case 'x':
@@ -185,13 +190,13 @@
}
if ((ch == 0) && leave_nulls_escaped) {
// Copy the escape sequence for the null character
- (*dst)[d++] = '\\';
+ dst[d++] = '\\';
while (hex_start <= p) {
- (*dst)[d++] = src[hex_start++];
+ dst[d++] = src[hex_start++];
}
break;
}
- (*dst)[d++] = static_cast<char>(ch);
+ dst[d++] = static_cast<char>(ch);
break;
}
case 'u': {
@@ -218,16 +223,16 @@
}
if ((rune == 0) && leave_nulls_escaped) {
// Copy the escape sequence for the null character
- (*dst)[d++] = '\\';
+ dst[d++] = '\\';
while (hex_start <= p) {
- (*dst)[d++] = src[hex_start++];
+ dst[d++] = src[hex_start++];
}
break;
}
if (IsSurrogate(rune, src.substr(hex_start, 5), error)) {
return false;
}
- d += strings_internal::EncodeUTF8Char(dst->data() + d, rune);
+ d += strings_internal::EncodeUTF8Char(dst + d, rune);
break;
}
case 'U': {
@@ -267,17 +272,17 @@
}
if ((rune == 0) && leave_nulls_escaped) {
// Copy the escape sequence for the null character
- (*dst)[d++] = '\\';
+ dst[d++] = '\\';
// U00000000
while (hex_start <= p) {
- (*dst)[d++] = src[hex_start++];
+ dst[d++] = src[hex_start++];
}
break;
}
if (IsSurrogate(rune, src.substr(hex_start, 9), error)) {
return false;
}
- d += strings_internal::EncodeUTF8Char(dst->data() + d, rune);
+ d += strings_internal::EncodeUTF8Char(dst + d, rune);
break;
}
default: {
@@ -291,7 +296,7 @@
}
}
- dst->erase(d);
+ *dst_size = d;
return true;
}
@@ -807,23 +812,18 @@
// 4 characters. Any leftover chars are added directly for good measure.
const size_t dest_len = 3 * (slen / 4) + (slen % 4);
- strings_internal::STLStringResizeUninitialized(dest, dest_len);
-
- // We are getting the destination buffer by getting the beginning of the
- // string and converting it into a char *.
- size_t len;
- const bool ok =
- Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len);
- if (!ok) {
- dest->clear();
- return false;
- }
-
- // could be shorter if there was padding
- assert(len <= dest_len);
- dest->erase(len);
-
- return true;
+ bool ok;
+ StringResizeAndOverwrite(
+ *dest, dest_len, [src, slen, unbase64, &ok](char* buf, size_t buf_size) {
+ size_t len;
+ ok = Base64UnescapeInternal(src, slen, buf, buf_size, unbase64, &len);
+ if (!ok) {
+ len = 0;
+ }
+ assert(len <= buf_size); // Could be shorter if there was padding.
+ return len;
+ });
+ return ok;
}
/* clang-format off */
@@ -878,15 +878,11 @@
}
}
-// This is a templated function so that T can be either a char* or a
-// std::string.
-template <typename T>
-void BytesToHexStringInternal(const unsigned char* absl_nullable src, T dest,
- size_t num) {
- auto dest_ptr = &dest[0];
- for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) {
+void BytesToHexStringInternal(const unsigned char* absl_nullable src,
+ char* dest, size_t num) {
+ for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest += 2) {
const char* hex_p = &numbers_internal::kHexTable[*src_ptr * 2];
- std::copy(hex_p, hex_p + 2, dest_ptr);
+ std::copy(hex_p, hex_p + 2, dest);
}
}
@@ -899,7 +895,32 @@
// ----------------------------------------------------------------------
bool CUnescape(absl::string_view source, std::string* absl_nonnull dest,
std::string* absl_nullable error) {
- return CUnescapeInternal(source, kUnescapeNulls, dest, error);
+ bool success;
+
+ // `CUnescape()` allows for in-place unescaping, which means `source` may
+ // alias `*dest`. However, absl::StringResizeAndOverwrite() invalidates
+ // all iterators, pointers, and references into the string, regardless of
+ // whether reallocation occurs. Therefore we need to avoid calling
+ // absl::StringResizeAndOverwrite() when `source.data() ==
+ // dest->data()`. Comparing the sizes is sufficient to cover this case.
+ if (dest->size() >= source.size()) {
+ size_t dest_size = 0;
+ success = CUnescapeInternal(source, kUnescapeNulls, dest->data(),
+ &dest_size, error);
+ ABSL_ASSERT(dest_size <= dest->size());
+ dest->erase(dest_size);
+ } else {
+ StringResizeAndOverwrite(
+ *dest, source.size(),
+ [source, error, &success](char* buf, size_t buf_size) {
+ size_t dest_size = 0;
+ success =
+ CUnescapeInternal(source, kUnescapeNulls, buf, &dest_size, error);
+ ABSL_ASSERT(dest_size <= buf_size);
+ return dest_size;
+ });
+ }
+ return success;
}
std::string CEscape(absl::string_view src) {
@@ -966,19 +987,23 @@
return false;
}
- absl::strings_internal::STLStringResizeUninitialized(&output, num_bytes);
- auto hex_p = hex.cbegin();
- for (std::string::iterator bin_p = output.begin(); bin_p != output.end();
- ++bin_p) {
- int h1 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)];
- int h2 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)];
- if (h1 == -1 || h2 == -1) {
- output.resize(static_cast<size_t>(bin_p - output.begin()));
- return false;
- }
- *bin_p = static_cast<char>((h1 << 4) + h2);
- }
+ StringResizeAndOverwrite(
+ output, num_bytes, [hex](char* buf, size_t buf_size) {
+ auto hex_p = hex.cbegin();
+ for (size_t i = 0; i < buf_size; ++i) {
+ int h1 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)];
+ int h2 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)];
+ if (h1 == -1 || h2 == -1) {
+ return size_t{0};
+ }
+ buf[i] = static_cast<char>((h1 << 4) + h2);
+ }
+ return buf_size;
+ });
+ if (output.size() != num_bytes) {
+ return false;
+ }
*bytes = std::move(output);
return true;
}
@@ -986,16 +1011,22 @@
std::string HexStringToBytes(absl::string_view from) {
std::string result;
const auto num = from.size() / 2;
- strings_internal::STLStringResizeUninitialized(&result, num);
- absl::HexStringToBytesInternal<std::string&>(from.data(), result, num);
+ StringResizeAndOverwrite(result, num, [from](char* buf, size_t buf_size) {
+ absl::HexStringToBytesInternal<char*>(from.data(), buf, buf_size);
+ return buf_size;
+ });
return result;
}
std::string BytesToHexString(absl::string_view from) {
std::string result;
- strings_internal::STLStringResizeUninitialized(&result, 2 * from.size());
- absl::BytesToHexStringInternal<std::string&>(
- reinterpret_cast<const unsigned char*>(from.data()), result, from.size());
+ StringResizeAndOverwrite(
+ result, 2 * from.size(), [from](char* buf, size_t buf_size) {
+ absl::BytesToHexStringInternal(
+ reinterpret_cast<const unsigned char*>(from.data()), buf,
+ from.size());
+ return buf_size;
+ });
return result;
}
diff --git a/absl/strings/internal/escaping.h b/absl/strings/internal/escaping.h
index 2186f77..b71fb7e 100644
--- a/absl/strings/internal/escaping.h
+++ b/absl/strings/internal/escaping.h
@@ -17,7 +17,7 @@
#include <cassert>
-#include "absl/strings/internal/resize_uninitialized.h"
+#include "absl/strings/resize_and_overwrite.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
@@ -42,12 +42,14 @@
bool do_padding, const char* base64_chars) {
const size_t calc_escaped_size =
CalculateBase64EscapedLenInternal(szsrc, do_padding);
- STLStringResizeUninitialized(dest, calc_escaped_size);
-
- const size_t escaped_len = Base64EscapeInternal(
- src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding);
- assert(calc_escaped_size == escaped_len);
- dest->erase(escaped_len);
+ StringResizeAndOverwrite(
+ *dest, calc_escaped_size,
+ [src, szsrc, base64_chars, do_padding](char* buf, size_t buf_size) {
+ const size_t escaped_len = Base64EscapeInternal(
+ src, szsrc, buf, buf_size, base64_chars, do_padding);
+ assert(escaped_len == buf_size);
+ return escaped_len;
+ });
}
} // namespace strings_internal