Use the new absl::StringResizeAndOverwrite() in string escaping utilities PiperOrigin-RevId: 823159114 Change-Id: I94ccf42464a5fee6233b15dc8d44503b0b3f9db2
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 1ecc069..105133f 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel
@@ -135,6 +135,7 @@ copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ + ":resize_and_overwrite", "//absl/base:config", "//absl/base:core_headers", "//absl/base:endian",
diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index da44ef7..ccb7233 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt
@@ -139,6 +139,7 @@ absl::core_headers absl::endian absl::raw_logging_internal + absl::strings_resize_and_overwrite absl::type_traits )
diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc index e551c66..308c472 100644 --- a/absl/strings/escaping.cc +++ b/absl/strings/escaping.cc
@@ -28,6 +28,7 @@ #include "absl/base/internal/endian.h" #include "absl/base/internal/raw_logging.h" #include "absl/base/internal/unaligned_access.h" +#include "absl/base/macros.h" #include "absl/base/nullability.h" #include "absl/strings/ascii.h" #include "absl/strings/charset.h" @@ -35,6 +36,7 @@ #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/internal/utf8.h" #include "absl/strings/numbers.h" +#include "absl/strings/resize_and_overwrite.h" #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" @@ -76,30 +78,31 @@ // // Unescapes C escape sequences and is the reverse of CEscape(). // -// If `src` is valid, stores the unescaped string `dst`, and returns -// true. Otherwise returns false and optionally stores the error -// description in `error`. Set `error` to nullptr to disable error -// reporting. +// If `src` is valid, stores the unescaped string in `dst` and the length of +// unescaped string in `dst_size`, and returns true. Otherwise returns false +// and optionally stores the error description in `error`. Set `error` to +// nullptr to disable error reporting. // -// `src` and `dst` may use the same underlying buffer. +// `src` and `dst` may use the same underlying buffer (but keep in mind +// that if this returns an error, it will leave both `src` and `dst` in +// an unspecified state because they are using the same underlying buffer.) +// `dst` must have at least as much space as `src`. // ---------------------------------------------------------------------- bool CUnescapeInternal(absl::string_view src, bool leave_nulls_escaped, - std::string* absl_nonnull dst, + char* absl_nonnull dst, size_t* absl_nonnull dst_size, std::string* absl_nullable error) { - strings_internal::STLStringResizeUninitialized(dst, src.size()); - absl::string_view::size_type p = 0; // Current src position. - std::string::size_type d = 0; // Current dst position. + size_t d = 0; // Current dst position. 
// When unescaping in-place, skip any prefix that does not have escaping. - if (src.data() == dst->data()) { + if (src.data() == dst) { while (p < src.size() && src[p] != '\\') p++, d++; } while (p < src.size()) { if (src[p] != '\\') { - (*dst)[d++] = src[p++]; + dst[d++] = src[p++]; } else { if (++p >= src.size()) { // skip past the '\\' if (error != nullptr) { @@ -108,17 +111,19 @@ return false; } switch (src[p]) { - case 'a': (*dst)[d++] = '\a'; break; - case 'b': (*dst)[d++] = '\b'; break; - case 'f': (*dst)[d++] = '\f'; break; - case 'n': (*dst)[d++] = '\n'; break; - case 'r': (*dst)[d++] = '\r'; break; - case 't': (*dst)[d++] = '\t'; break; - case 'v': (*dst)[d++] = '\v'; break; - case '\\': (*dst)[d++] = '\\'; break; - case '?': (*dst)[d++] = '\?'; break; - case '\'': (*dst)[d++] = '\''; break; - case '"': (*dst)[d++] = '\"'; break; + // clang-format off + case 'a': dst[d++] = '\a'; break; + case 'b': dst[d++] = '\b'; break; + case 'f': dst[d++] = '\f'; break; + case 'n': dst[d++] = '\n'; break; + case 'r': dst[d++] = '\r'; break; + case 't': dst[d++] = '\t'; break; + case 'v': dst[d++] = '\v'; break; + case '\\': dst[d++] = '\\'; break; + case '?': dst[d++] = '\?'; break; + case '\'': dst[d++] = '\''; break; + case '"': dst[d++] = '\"'; break; + // clang-format on case '0': case '1': case '2': @@ -145,13 +150,13 @@ } if ((ch == 0) && leave_nulls_escaped) { // Copy the escape sequence for the null character - (*dst)[d++] = '\\'; + dst[d++] = '\\'; while (octal_start <= p) { - (*dst)[d++] = src[octal_start++]; + dst[d++] = src[octal_start++]; } break; } - (*dst)[d++] = static_cast<char>(ch); + dst[d++] = static_cast<char>(ch); break; } case 'x': @@ -185,13 +190,13 @@ } if ((ch == 0) && leave_nulls_escaped) { // Copy the escape sequence for the null character - (*dst)[d++] = '\\'; + dst[d++] = '\\'; while (hex_start <= p) { - (*dst)[d++] = src[hex_start++]; + dst[d++] = src[hex_start++]; } break; } - (*dst)[d++] = static_cast<char>(ch); + dst[d++] = 
static_cast<char>(ch); break; } case 'u': { @@ -218,16 +223,16 @@ } if ((rune == 0) && leave_nulls_escaped) { // Copy the escape sequence for the null character - (*dst)[d++] = '\\'; + dst[d++] = '\\'; while (hex_start <= p) { - (*dst)[d++] = src[hex_start++]; + dst[d++] = src[hex_start++]; } break; } if (IsSurrogate(rune, src.substr(hex_start, 5), error)) { return false; } - d += strings_internal::EncodeUTF8Char(dst->data() + d, rune); + d += strings_internal::EncodeUTF8Char(dst + d, rune); break; } case 'U': { @@ -267,17 +272,17 @@ } if ((rune == 0) && leave_nulls_escaped) { // Copy the escape sequence for the null character - (*dst)[d++] = '\\'; + dst[d++] = '\\'; // U00000000 while (hex_start <= p) { - (*dst)[d++] = src[hex_start++]; + dst[d++] = src[hex_start++]; } break; } if (IsSurrogate(rune, src.substr(hex_start, 9), error)) { return false; } - d += strings_internal::EncodeUTF8Char(dst->data() + d, rune); + d += strings_internal::EncodeUTF8Char(dst + d, rune); break; } default: { @@ -291,7 +296,7 @@ } } - dst->erase(d); + *dst_size = d; return true; } @@ -807,23 +812,18 @@ // 4 characters. Any leftover chars are added directly for good measure. const size_t dest_len = 3 * (slen / 4) + (slen % 4); - strings_internal::STLStringResizeUninitialized(dest, dest_len); - - // We are getting the destination buffer by getting the beginning of the - // string and converting it into a char *. - size_t len; - const bool ok = - Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len); - if (!ok) { - dest->clear(); - return false; - } - - // could be shorter if there was padding - assert(len <= dest_len); - dest->erase(len); - - return true; + bool ok; + StringResizeAndOverwrite( + *dest, dest_len, [src, slen, unbase64, &ok](char* buf, size_t buf_size) { + size_t len; + ok = Base64UnescapeInternal(src, slen, buf, buf_size, unbase64, &len); + if (!ok) { + len = 0; + } + assert(len <= buf_size); // Could be shorter if there was padding. 
+ return len; + }); + return ok; } /* clang-format off */ @@ -878,15 +878,11 @@ } } -// This is a templated function so that T can be either a char* or a -// std::string. -template <typename T> -void BytesToHexStringInternal(const unsigned char* absl_nullable src, T dest, - size_t num) { - auto dest_ptr = &dest[0]; - for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) { +void BytesToHexStringInternal(const unsigned char* absl_nullable src, + char* dest, size_t num) { + for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest += 2) { const char* hex_p = &numbers_internal::kHexTable[*src_ptr * 2]; - std::copy(hex_p, hex_p + 2, dest_ptr); + std::copy(hex_p, hex_p + 2, dest); } } @@ -899,7 +895,32 @@ // ---------------------------------------------------------------------- bool CUnescape(absl::string_view source, std::string* absl_nonnull dest, std::string* absl_nullable error) { - return CUnescapeInternal(source, kUnescapeNulls, dest, error); + bool success; + + // `CUnescape()` allows for in-place unescaping, which means `source` may + // alias `*dest`. However, absl::StringResizeAndOverwrite() + // invalidates all iterators, pointers, and references into the string, + // regardless whether reallocation occurs. Therefore we need to avoid calling + // absl::StringResizeAndOverwrite() when `source.data() == + // dest->data()`. Comparing the sizes is sufficient to cover this case. 
+ if (dest->size() >= source.size()) { + size_t dest_size = 0; + success = CUnescapeInternal(source, kUnescapeNulls, dest->data(), + &dest_size, error); + ABSL_ASSERT(dest_size <= dest->size()); + dest->erase(dest_size); + } else { + StringResizeAndOverwrite( + *dest, source.size(), + [source, error, &success](char* buf, size_t buf_size) { + size_t dest_size = 0; + success = + CUnescapeInternal(source, kUnescapeNulls, buf, &dest_size, error); + ABSL_ASSERT(dest_size <= buf_size); + return dest_size; + }); + } + return success; } std::string CEscape(absl::string_view src) { @@ -966,19 +987,23 @@ return false; } - absl::strings_internal::STLStringResizeUninitialized(&output, num_bytes); - auto hex_p = hex.cbegin(); - for (std::string::iterator bin_p = output.begin(); bin_p != output.end(); - ++bin_p) { - int h1 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)]; - int h2 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)]; - if (h1 == -1 || h2 == -1) { - output.resize(static_cast<size_t>(bin_p - output.begin())); - return false; - } - *bin_p = static_cast<char>((h1 << 4) + h2); - } + StringResizeAndOverwrite( + output, num_bytes, [hex](char* buf, size_t buf_size) { + auto hex_p = hex.cbegin(); + for (size_t i = 0; i < buf_size; ++i) { + int h1 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)]; + int h2 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)]; + if (h1 == -1 || h2 == -1) { + return size_t{0}; + } + buf[i] = static_cast<char>((h1 << 4) + h2); + } + return buf_size; + }); + if (output.size() != num_bytes) { + return false; + } *bytes = std::move(output); return true; } @@ -986,16 +1011,22 @@ std::string HexStringToBytes(absl::string_view from) { std::string result; const auto num = from.size() / 2; - strings_internal::STLStringResizeUninitialized(&result, num); - absl::HexStringToBytesInternal<std::string&>(from.data(), result, num); + StringResizeAndOverwrite(result, num, [from](char* buf, size_t buf_size) { + 
absl::HexStringToBytesInternal<char*>(from.data(), buf, buf_size); + return buf_size; + }); return result; } std::string BytesToHexString(absl::string_view from) { std::string result; - strings_internal::STLStringResizeUninitialized(&result, 2 * from.size()); - absl::BytesToHexStringInternal<std::string&>( - reinterpret_cast<const unsigned char*>(from.data()), result, from.size()); + StringResizeAndOverwrite( + result, 2 * from.size(), [from](char* buf, size_t buf_size) { + absl::BytesToHexStringInternal( + reinterpret_cast<const unsigned char*>(from.data()), buf, + from.size()); + return buf_size; + }); return result; }
diff --git a/absl/strings/internal/escaping.h b/absl/strings/internal/escaping.h index 2186f77..b71fb7e 100644 --- a/absl/strings/internal/escaping.h +++ b/absl/strings/internal/escaping.h
@@ -17,7 +17,7 @@ #include <cassert> -#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/resize_and_overwrite.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -42,12 +42,14 @@ bool do_padding, const char* base64_chars) { const size_t calc_escaped_size = CalculateBase64EscapedLenInternal(szsrc, do_padding); - STLStringResizeUninitialized(dest, calc_escaped_size); - - const size_t escaped_len = Base64EscapeInternal( - src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding); - assert(calc_escaped_size == escaped_len); - dest->erase(escaped_len); + StringResizeAndOverwrite( + *dest, calc_escaped_size, + [src, szsrc, base64_chars, do_padding](char* buf, size_t buf_size) { + const size_t escaped_len = Base64EscapeInternal( + src, szsrc, buf, buf_size, base64_chars, do_padding); + assert(escaped_len == buf_size); + return escaped_len; + }); } } // namespace strings_internal