Performance improvement for absl::AsciiStrToUpper() and absl::AsciiStrToLower()

PiperOrigin-RevId: 516275043
Change-Id: I906ef0d96dddf12e3738490bd26cb05753ec008c
diff --git a/absl/strings/ascii.cc b/absl/strings/ascii.cc
index 868df2d..16c9689 100644
--- a/absl/strings/ascii.cc
+++ b/absl/strings/ascii.cc
@@ -14,6 +14,10 @@
 
 #include "absl/strings/ascii.h"
 
+#include <climits>
+#include <cstring>
+#include <string>
+
 namespace absl {
 ABSL_NAMESPACE_BEGIN
 namespace ascii_internal {
@@ -153,18 +157,62 @@
 };
 // clang-format on
 
-}  // namespace ascii_internal
+template <bool ToUpper>
+constexpr void AsciiStrCaseFold(char* p, char* end) {
+  // Converts [p, end) in place: to uppercase if ToUpper, else to lowercase.
+  // The upper- and lowercase forms of an ASCII letter differ in exactly one
+  // bit, so xor-ing with that bit flips the case. 'a' and 'A' are an arbitrary
+  // choice; any other lower/upper letter pair, such as 'z' and 'Z', yields the
+  // same bit.
+  constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A';
 
-void AsciiStrToLower(std::string* s) {
-  for (auto& ch : *s) {
-    ch = absl::ascii_tolower(static_cast<unsigned char>(ch));
+  constexpr char ch_a = ToUpper ? 'a' : 'A';  // First letter to flip.
+  constexpr char ch_z = ToUpper ? 'z' : 'Z';  // Last letter to flip.
+  for (; p < end; ++p) {
+    unsigned char v = static_cast<unsigned char>(*p);
+    // We use & instead of && to ensure this always stays branchless.
+    // We use static_cast<int> to suppress -Wbitwise-instead-of-logical.
+    bool is_in_range = static_cast<bool>(static_cast<int>(ch_a <= v) &
+                                         static_cast<int>(v <= ch_z));
+    v ^= is_in_range ? kAsciiCaseBitFlip : 0;  // Other bytes pass unchanged.
+    *p = static_cast<char>(v);
   }
 }
 
-void AsciiStrToUpper(std::string* s) {
-  for (auto& ch : *s) {
-    ch = absl::ascii_toupper(static_cast<unsigned char>(ch));
+static constexpr size_t ValidateAsciiCasefold() {  // Compile-time self-test.
+  constexpr size_t num_chars = 1 + CHAR_MAX - CHAR_MIN;  // Distinct chars.
+  size_t incorrect_index = 0;  // Stays 0 when no mismatch is found.
+  char lowered[num_chars] = {};
+  char uppered[num_chars] = {};
+  for (unsigned int i = 0; i < num_chars; ++i) {
+    uppered[i] = lowered[i] = static_cast<char>(i);  // Slot i holds char(i).
   }
+  AsciiStrCaseFold<false>(&lowered[0], &lowered[num_chars]);  // To lowercase.
+  AsciiStrCaseFold<true>(&uppered[0], &uppered[num_chars]);  // To uppercase.
+  for (size_t i = 0; i < num_chars; ++i) {
+    const char ch = static_cast<char>(i),  // Reference mapping for ch follows.
+               ch_upper = ('a' <= ch && ch <= 'z' ? 'A' + (ch - 'a') : ch),
+               ch_lower = ('A' <= ch && ch <= 'Z' ? 'a' + (ch - 'A') : ch);
+    if (uppered[i] != ch_upper || lowered[i] != ch_lower) {
+      incorrect_index = i > 0 ? i : num_chars;  // Nonzero even for i == 0.
+      break;
+    }
+  }
+  return incorrect_index;
+}
+
+static_assert(ValidateAsciiCasefold() == 0, "error in case conversion");
+
+}  // namespace ascii_internal
+
+void AsciiStrToLower(std::string* s) {  // Lowercases ASCII letters in place.
+  char* p = &(*s)[0];  // Guaranteed to be valid for empty strings
+  return ascii_internal::AsciiStrCaseFold<false>(p, p + s->size());
+}
+
+void AsciiStrToUpper(std::string* s) {  // Uppercases ASCII letters in place.
+  char* p = &(*s)[0];  // Guaranteed to be valid for empty strings
+  return ascii_internal::AsciiStrCaseFold<true>(p, p + s->size());
 }
 
 void RemoveExtraAsciiWhitespace(std::string* str) {
diff --git a/absl/strings/ascii_test.cc b/absl/strings/ascii_test.cc
index dfed114..4ea262f 100644
--- a/absl/strings/ascii_test.cc
+++ b/absl/strings/ascii_test.cc
@@ -14,6 +14,7 @@
 
 #include "absl/strings/ascii.h"
 
+#include <algorithm>
 #include <cctype>
 #include <clocale>
 #include <cstring>
@@ -189,14 +190,14 @@
   const std::string str("GHIJKL");
   const std::string str2("MNOPQR");
   const absl::string_view sp(str2);
-  std::string mutable_str("STUVWX");
+  std::string mutable_str("_`?@[{AMNOPQRSTUVWXYZ");
 
   EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf));
   EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str));
   EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp));
 
   absl::AsciiStrToLower(&mutable_str);
-  EXPECT_EQ("stuvwx", mutable_str);
+  EXPECT_EQ("_`?@[{amnopqrstuvwxyz", mutable_str);
 
   char mutable_buf[] = "Mutable";
   std::transform(mutable_buf, mutable_buf + strlen(mutable_buf),
@@ -207,12 +208,12 @@
 TEST(AsciiStrTo, Upper) {
   const char buf[] = "abcdef";
   const std::string str("ghijkl");
-  const std::string str2("mnopqr");
+  const std::string str2("_`?@[{amnopqrstuvwxyz");
   const absl::string_view sp(str2);
 
   EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(buf));
   EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(str));
-  EXPECT_EQ("MNOPQR", absl::AsciiStrToUpper(sp));
+  EXPECT_EQ("_`?@[{AMNOPQRSTUVWXYZ", absl::AsciiStrToUpper(sp));
 
   char mutable_buf[] = "Mutable";
   std::transform(mutable_buf, mutable_buf + strlen(mutable_buf),