misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 1 | // |
| 2 | // Copyright 2017 The Abseil Authors. |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | // you may not use this file except in compliance with the License. |
| 6 | // You may obtain a copy of the License at |
| 7 | // |
nik7273 | 38b7043 | 2019-03-08 10:27:53 -0500 | [diff] [blame] | 8 | // https://www.apache.org/licenses/LICENSE-2.0 |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 9 | // |
| 10 | // Unless required by applicable law or agreed to in writing, software |
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | // See the License for the specific language governing permissions and |
| 14 | // limitations under the License. |
| 15 | // |
| 16 | // ----------------------------------------------------------------------------- |
| 17 | // File: ascii.h |
| 18 | // ----------------------------------------------------------------------------- |
| 19 | // |
| 20 | // This package contains functions operating on characters and strings |
| 21 | // restricted to standard ASCII. These include character classification |
| 22 | // functions analogous to those found in the ANSI C Standard Library <ctype.h> |
| 23 | // header file. |
| 24 | // |
| 25 | // C++ implementations provide <ctype.h> functionality based on their |
| 26 | // C environment locale. In general, reliance on such a locale is not ideal, as |
| 27 | // the locale standard is problematic (and may not return invariant information |
| 28 | // for the same character set, for example). These `ascii_*()` functions are |
| 29 | // hard-wired for standard ASCII, much faster, and guaranteed to behave |
| 30 | // consistently. They will never be overloaded, nor will their function |
| 31 | // signature change. |
| 32 | // |
| 33 | // `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`, |
| 34 | // `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`, |
| 35 | // `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`, |
| 36 | // `ascii_isxdigit()` |
Bruce Mitchener | 08760ad | 2018-04-20 01:11:44 +0700 | [diff] [blame] | 37 | // Analogous to the <ctype.h> functions with similar names, these |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 38 | // functions take an unsigned char and return a bool, based on whether the |
| 39 | // character matches the condition specified. |
| 40 | // |
| 41 | // If the input character has a numerical value greater than 127, these |
| 42 | // functions return `false`. |
| 43 | // |
| 44 | // `ascii_tolower()`, `ascii_toupper()` |
Bruce Mitchener | 08760ad | 2018-04-20 01:11:44 +0700 | [diff] [blame] | 45 | // Analogous to the <ctype.h> functions with similar names, these functions |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 46 | // take an unsigned char and return a char. |
| 47 | // |
| 48 | // If the input character is not an ASCII {lower,upper}-case letter (including |
| 49 | // numerical values greater than 127) then the functions return the same value |
| 50 | // as the input character. |
| 51 | |
| 52 | #ifndef ABSL_STRINGS_ASCII_H_ |
| 53 | #define ABSL_STRINGS_ASCII_H_ |
| 54 | |
| 55 | #include <algorithm> |
Dmitri Gribenko | 0ddbfd5 | 2023-08-08 09:46:31 -0700 | [diff] [blame] | 56 | #include <cstddef> |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 57 | #include <string> |
Abseil Team | 6b8ebb3 | 2024-07-30 13:54:55 -0700 | [diff] [blame] | 58 | #include <utility> |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 59 | |
| 60 | #include "absl/base/attributes.h" |
Abseil Team | 37dd256 | 2020-01-28 11:50:11 -0800 | [diff] [blame] | 61 | #include "absl/base/config.h" |
Dmitri Gribenko | 55621d1 | 2023-12-11 09:07:07 -0800 | [diff] [blame] | 62 | #include "absl/base/nullability.h" |
Abseil Team | 5ea745c | 2024-07-23 07:08:41 -0700 | [diff] [blame] | 63 | #include "absl/strings/internal/resize_uninitialized.h" |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 64 | #include "absl/strings/string_view.h" |
| 65 | |
| 66 | namespace absl { |
Abseil Team | 12bc53e | 2019-12-12 10:36:03 -0800 | [diff] [blame] | 67 | ABSL_NAMESPACE_BEGIN |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 68 | namespace ascii_internal { |
| 69 | |
| 70 | // Declaration for an array of bitfields holding character information. |
Abseil Team | 37dd256 | 2020-01-28 11:50:11 -0800 | [diff] [blame] | 71 | ABSL_DLL extern const unsigned char kPropertyBits[256]; |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 72 | |
| 73 | // Declaration for the array of characters to upper-case characters. |
Abseil Team | 37dd256 | 2020-01-28 11:50:11 -0800 | [diff] [blame] | 74 | ABSL_DLL extern const char kToUpper[256]; |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 75 | |
| 76 | // Declaration for the array of characters to lower-case characters. |
Abseil Team | 37dd256 | 2020-01-28 11:50:11 -0800 | [diff] [blame] | 77 | ABSL_DLL extern const char kToLower[256]; |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 78 | |
Martin Brænne | f555f69 | 2024-09-12 07:56:09 -0700 | [diff] [blame] | 79 | void AsciiStrToLower(absl::Nonnull<char*> dst, absl::Nullable<const char*> src, |
Abseil Team | 5ea745c | 2024-07-23 07:08:41 -0700 | [diff] [blame] | 80 | size_t n); |
| 81 | |
Martin Brænne | f555f69 | 2024-09-12 07:56:09 -0700 | [diff] [blame] | 82 | void AsciiStrToUpper(absl::Nonnull<char*> dst, absl::Nullable<const char*> src, |
Abseil Team | 5ea745c | 2024-07-23 07:08:41 -0700 | [diff] [blame] | 83 | size_t n); |
| 84 | |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 85 | } // namespace ascii_internal |
| 86 | |
| 87 | // ascii_isalpha() |
| 88 | // |
| 89 | // Determines whether the given character is an alphabetic character. |
| 90 | inline bool ascii_isalpha(unsigned char c) { |
| 91 | return (ascii_internal::kPropertyBits[c] & 0x01) != 0; |
| 92 | } |
| 93 | |
| 94 | // ascii_isalnum() |
| 95 | // |
| 96 | // Determines whether the given character is an alphanumeric character. |
| 97 | inline bool ascii_isalnum(unsigned char c) { |
| 98 | return (ascii_internal::kPropertyBits[c] & 0x04) != 0; |
| 99 | } |
| 100 | |
| 101 | // ascii_isspace() |
| 102 | // |
| 103 | // Determines whether the given character is a whitespace character (space, |
| 104 | // tab, vertical tab, formfeed, linefeed, or carriage return). |
| 105 | inline bool ascii_isspace(unsigned char c) { |
| 106 | return (ascii_internal::kPropertyBits[c] & 0x08) != 0; |
| 107 | } |
| 108 | |
| 109 | // ascii_ispunct() |
| 110 | // |
| 111 | // Determines whether the given character is a punctuation character. |
| 112 | inline bool ascii_ispunct(unsigned char c) { |
| 113 | return (ascii_internal::kPropertyBits[c] & 0x10) != 0; |
| 114 | } |
| 115 | |
| 116 | // ascii_isblank() |
| 117 | // |
| 118 | // Determines whether the given character is a blank character (tab or space). |
| 119 | inline bool ascii_isblank(unsigned char c) { |
| 120 | return (ascii_internal::kPropertyBits[c] & 0x20) != 0; |
| 121 | } |
| 122 | |
| 123 | // ascii_iscntrl() |
| 124 | // |
| 125 | // Determines whether the given character is a control character. |
| 126 | inline bool ascii_iscntrl(unsigned char c) { |
| 127 | return (ascii_internal::kPropertyBits[c] & 0x40) != 0; |
| 128 | } |
| 129 | |
| 130 | // ascii_isxdigit() |
| 131 | // |
| 132 | // Determines whether the given character can be represented as a hexadecimal |
| 133 | // digit character (i.e. {0-9} or {A-F}). |
| 134 | inline bool ascii_isxdigit(unsigned char c) { |
| 135 | return (ascii_internal::kPropertyBits[c] & 0x80) != 0; |
| 136 | } |
| 137 | |
// ascii_isdigit()
//
// Returns `true` if `c` is a decimal digit character, i.e. lies in the
// inclusive range '0'..'9'. Usable in constant expressions.
inline constexpr bool ascii_isdigit(unsigned char c) {
  return !(c < '0' || c > '9');
}
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 145 | |
// ascii_isprint()
//
// Returns `true` if `c` is printable, with the space character counted as
// printable: the accepted values are 0x20 (space) through 0x7e ('~').
// Usable in constant expressions.
inline constexpr bool ascii_isprint(unsigned char c) {
  return 0x20 <= c && c <= 0x7e;
}
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 152 | |
// ascii_isgraph()
//
// Returns `true` if `c` has a graphical representation: the accepted values
// are 0x21 ('!') through 0x7e ('~'), so the space character is excluded.
// Usable in constant expressions.
inline constexpr bool ascii_isgraph(unsigned char c) {
  return 0x21 <= c && c <= 0x7e;
}
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 159 | |
// ascii_isupper()
//
// Returns `true` if `c` is an uppercase letter, i.e. lies in the inclusive
// range 'A'..'Z'. Usable in constant expressions.
inline constexpr bool ascii_isupper(unsigned char c) {
  return !(c < 'A' || c > 'Z');
}
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 166 | |
// ascii_islower()
//
// Returns `true` if `c` is a lowercase letter, i.e. lies in the inclusive
// range 'a'..'z'. Usable in constant expressions.
inline constexpr bool ascii_islower(unsigned char c) {
  return !(c < 'a' || c > 'z');
}
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 173 | |
// ascii_isascii()
//
// Returns `true` if `c` is an ASCII character, i.e. its value is below 128
// (the high bit is clear). Usable in constant expressions.
inline constexpr bool ascii_isascii(unsigned char c) {
  return (c & 0x80) == 0;
}
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 178 | |
| 179 | // ascii_tolower() |
| 180 | // |
| 181 | // Returns an ASCII character, converting to lowercase if uppercase is |
| 182 | // passed. Note that character values > 127 are simply returned. |
| 183 | inline char ascii_tolower(unsigned char c) { |
| 184 | return ascii_internal::kToLower[c]; |
| 185 | } |
| 186 | |
| 187 | // Converts the characters in `s` to lowercase, changing the contents of `s`. |
Dmitri Gribenko | 55621d1 | 2023-12-11 09:07:07 -0800 | [diff] [blame] | 188 | void AsciiStrToLower(absl::Nonnull<std::string*> s); |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 189 | |
Abseil Team | bed5bd6 | 2018-08-21 11:31:02 -0700 | [diff] [blame] | 190 | // Creates a lowercase string from a given absl::string_view. |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 191 | ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) { |
Abseil Team | 5ea745c | 2024-07-23 07:08:41 -0700 | [diff] [blame] | 192 | std::string result; |
| 193 | strings_internal::STLStringResizeUninitialized(&result, s.size()); |
| 194 | ascii_internal::AsciiStrToLower(&result[0], s.data(), s.size()); |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 195 | return result; |
| 196 | } |
| 197 | |
Abseil Team | 6b8ebb3 | 2024-07-30 13:54:55 -0700 | [diff] [blame] | 198 | // Creates a lowercase string from a given std::string&&. |
| 199 | // |
| 200 | // (Template is used to lower priority of this overload.) |
| 201 | template <int&... DoNotSpecify> |
| 202 | ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(std::string&& s) { |
| 203 | std::string result = std::move(s); |
| 204 | absl::AsciiStrToLower(&result); |
| 205 | return result; |
| 206 | } |
| 207 | |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 208 | // ascii_toupper() |
| 209 | // |
| 210 | // Returns the ASCII character, converting to upper-case if lower-case is |
| 211 | // passed. Note that characters values > 127 are simply returned. |
| 212 | inline char ascii_toupper(unsigned char c) { |
| 213 | return ascii_internal::kToUpper[c]; |
| 214 | } |
| 215 | |
| 216 | // Converts the characters in `s` to uppercase, changing the contents of `s`. |
Dmitri Gribenko | 55621d1 | 2023-12-11 09:07:07 -0800 | [diff] [blame] | 217 | void AsciiStrToUpper(absl::Nonnull<std::string*> s); |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 218 | |
Abseil Team | bed5bd6 | 2018-08-21 11:31:02 -0700 | [diff] [blame] | 219 | // Creates an uppercase string from a given absl::string_view. |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 220 | ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) { |
Abseil Team | 5ea745c | 2024-07-23 07:08:41 -0700 | [diff] [blame] | 221 | std::string result; |
| 222 | strings_internal::STLStringResizeUninitialized(&result, s.size()); |
| 223 | ascii_internal::AsciiStrToUpper(&result[0], s.data(), s.size()); |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 224 | return result; |
| 225 | } |
| 226 | |
Abseil Team | 6b8ebb3 | 2024-07-30 13:54:55 -0700 | [diff] [blame] | 227 | // Creates an uppercase string from a given std::string&&. |
| 228 | // |
| 229 | // (Template is used to lower priority of this overload.) |
| 230 | template <int&... DoNotSpecify> |
| 231 | ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(std::string&& s) { |
| 232 | std::string result = std::move(s); |
| 233 | absl::AsciiStrToUpper(&result); |
| 234 | return result; |
| 235 | } |
| 236 | |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 237 | // Returns absl::string_view with whitespace stripped from the beginning of the |
| 238 | // given string_view. |
| 239 | ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace( |
| 240 | absl::string_view str) { |
| 241 | auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace); |
Abseil Team | 628a282 | 2022-01-04 18:04:19 -0800 | [diff] [blame] | 242 | return str.substr(static_cast<size_t>(it - str.begin())); |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 243 | } |
| 244 | |
Abseil Team | bed5bd6 | 2018-08-21 11:31:02 -0700 | [diff] [blame] | 245 | // Strips in place whitespace from the beginning of the given string. |
Dmitri Gribenko | 55621d1 | 2023-12-11 09:07:07 -0800 | [diff] [blame] | 246 | inline void StripLeadingAsciiWhitespace(absl::Nonnull<std::string*> str) { |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 247 | auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace); |
| 248 | str->erase(str->begin(), it); |
| 249 | } |
| 250 | |
| 251 | // Returns absl::string_view with whitespace stripped from the end of the given |
| 252 | // string_view. |
| 253 | ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace( |
| 254 | absl::string_view str) { |
| 255 | auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace); |
Abseil Team | 628a282 | 2022-01-04 18:04:19 -0800 | [diff] [blame] | 256 | return str.substr(0, static_cast<size_t>(str.rend() - it)); |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 257 | } |
| 258 | |
Abseil Team | bed5bd6 | 2018-08-21 11:31:02 -0700 | [diff] [blame] | 259 | // Strips in place whitespace from the end of the given string |
Dmitri Gribenko | 55621d1 | 2023-12-11 09:07:07 -0800 | [diff] [blame] | 260 | inline void StripTrailingAsciiWhitespace(absl::Nonnull<std::string*> str) { |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 261 | auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace); |
Abseil Team | 628a282 | 2022-01-04 18:04:19 -0800 | [diff] [blame] | 262 | str->erase(static_cast<size_t>(str->rend() - it)); |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 263 | } |
| 264 | |
| 265 | // Returns absl::string_view with whitespace stripped from both ends of the |
| 266 | // given string_view. |
| 267 | ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace( |
| 268 | absl::string_view str) { |
| 269 | return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str)); |
| 270 | } |
| 271 | |
Abseil Team | bed5bd6 | 2018-08-21 11:31:02 -0700 | [diff] [blame] | 272 | // Strips in place whitespace from both ends of the given string |
Dmitri Gribenko | 55621d1 | 2023-12-11 09:07:07 -0800 | [diff] [blame] | 273 | inline void StripAsciiWhitespace(absl::Nonnull<std::string*> str) { |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 274 | StripTrailingAsciiWhitespace(str); |
| 275 | StripLeadingAsciiWhitespace(str); |
| 276 | } |
| 277 | |
| 278 | // Removes leading, trailing, and consecutive internal whitespace. |
Dmitri Gribenko | 55621d1 | 2023-12-11 09:07:07 -0800 | [diff] [blame] | 279 | void RemoveExtraAsciiWhitespace(absl::Nonnull<std::string*> str); |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 280 | |
Abseil Team | 12bc53e | 2019-12-12 10:36:03 -0800 | [diff] [blame] | 281 | ABSL_NAMESPACE_END |
misterg | c2e7548 | 2017-09-19 16:54:40 -0400 | [diff] [blame] | 282 | } // namespace absl |
| 283 | |
| 284 | #endif // ABSL_STRINGS_ASCII_H_ |