blob: d9317eb11330336a8fded2e7594b649ddacdd7a5 [file] [log] [blame]
mistergc2e75482017-09-19 16:54:40 -04001//
2// Copyright 2017 The Abseil Authors.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
nik727338b70432019-03-08 10:27:53 -05008// https://www.apache.org/licenses/LICENSE-2.0
mistergc2e75482017-09-19 16:54:40 -04009//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16// -----------------------------------------------------------------------------
17// File: ascii.h
18// -----------------------------------------------------------------------------
19//
20// This package contains functions operating on characters and strings
21// restricted to standard ASCII. These include character classification
22// functions analogous to those found in the ANSI C Standard Library <ctype.h>
23// header file.
24//
25// C++ implementations provide <ctype.h> functionality based on their
26// C environment locale. In general, reliance on such a locale is not ideal, as
27// the locale standard is problematic (and may not return invariant information
28// for the same character set, for example). These `ascii_*()` functions are
29// hard-wired for standard ASCII, much faster, and guaranteed to behave
30// consistently. They will never be overloaded, nor will their function
31// signature change.
32//
33// `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`,
34// `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`,
35// `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`,
36// `ascii_isxdigit()`
Bruce Mitchener08760ad2018-04-20 01:11:44 +070037// Analogous to the <ctype.h> functions with similar names, these
mistergc2e75482017-09-19 16:54:40 -040038// functions take an unsigned char and return a bool, based on whether the
39// character matches the condition specified.
40//
41// If the input character has a numerical value greater than 127, these
42// functions return `false`.
43//
44// `ascii_tolower()`, `ascii_toupper()`
Bruce Mitchener08760ad2018-04-20 01:11:44 +070045// Analogous to the <ctype.h> functions with similar names, these functions
mistergc2e75482017-09-19 16:54:40 -040046// take an unsigned char and return a char.
47//
48// If the input character is not an ASCII {lower,upper}-case letter (including
49// numerical values greater than 127) then the functions return the same value
50// as the input character.
51
52#ifndef ABSL_STRINGS_ASCII_H_
53#define ABSL_STRINGS_ASCII_H_
54
55#include <algorithm>
Dmitri Gribenko0ddbfd52023-08-08 09:46:31 -070056#include <cstddef>
mistergc2e75482017-09-19 16:54:40 -040057#include <string>
Abseil Team6b8ebb32024-07-30 13:54:55 -070058#include <utility>
mistergc2e75482017-09-19 16:54:40 -040059
60#include "absl/base/attributes.h"
Abseil Team37dd2562020-01-28 11:50:11 -080061#include "absl/base/config.h"
Dmitri Gribenko55621d12023-12-11 09:07:07 -080062#include "absl/base/nullability.h"
Abseil Team5ea745c2024-07-23 07:08:41 -070063#include "absl/strings/internal/resize_uninitialized.h"
mistergc2e75482017-09-19 16:54:40 -040064#include "absl/strings/string_view.h"
65
66namespace absl {
Abseil Team12bc53e2019-12-12 10:36:03 -080067ABSL_NAMESPACE_BEGIN
mistergc2e75482017-09-19 16:54:40 -040068namespace ascii_internal {
69
70// Declaration for an array of bitfields holding character information.
Abseil Team37dd2562020-01-28 11:50:11 -080071ABSL_DLL extern const unsigned char kPropertyBits[256];
mistergc2e75482017-09-19 16:54:40 -040072
73// Declaration for the array of characters to upper-case characters.
Abseil Team37dd2562020-01-28 11:50:11 -080074ABSL_DLL extern const char kToUpper[256];
mistergc2e75482017-09-19 16:54:40 -040075
76// Declaration for the array of characters to lower-case characters.
Abseil Team37dd2562020-01-28 11:50:11 -080077ABSL_DLL extern const char kToLower[256];
mistergc2e75482017-09-19 16:54:40 -040078
Martin Brænnef555f692024-09-12 07:56:09 -070079void AsciiStrToLower(absl::Nonnull<char*> dst, absl::Nullable<const char*> src,
Abseil Team5ea745c2024-07-23 07:08:41 -070080 size_t n);
81
Martin Brænnef555f692024-09-12 07:56:09 -070082void AsciiStrToUpper(absl::Nonnull<char*> dst, absl::Nullable<const char*> src,
Abseil Team5ea745c2024-07-23 07:08:41 -070083 size_t n);
84
mistergc2e75482017-09-19 16:54:40 -040085} // namespace ascii_internal
86
87// ascii_isalpha()
88//
89// Determines whether the given character is an alphabetic character.
90inline bool ascii_isalpha(unsigned char c) {
91 return (ascii_internal::kPropertyBits[c] & 0x01) != 0;
92}
93
94// ascii_isalnum()
95//
96// Determines whether the given character is an alphanumeric character.
97inline bool ascii_isalnum(unsigned char c) {
98 return (ascii_internal::kPropertyBits[c] & 0x04) != 0;
99}
100
101// ascii_isspace()
102//
103// Determines whether the given character is a whitespace character (space,
104// tab, vertical tab, formfeed, linefeed, or carriage return).
105inline bool ascii_isspace(unsigned char c) {
106 return (ascii_internal::kPropertyBits[c] & 0x08) != 0;
107}
108
109// ascii_ispunct()
110//
111// Determines whether the given character is a punctuation character.
112inline bool ascii_ispunct(unsigned char c) {
113 return (ascii_internal::kPropertyBits[c] & 0x10) != 0;
114}
115
116// ascii_isblank()
117//
118// Determines whether the given character is a blank character (tab or space).
119inline bool ascii_isblank(unsigned char c) {
120 return (ascii_internal::kPropertyBits[c] & 0x20) != 0;
121}
122
123// ascii_iscntrl()
124//
125// Determines whether the given character is a control character.
126inline bool ascii_iscntrl(unsigned char c) {
127 return (ascii_internal::kPropertyBits[c] & 0x40) != 0;
128}
129
130// ascii_isxdigit()
131//
132// Determines whether the given character can be represented as a hexadecimal
133// digit character (i.e. {0-9} or {A-F}).
134inline bool ascii_isxdigit(unsigned char c) {
135 return (ascii_internal::kPropertyBits[c] & 0x80) != 0;
136}
137
// ascii_isdigit()
//
// Returns true if `c` is one of the decimal digit characters '0' through '9'.
inline constexpr bool ascii_isdigit(unsigned char c) {
  return !(c < '0' || c > '9');
}
mistergc2e75482017-09-19 16:54:40 -0400145
// ascii_isprint()
//
// Returns true if `c` is printable, which includes the space character:
// that is, if its value lies in the range [32, 126].
inline constexpr bool ascii_isprint(unsigned char c) {
  return 32 <= c && c <= 126;
}
mistergc2e75482017-09-19 16:54:40 -0400152
// ascii_isgraph()
//
// Returns true if `c` has a graphical representation: that is, if its value
// lies in the range [33, 126] (printable, excluding the space character).
inline constexpr bool ascii_isgraph(unsigned char c) {
  return 33 <= c && c <= 126;
}
mistergc2e75482017-09-19 16:54:40 -0400159
// ascii_isupper()
//
// Returns true if `c` is an uppercase letter, 'A' through 'Z'.
inline constexpr bool ascii_isupper(unsigned char c) {
  return 'A' <= c && c <= 'Z';
}
mistergc2e75482017-09-19 16:54:40 -0400166
// ascii_islower()
//
// Returns true if `c` is a lowercase letter, 'a' through 'z'.
inline constexpr bool ascii_islower(unsigned char c) {
  return !(c < 'a' || c > 'z');
}
mistergc2e75482017-09-19 16:54:40 -0400173
// ascii_isascii()
//
// Returns true if `c` is an ASCII character, i.e. its value is at most 127.
inline constexpr bool ascii_isascii(unsigned char c) { return c <= 0x7F; }
mistergc2e75482017-09-19 16:54:40 -0400178
179// ascii_tolower()
180//
181// Returns an ASCII character, converting to lowercase if uppercase is
182// passed. Note that character values > 127 are simply returned.
183inline char ascii_tolower(unsigned char c) {
184 return ascii_internal::kToLower[c];
185}
186
187// Converts the characters in `s` to lowercase, changing the contents of `s`.
Dmitri Gribenko55621d12023-12-11 09:07:07 -0800188void AsciiStrToLower(absl::Nonnull<std::string*> s);
mistergc2e75482017-09-19 16:54:40 -0400189
Abseil Teambed5bd62018-08-21 11:31:02 -0700190// Creates a lowercase string from a given absl::string_view.
mistergc2e75482017-09-19 16:54:40 -0400191ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) {
Abseil Team5ea745c2024-07-23 07:08:41 -0700192 std::string result;
193 strings_internal::STLStringResizeUninitialized(&result, s.size());
194 ascii_internal::AsciiStrToLower(&result[0], s.data(), s.size());
mistergc2e75482017-09-19 16:54:40 -0400195 return result;
196}
197
Abseil Team6b8ebb32024-07-30 13:54:55 -0700198// Creates a lowercase string from a given std::string&&.
199//
200// (Template is used to lower priority of this overload.)
201template <int&... DoNotSpecify>
202ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(std::string&& s) {
203 std::string result = std::move(s);
204 absl::AsciiStrToLower(&result);
205 return result;
206}
207
mistergc2e75482017-09-19 16:54:40 -0400208// ascii_toupper()
209//
210// Returns the ASCII character, converting to upper-case if lower-case is
211// passed. Note that characters values > 127 are simply returned.
212inline char ascii_toupper(unsigned char c) {
213 return ascii_internal::kToUpper[c];
214}
215
216// Converts the characters in `s` to uppercase, changing the contents of `s`.
Dmitri Gribenko55621d12023-12-11 09:07:07 -0800217void AsciiStrToUpper(absl::Nonnull<std::string*> s);
mistergc2e75482017-09-19 16:54:40 -0400218
Abseil Teambed5bd62018-08-21 11:31:02 -0700219// Creates an uppercase string from a given absl::string_view.
mistergc2e75482017-09-19 16:54:40 -0400220ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) {
Abseil Team5ea745c2024-07-23 07:08:41 -0700221 std::string result;
222 strings_internal::STLStringResizeUninitialized(&result, s.size());
223 ascii_internal::AsciiStrToUpper(&result[0], s.data(), s.size());
mistergc2e75482017-09-19 16:54:40 -0400224 return result;
225}
226
Abseil Team6b8ebb32024-07-30 13:54:55 -0700227// Creates an uppercase string from a given std::string&&.
228//
229// (Template is used to lower priority of this overload.)
230template <int&... DoNotSpecify>
231ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(std::string&& s) {
232 std::string result = std::move(s);
233 absl::AsciiStrToUpper(&result);
234 return result;
235}
236
mistergc2e75482017-09-19 16:54:40 -0400237// Returns absl::string_view with whitespace stripped from the beginning of the
238// given string_view.
239ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace(
240 absl::string_view str) {
241 auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace);
Abseil Team628a2822022-01-04 18:04:19 -0800242 return str.substr(static_cast<size_t>(it - str.begin()));
mistergc2e75482017-09-19 16:54:40 -0400243}
244
Abseil Teambed5bd62018-08-21 11:31:02 -0700245// Strips in place whitespace from the beginning of the given string.
Dmitri Gribenko55621d12023-12-11 09:07:07 -0800246inline void StripLeadingAsciiWhitespace(absl::Nonnull<std::string*> str) {
mistergc2e75482017-09-19 16:54:40 -0400247 auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace);
248 str->erase(str->begin(), it);
249}
250
251// Returns absl::string_view with whitespace stripped from the end of the given
252// string_view.
253ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace(
254 absl::string_view str) {
255 auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace);
Abseil Team628a2822022-01-04 18:04:19 -0800256 return str.substr(0, static_cast<size_t>(str.rend() - it));
mistergc2e75482017-09-19 16:54:40 -0400257}
258
Abseil Teambed5bd62018-08-21 11:31:02 -0700259// Strips in place whitespace from the end of the given string
Dmitri Gribenko55621d12023-12-11 09:07:07 -0800260inline void StripTrailingAsciiWhitespace(absl::Nonnull<std::string*> str) {
mistergc2e75482017-09-19 16:54:40 -0400261 auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace);
Abseil Team628a2822022-01-04 18:04:19 -0800262 str->erase(static_cast<size_t>(str->rend() - it));
mistergc2e75482017-09-19 16:54:40 -0400263}
264
265// Returns absl::string_view with whitespace stripped from both ends of the
266// given string_view.
267ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace(
268 absl::string_view str) {
269 return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str));
270}
271
Abseil Teambed5bd62018-08-21 11:31:02 -0700272// Strips in place whitespace from both ends of the given string
Dmitri Gribenko55621d12023-12-11 09:07:07 -0800273inline void StripAsciiWhitespace(absl::Nonnull<std::string*> str) {
mistergc2e75482017-09-19 16:54:40 -0400274 StripTrailingAsciiWhitespace(str);
275 StripLeadingAsciiWhitespace(str);
276}
277
278// Removes leading, trailing, and consecutive internal whitespace.
Dmitri Gribenko55621d12023-12-11 09:07:07 -0800279void RemoveExtraAsciiWhitespace(absl::Nonnull<std::string*> str);
mistergc2e75482017-09-19 16:54:40 -0400280
Abseil Team12bc53e2019-12-12 10:36:03 -0800281ABSL_NAMESPACE_END
mistergc2e75482017-09-19 16:54:40 -0400282} // namespace absl
283
284#endif // ABSL_STRINGS_ASCII_H_