| // Copyright 2024 The Abseil Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "absl/debugging/internal/decode_rust_punycode.h" |
| |
| #include <cstddef> |
| #include <cstring> |
| #include <string> |
| |
| #include "gmock/gmock.h" |
| #include "gtest/gtest.h" |
| #include "absl/base/config.h" |
| |
| namespace absl { |
| ABSL_NAMESPACE_BEGIN |
| namespace debugging_internal { |
| namespace { |
| |
| using ::testing::AllOf; |
| using ::testing::Eq; |
| using ::testing::IsNull; |
| using ::testing::Pointee; |
| using ::testing::ResultOf; |
| using ::testing::StrEq; |
| |
| class DecodeRustPunycodeTest : public ::testing::Test { |
| protected: |
| void FillBufferWithNonzeroBytes() { |
| // The choice of nonzero value to fill with is arbitrary. The point is just |
| // to fail tests if DecodeRustPunycode forgets to write the final NUL |
| // character. |
| std::memset(buffer_storage_, 0xab, sizeof(buffer_storage_)); |
| } |
| |
| DecodeRustPunycodeOptions WithAmpleSpace() { |
| FillBufferWithNonzeroBytes(); |
| |
| DecodeRustPunycodeOptions options; |
| options.punycode_begin = punycode_.data(); |
| options.punycode_end = punycode_.data() + punycode_.size(); |
| options.out_begin = buffer_storage_; |
| options.out_end = buffer_storage_ + sizeof(buffer_storage_); |
| return options; |
| } |
| |
| DecodeRustPunycodeOptions WithJustEnoughSpace() { |
| FillBufferWithNonzeroBytes(); |
| |
| const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size() - 1; |
| DecodeRustPunycodeOptions options; |
| options.punycode_begin = punycode_.data(); |
| options.punycode_end = punycode_.data() + punycode_.size(); |
| options.out_begin = buffer_storage_ + begin_offset; |
| options.out_end = buffer_storage_ + sizeof(buffer_storage_); |
| return options; |
| } |
| |
| DecodeRustPunycodeOptions WithOneByteTooFew() { |
| FillBufferWithNonzeroBytes(); |
| |
| const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size(); |
| DecodeRustPunycodeOptions options; |
| options.punycode_begin = punycode_.data(); |
| options.punycode_end = punycode_.data() + punycode_.size(); |
| options.out_begin = buffer_storage_ + begin_offset; |
| options.out_end = buffer_storage_ + sizeof(buffer_storage_); |
| return options; |
| } |
| |
| // Matches a correct return value of DecodeRustPunycode when `golden` is the |
| // expected plaintext output. |
| auto PointsToTheNulAfter(const std::string& golden) { |
| const size_t golden_size = golden.size(); |
| return AllOf( |
| Pointee(Eq('\0')), |
| ResultOf("preceding string body", |
| [golden_size](const char* p) { return p - golden_size; }, |
| StrEq(golden))); |
| } |
| |
| std::string punycode_; |
| std::string plaintext_; |
| char buffer_storage_[1024]; |
| }; |
| |
| TEST_F(DecodeRustPunycodeTest, MapsEmptyToEmpty) { |
| punycode_ = ""; |
| plaintext_ = ""; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, |
| StripsTheTrailingDelimiterFromAPureRunOfBasicChars) { |
| punycode_ = "foo_"; |
| plaintext_ = "foo"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, TreatsTheLastUnderscoreAsTheDelimiter) { |
| punycode_ = "foo_bar_"; |
| plaintext_ = "foo_bar"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsALeadingUnderscoreIfNotTheDelimiter) { |
| punycode_ = "_foo_"; |
| plaintext_ = "_foo"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, RejectsALeadingUnderscoreDelimiter) { |
| punycode_ = "_foo"; |
| |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, RejectsEmbeddedNul) { |
| punycode_ = std::string("foo\0bar_", 8); |
| |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, RejectsAsciiCharsOtherThanIdentifierChars) { |
| punycode_ = "foo\007_"; |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| |
| punycode_ = "foo-_"; |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| |
| punycode_ = "foo;_"; |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| |
| punycode_ = "foo\177_"; |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, RejectsRawNonAsciiChars) { |
| punycode_ = "\x80"; |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| |
| punycode_ = "\x80_"; |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| |
| punycode_ = "\xff"; |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| |
| punycode_ = "\xff_"; |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, RecognizesU0080) { |
| // a encodes 0, so the output is the smallest non-ASCII code point standing |
| // alone. (U+0080 PAD is not an identifier character, but DecodeRustPunycode |
| // does not check whether non-ASCII characters could belong to an identifier.) |
| punycode_ = "a"; |
| plaintext_ = "\xc2\x80"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, OneByteDeltaSequencesMustBeA) { |
| // Because bias = 72 for the first code point, any digit but a/A is nonfinal |
| // in one of the first two bytes of a delta sequence. |
| punycode_ = "b"; |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| |
| punycode_ = "z"; |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| |
| punycode_ = "0"; |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| |
| punycode_ = "9"; |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsDeltaSequenceBA) { |
| punycode_ = "ba"; |
| plaintext_ = "\xc2\x81"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsOtherDeltaSequencesWithSecondByteA) { |
| punycode_ = "ca"; |
| plaintext_ = "\xc2\x82"; |
| EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| |
| punycode_ = "za"; |
| plaintext_ = "\xc2\x99"; |
| EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| |
| punycode_ = "0a"; |
| plaintext_ = "\xc2\x9a"; |
| EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| |
| punycode_ = "1a"; |
| plaintext_ = "\xc2\x9b"; |
| EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| |
| punycode_ = "9a"; |
| plaintext_ = "ยฃ"; // Pound sign, U+00A3 |
| EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, RejectsDeltaWhereTheSecondAndLastDigitIsNotA) { |
| punycode_ = "bb"; |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| |
| punycode_ = "zz"; |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| |
| punycode_ = "00"; |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| |
| punycode_ = "99"; |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsDeltasWithSecondByteBFollowedByA) { |
| punycode_ = "bba"; |
| plaintext_ = "ยค"; // U+00A4 |
| EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| |
| punycode_ = "cba"; |
| plaintext_ = "ยฅ"; // U+00A5 |
| EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| |
| punycode_ = "zba"; |
| plaintext_ = "ยผ"; // U+00BC |
| EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| |
| punycode_ = "0ba"; |
| plaintext_ = "ยฝ"; // U+00BD |
| EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| |
| punycode_ = "1ba"; |
| plaintext_ = "ยพ"; // U+00BE |
| EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| |
| punycode_ = "9ba"; |
| plaintext_ = "ร"; // U+00C6 |
| EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| } |
| |
| // Tests beyond this point use characters allowed in identifiers, so you can |
| // prepend _RNvC1cu<decimal length><underscore if [0-9_] follows> to a test |
| // input and run it through another Rust demangler to verify that the |
| // corresponding golden output is correct. |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAlone) { |
| punycode_ = "0ca"; |
| plaintext_ = "ร "; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharBeforeBasicChars) { |
| punycode_ = "_la_mode_yya"; |
| plaintext_ = "ร _la_mode"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAmidBasicChars) { |
| punycode_ = "verre__vin_m4a"; |
| plaintext_ = "verre_ร _vin"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAfterBasicChars) { |
| punycode_ = "belt_3na"; |
| plaintext_ = "beltร "; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedTwoByteChar) { |
| punycode_ = "0caaaa"; |
| plaintext_ = "ร ร ร ร "; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsInOrder) { |
| punycode_ = "3camsuz"; |
| plaintext_ = "รฃรฉรฏรดรน"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsOutOfOrder) { |
| punycode_ = "3caltsx"; |
| plaintext_ = "รนรฉรดรฃรฏ"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharAlone) { |
| punycode_ = "fiq"; |
| plaintext_ = "ไธญ"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedThreeByteChar) { |
| punycode_ = "fiqaaaa"; |
| plaintext_ = "ไธญไธญไธญไธญไธญ"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharsInOrder) { |
| punycode_ = "fiq228c"; |
| plaintext_ = "ไธญๆ"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsNearbyThreeByteCharsOutOfOrder) { |
| punycode_ = "fiq128c"; |
| plaintext_ = "ๆไธญ"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAlone) { |
| punycode_ = "uy7h"; |
| plaintext_ = "๐ป"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharBeforeBasicChars) { |
| punycode_ = "jack__uh63d"; |
| plaintext_ = "jack_๐ป"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAmidBasicChars) { |
| punycode_ = "jack__of_hearts_ki37n"; |
| plaintext_ = "jack_๐ป_of_hearts"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAfterBasicChars) { |
| punycode_ = "_of_hearts_kz45i"; |
| plaintext_ = "๐ป_of_hearts"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedFourByteChar) { |
| punycode_ = "uy7haaaa"; |
| plaintext_ = "๐ป๐ป๐ป๐ป๐ป"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsInOrder) { |
| punycode_ = "8x7hcjmf"; |
| plaintext_ = "๐ฆ๐ง๐ช๐ญ๐ฎ"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsOutOfOrder) { |
| punycode_ = "8x7hcild"; |
| plaintext_ = "๐ฎ๐ฆ๐ญ๐ช๐ง"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, AcceptsAMixtureOfByteLengths) { |
| punycode_ = "3caltsx2079ivf8aiuy7cja3a6ak"; |
| plaintext_ = "รนรฉรดรฃรฏไธญๆ๐ฎ๐ฆ๐ญ๐ช๐ง"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| TEST_F(DecodeRustPunycodeTest, RejectsOverlargeDeltas) { |
| punycode_ = "123456789a"; |
| |
| EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); |
| } |
| |
| // Finally, we test on a few prose and poetry snippets as a defense in depth. |
| // If our artificial short test inputs did not exercise a bug that is tickled by |
| // patterns typical of real human writing, maybe real human writing will catch |
| // that. |
| // |
| // These test inputs are extracted from texts old enough to be out of copyright |
| // that probe a variety of ranges of code-point space. All are longer than 32 |
| // code points, so they exercise the carrying of seminibbles from one uint64_t |
| // to the next higher one in BoundedUtf8LengthSequence. |
| |
| // The first three lines of the Old English epic _Beowulf_, mostly ASCII with a |
| // few archaic two-byte letters interspersed. |
| TEST_F(DecodeRustPunycodeTest, Beowulf) { |
| punycode_ = "hwt_we_gardena_in_geardagum_" |
| "eodcyninga_rym_gefrunon_" |
| "hu_a_elingas_ellen_fremedon_hxg9c70do9alau"; |
| plaintext_ = "hwรฆt_we_gardena_in_geardagum_" |
| "รพeodcyninga_รพrym_gefrunon_" |
| "hu_รฐa_รฆรพelingas_ellen_fremedon"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| // The whole of ้ๆ
ไบบ่ by the 8th-century Chinese poet ๅญๆตฉ็ถ |
| // (Meng Haoran), exercising three-byte-character processing. |
| TEST_F(DecodeRustPunycodeTest, MengHaoran) { |
| punycode_ = "gmq4ss0cfvao1e2wg8mcw8b0wkl9a7tt90a8riuvbk7t8kbv9a66ogofvzlf6" |
| "3d01ybn1u28dyqi5q2cxyyxnk5d2gx1ks9ddvfm17bk6gbsd6wftrav60u4ta"; |
| plaintext_ = "ๆ
ไบบๅ
ท้้ป" "้ๆ่ณ็ฐๅฎถ" |
| "็ถ ๆจนๆ้ๅ" "้ๅฑฑ้ญๅคๆ" |
| "้่ป้ขๅ ดๅ" "ๆ้
่ฉฑๆก้บป" |
| "ๅพ
ๅฐ้้ฝๆฅ" "้ไพๅฐฑ่่ฑ"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| // A poem of the 8th-century Japanese poet ๅฑฑไธๆถ่ฏ (Yamanoue no Okura). |
| // Japanese mixes two-byte and three-byte characters: a good workout for codecs. |
| TEST_F(DecodeRustPunycodeTest, YamanoueNoOkura) { |
| punycode_ = "48jdaa3a6ccpepjrsmlb0q4bwcdtid8fg6c0cai9822utqeruk3om0u4f2wbp0" |
| "em23do0op23cc2ff70mb6tae8aq759gja"; |
| plaintext_ = "็้ฃใใฐ" |
| "ๅญใฉใๆใปใ" |
| "ๆ ้ฃใใฐ" |
| "ใพใใฆๅฒใฏใ" |
| "ไฝๅฆใใ" |
| "ๆฅใใใใฎใ" |
| "็ผไบคใซ" |
| "ใใจใชๆธใใฆ" |
| "ๅฎ็ ใๅฏใใฌ"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| // The first two lines of the Phoenician-language inscription on the sarcophagus |
| // of Eshmunazar II of Sidon, 6th century BCE. Phoenician and many other |
| // archaic scripts are allocated in the Supplemental Multilingual Plane (U+10000 |
| // through U+1FFFF) and thus exercise four-byte-character processing. |
| TEST_F(DecodeRustPunycodeTest, EshmunazarSarcophagus) { |
| punycode_ = "wj9caaabaabbaaohcacxvhdc7bgxbccbdcjeacddcedcdlddbdbddcdbdcknfcee" |
| "ifel8del2a7inq9fhcpxikms7a4a9ac9ataaa0g"; |
| plaintext_ = "๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค" |
| "๐ค
๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค" |
| "๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค" |
| "๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค" |
| "๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค" |
| "๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค๐ค"; |
| |
| ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), |
| PointsToTheNulAfter(plaintext_)); |
| EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); |
| } |
| |
| } // namespace |
| } // namespace debugging_internal |
| ABSL_NAMESPACE_END |
| } // namespace absl |