| // Copyright 2022 The Abseil Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #ifndef ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_ |
| #define ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_ |
| |
| #include <atomic> |
| #include <cstddef> |
| #include <deque> |
| |
| #include "absl/base/config.h" |
| #include "absl/crc/crc32c.h" |
| |
| namespace absl { |
| ABSL_NAMESPACE_BEGIN |
| namespace crc_internal { |
| |
| // CrcCordState is a copy-on-write class that holds the chunked CRC32C data |
| // that allows CrcCord to perform efficient substring operations. CrcCordState |
| // is used as a member variable in CrcCord. When a CrcCord is converted to a |
| // Cord, the CrcCordState is shallow-copied into the root node of the Cord. If |
| // the converted Cord is modified outside of CrcCord, the CrcCordState is |
| // discarded from the Cord. If the Cord is converted back to a CrcCord, and the |
| // Cord is still carrying the CrcCordState in its root node, the CrcCord can |
| // re-use the CrcCordState, making the construction of the CrcCord cheap. |
| // |
| // CrcCordState does not try to encapsulate the CRC32C state (CrcCord requires |
| // knowledge of how CrcCordState represents the CRC32C state). It does |
| // encapsulate the copy-on-write nature of the state. |
| class CrcCordState { |
| public: |
| // Constructors. |
| CrcCordState(); |
| CrcCordState(const CrcCordState&); |
| CrcCordState(CrcCordState&&); |
| |
| // Destructor. Atomically unreferences the data. |
| ~CrcCordState(); |
| |
| // Copy and move operators. |
| CrcCordState& operator=(const CrcCordState&); |
| CrcCordState& operator=(CrcCordState&&); |
| |
| // A (length, crc) pair. |
| struct PrefixCrc { |
| PrefixCrc() = default; |
| PrefixCrc(size_t length_arg, absl::crc32c_t crc_arg) |
| : length(length_arg), crc(crc_arg) {} |
| |
| size_t length = 0; |
| |
| // TODO(absl-team): Memory stomping often zeros out memory. If this struct |
| // gets overwritten, we could end up with {0, 0}, which is the correct CRC |
| // for a string of length 0. Consider storing a scrambled value and |
| // unscrambling it before verifying it. |
| absl::crc32c_t crc = absl::crc32c_t{0}; |
| }; |
| |
| // The representation of the chunked CRC32C data. |
| struct Rep { |
| // `removed_prefix` is the crc and length of any prefix that has been |
| // removed from the Cord (for example, by calling |
| // `CrcCord::RemovePrefix()`). To get the checksum of any prefix of the |
| // cord, this value must be subtracted from `prefix_crc`. See `Checksum()` |
| // for an example. |
| // |
| // CrcCordState is said to be "normalized" if removed_prefix.length == 0. |
| PrefixCrc removed_prefix; |
| |
| // A deque of (length, crc) pairs, representing length and crc of a prefix |
| // of the Cord, before removed_prefix has been subtracted. The lengths of |
| // the prefixes are stored in increasing order. If the Cord is not empty, |
| // the last value in deque is the contains the CRC32C of the entire Cord |
| // when removed_prefix is subtracted from it. |
| std::deque<PrefixCrc> prefix_crc; |
| }; |
| |
| // Returns a reference to the representation of the chunked CRC32C data. |
| const Rep& rep() const { return refcounted_rep_->rep; } |
| |
| // Returns a mutable reference to the representation of the chunked CRC32C |
| // data. Calling this function will copy the data if another instance also |
| // holds a reference to the data, so it is important to call rep() instead if |
| // the data may not be mutated. |
| Rep* mutable_rep() { |
| if (refcounted_rep_->count.load(std::memory_order_acquire) != 1) { |
| RefcountedRep* copy = new RefcountedRep; |
| copy->rep = refcounted_rep_->rep; |
| Unref(refcounted_rep_); |
| refcounted_rep_ = copy; |
| } |
| return &refcounted_rep_->rep; |
| } |
| |
| // Returns the CRC32C of the entire Cord. |
| absl::crc32c_t Checksum() const; |
| |
| // Returns true if the chunked CRC32C cached is normalized. |
| bool IsNormalized() const { return rep().removed_prefix.length == 0; } |
| |
| // Normalizes the chunked CRC32C checksum cache by subtracting any removed |
| // prefix from the chunks. |
| void Normalize(); |
| |
| // Returns the number of cached chunks. |
| size_t NumChunks() const { return rep().prefix_crc.size(); } |
| |
| // Helper that returns the (length, crc) of the `n`-th cached chunked. |
| PrefixCrc NormalizedPrefixCrcAtNthChunk(size_t n) const; |
| |
| // Poisons all chunks to so that Checksum() will likely be incorrect with high |
| // probability. |
| void Poison(); |
| |
| private: |
| struct RefcountedRep { |
| std::atomic<int32_t> count{1}; |
| Rep rep; |
| }; |
| |
| // Adds a reference to the shared global empty `RefcountedRep`, and returns a |
| // pointer to the `RefcountedRep`. This is an optimization to avoid unneeded |
| // allocations when the allocation is unlikely to ever be used. The returned |
| // pointer can be `Unref()`ed when it is no longer needed. Since the returned |
| // instance will always have a reference counter greater than 1, attempts to |
| // modify it (by calling `mutable_rep()`) will create a new unshared copy. |
| static RefcountedRep* RefSharedEmptyRep(); |
| |
| static void Ref(RefcountedRep* r) { |
| assert(r != nullptr); |
| r->count.fetch_add(1, std::memory_order_relaxed); |
| } |
| |
| static void Unref(RefcountedRep* r) { |
| assert(r != nullptr); |
| if (r->count.fetch_sub(1, std::memory_order_acq_rel) == 1) { |
| delete r; |
| } |
| } |
| |
| RefcountedRep* refcounted_rep_; |
| }; |
| |
| } // namespace crc_internal |
| ABSL_NAMESPACE_END |
| } // namespace absl |
| |
| #endif // ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_ |