blob: cacbf3da30b6fbe56fa1a370e3a6a7900c276616 [file] [log] [blame]
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_btree.h"
#include <cassert>
#include <cstdint>
#include <iostream>
#include <string>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_consume.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL
constexpr size_t CordRepBtree::kMaxCapacity;
#endif
namespace {
using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth];
using EdgeType = CordRepBtree::EdgeType;
using OpResult = CordRepBtree::OpResult;
using CopyResult = CordRepBtree::CopyResult;
constexpr auto kFront = CordRepBtree::kFront;
constexpr auto kBack = CordRepBtree::kBack;
inline bool exhaustive_validation() {
return cord_btree_exhaustive_validation.load(std::memory_order_relaxed);
}
// Implementation of the various 'Dump' functions.
// Prints the entire tree structure or 'rep'. External callers should
// not specify 'depth' and leave it to its default (0) value.
// Rep may be a CordRepBtree tree, or a SUBSTRING / EXTERNAL / FLAT node.
void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream,
int depth = 0) {
// Allow for full height trees + substring -> flat / external nodes.
assert(depth <= CordRepBtree::kMaxDepth + 2);
std::string sharing = const_cast<CordRep*>(rep)->refcount.IsOne()
? std::string("Private")
: absl::StrCat("Shared(", rep->refcount.Get(), ")");
std::string sptr = absl::StrCat("0x", absl::Hex(rep));
// Dumps the data contents of `rep` if `include_contents` is true.
// Always emits a new line character.
auto maybe_dump_data = [&stream, include_contents](const CordRep* r) {
if (include_contents) {
// Allow for up to 60 wide display of content data, which with some
// indentation and prefix / labels keeps us within roughly 80-100 wide.
constexpr size_t kMaxDataLength = 60;
stream << ", data = \""
<< EdgeData(r).substr(0, kMaxDataLength)
<< (r->length > kMaxDataLength ? "\"..." : "\"");
}
stream << '\n';
};
// For each level, we print the 'shared/private' state and the rep pointer,
// indented by two spaces per recursive depth.
stream << std::string(depth * 2, ' ') << sharing << " (" << sptr << ") ";
if (rep->IsBtree()) {
const CordRepBtree* node = rep->btree();
std::string label =
node->height() ? absl::StrCat("Node(", node->height(), ")") : "Leaf";
stream << label << ", len = " << node->length
<< ", begin = " << node->begin() << ", end = " << node->end()
<< "\n";
for (CordRep* edge : node->Edges()) {
DumpAll(edge, include_contents, stream, depth + 1);
}
} else if (rep->tag == SUBSTRING) {
const CordRepSubstring* substring = rep->substring();
stream << "Substring, len = " << rep->length
<< ", start = " << substring->start;
maybe_dump_data(rep);
DumpAll(substring->child, include_contents, stream, depth + 1);
} else if (rep->tag >= FLAT) {
stream << "Flat, len = " << rep->length
<< ", cap = " << rep->flat()->Capacity();
maybe_dump_data(rep);
} else if (rep->tag == EXTERNAL) {
stream << "Extn, len = " << rep->length;
maybe_dump_data(rep);
}
}
// TODO(b/192061034): add 'bytes to copy' logic to avoid large slop on substring
// small data out of large reps, and general efficiency of 'always copy small
// data'. Consider making this a cord rep internal library function.
CordRepSubstring* CreateSubstring(CordRep* rep, size_t offset, size_t n) {
assert(n != 0);
assert(offset + n <= rep->length);
assert(offset != 0 || n != rep->length);
if (rep->tag == SUBSTRING) {
CordRepSubstring* substring = rep->substring();
offset += substring->start;
rep = CordRep::Ref(substring->child);
CordRep::Unref(substring);
}
assert(rep->IsExternal() || rep->IsFlat());
CordRepSubstring* substring = new CordRepSubstring();
substring->length = n;
substring->tag = SUBSTRING;
substring->start = offset;
substring->child = rep;
return substring;
}
// TODO(b/192061034): consider making this a cord rep library function.
inline CordRep* MakeSubstring(CordRep* rep, size_t offset, size_t n) {
if (n == rep->length) return rep;
if (n == 0) return CordRep::Unref(rep), nullptr;
return CreateSubstring(rep, offset, n);
}
// TODO(b/192061034): consider making this a cord rep library function.
inline CordRep* MakeSubstring(CordRep* rep, size_t offset) {
if (offset == 0) return rep;
return CreateSubstring(rep, offset, rep->length - offset);
}
// Resizes `edge` to the provided `length`. Adopts a reference on `edge`.
// This method directly returns `edge` if `length` equals `edge->length`.
// If `is_mutable` is set to true, this function may return `edge` with
// `edge->length` set to the new length depending on the type and size of
// `edge`. Otherwise, this function returns a new CordRepSubstring value.
// Requires `length > 0 && length <= edge->length`.
CordRep* ResizeEdge(CordRep* edge, size_t length, bool is_mutable) {
assert(length > 0);
assert(length <= edge->length);
assert(IsDataEdge(edge));
if (length >= edge->length) return edge;
if (is_mutable && (edge->tag >= FLAT || edge->tag == SUBSTRING)) {
edge->length = length;
return edge;
}
return CreateSubstring(edge, 0, length);
}
template <EdgeType edge_type>
inline absl::string_view Consume(absl::string_view s, size_t n) {
return edge_type == kBack ? s.substr(n) : s.substr(0, s.size() - n);
}
template <EdgeType edge_type>
inline absl::string_view Consume(char* dst, absl::string_view s, size_t n) {
if (edge_type == kBack) {
memcpy(dst, s.data(), n);
return s.substr(n);
} else {
const size_t offset = s.size() - n;
memcpy(dst, s.data() + offset, n);
return s.substr(0, offset);
}
}
// Known issue / optimization weirdness: the store associated with the
// decrement introduces traffic between cpus (even if the result of that
// traffic does nothing), making this faster than a single call to
// refcount.Decrement() checking the zero refcount condition.
template <typename R, typename Fn>
inline void FastUnref(R* r, Fn&& fn) {
if (r->refcount.IsOne()) {
fn(r);
} else if (!r->refcount.DecrementExpectHighRefcount()) {
fn(r);
}
}
void DeleteSubstring(CordRepSubstring* substring) {
CordRep* rep = substring->child;
if (!rep->refcount.Decrement()) {
if (rep->tag >= FLAT) {
CordRepFlat::Delete(rep->flat());
} else {
assert(rep->tag == EXTERNAL);
CordRepExternal::Delete(rep->external());
}
}
delete substring;
}
// Deletes a leaf node data edge. Requires `IsDataEdge(rep)`.
void DeleteLeafEdge(CordRep* rep) {
assert(IsDataEdge(rep));
if (rep->tag >= FLAT) {
CordRepFlat::Delete(rep->flat());
} else if (rep->tag == EXTERNAL) {
CordRepExternal::Delete(rep->external());
} else {
DeleteSubstring(rep->substring());
}
}
// StackOperations contains the logic to build a left-most or right-most stack
// (leg) down to the leaf level of a btree, and 'unwind' / 'Finalize' methods to
// propagate node changes up the stack.
template <EdgeType edge_type>
struct StackOperations {
// Returns true if the node at 'depth' is not shared, i.e. has a refcount
// of one and all of its parent nodes have a refcount of one.
inline bool owned(int depth) const { return depth < share_depth; }
// Returns the node at 'depth'.
inline CordRepBtree* node(int depth) const { return stack[depth]; }
// Builds a `depth` levels deep stack starting at `tree` recording which nodes
// are private in the form of the 'share depth' where nodes are shared.
inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) {
assert(depth <= tree->height());
int current_depth = 0;
while (current_depth < depth && tree->refcount.IsOne()) {
stack[current_depth++] = tree;
tree = tree->Edge(edge_type)->btree();
}
share_depth = current_depth + (tree->refcount.IsOne() ? 1 : 0);
while (current_depth < depth) {
stack[current_depth++] = tree;
tree = tree->Edge(edge_type)->btree();
}
return tree;
}
// Builds a stack with the invariant that all nodes are private owned / not
// shared. This is used in iterative updates where a previous propagation
// guaranteed all nodes are owned / private.
inline void BuildOwnedStack(CordRepBtree* tree, int height) {
assert(height <= CordRepBtree::kMaxHeight);
int depth = 0;
while (depth < height) {
assert(tree->refcount.IsOne());
stack[depth++] = tree;
tree = tree->Edge(edge_type)->btree();
}
assert(tree->refcount.IsOne());
share_depth = depth + 1;
}
// Processes the final 'top level' result action for the tree.
// See the 'Action' enum for the various action implications.
static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) {
switch (result.action) {
case CordRepBtree::kPopped:
tree = edge_type == kBack ? CordRepBtree::New(tree, result.tree)
: CordRepBtree::New(result.tree, tree);
if (ABSL_PREDICT_FALSE(tree->height() > CordRepBtree::kMaxHeight)) {
tree = CordRepBtree::Rebuild(tree);
ABSL_RAW_CHECK(tree->height() <= CordRepBtree::kMaxHeight,
"Max height exceeded");
}
return tree;
case CordRepBtree::kCopied:
CordRep::Unref(tree);
ABSL_FALLTHROUGH_INTENDED;
case CordRepBtree::kSelf:
return result.tree;
}
ABSL_INTERNAL_UNREACHABLE;
return result.tree;
}
// Propagate the action result in 'result' up into all nodes of the stack
// starting at depth 'depth'. 'length' contains the extra length of data that
// was added at the lowest level, and is updated into all nodes of the stack.
// See the 'Action' enum for the various action implications.
// If 'propagate' is true, then any copied node values are updated into the
// stack, which is used for iterative processing on the same stack.
template <bool propagate = false>
inline CordRepBtree* Unwind(CordRepBtree* tree, int depth, size_t length,
OpResult result) {
// TODO(mvels): revisit the below code to check if 3 loops with 3
// (incremental) conditions is faster than 1 loop with a switch.
// Benchmarking and perf recordings indicate the loop with switch is
// fastest, likely because of indirect jumps on the tight case values and
// dense branches. But it's worth considering 3 loops, as the `action`
// transitions are mono directional. E.g.:
// while (action == kPopped) {
// ...
// }
// while (action == kCopied) {
// ...
// }
// ...
// We also found that an "if () do {}" loop here seems faster, possibly
// because it allows the branch predictor more granular heuristics on
// 'single leaf' (`depth` == 0) and 'single depth' (`depth` == 1) cases
// which appear to be the most common use cases.
if (depth != 0) {
do {
CordRepBtree* node = stack[--depth];
const bool owned = depth < share_depth;
switch (result.action) {
case CordRepBtree::kPopped:
assert(!propagate);
result = node->AddEdge<edge_type>(owned, result.tree, length);
break;
case CordRepBtree::kCopied:
result = node->SetEdge<edge_type>(owned, result.tree, length);
if (propagate) stack[depth] = result.tree;
break;
case CordRepBtree::kSelf:
node->length += length;
while (depth > 0) {
node = stack[--depth];
node->length += length;
}
return node;
}
} while (depth > 0);
}
return Finalize(tree, result);
}
// Invokes `Unwind` with `propagate=true` to update the stack node values.
inline CordRepBtree* Propagate(CordRepBtree* tree, int depth, size_t length,
OpResult result) {
return Unwind</*propagate=*/true>(tree, depth, length, result);
}
// `share_depth` contains the depth at which the nodes in the stack become
// shared. I.e., if the top most level is shared (i.e.: `!refcount.IsOne()`),
// then `share_depth` is 0. If the 2nd node is shared (and implicitly all
// nodes below that) then `share_depth` is 1, etc. A `share_depth` greater
// than the depth of the stack indicates that none of the nodes in the stack
// are shared.
int share_depth;
NodeStack stack;
};
} // namespace
void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
bool include_contents, std::ostream& stream) {
stream << "===================================\n";
if (!label.empty()) {
stream << label << '\n';
stream << "-----------------------------------\n";
}
if (rep) {
DumpAll(rep, include_contents, stream);
} else {
stream << "NULL\n";
}
}
void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
std::ostream& stream) {
Dump(rep, label, false, stream);
}
void CordRepBtree::Dump(const CordRep* rep, std::ostream& stream) {
Dump(rep, absl::string_view(), false, stream);
}
template <size_t size>
static void DestroyTree(CordRepBtree* tree) {
for (CordRep* node : tree->Edges()) {
if (node->refcount.Decrement()) continue;
for (CordRep* edge : node->btree()->Edges()) {
if (edge->refcount.Decrement()) continue;
if (size == 1) {
DeleteLeafEdge(edge);
} else {
CordRepBtree::Destroy(edge->btree());
}
}
CordRepBtree::Delete(node->btree());
}
CordRepBtree::Delete(tree);
}
void CordRepBtree::Destroy(CordRepBtree* tree) {
switch (tree->height()) {
case 0:
for (CordRep* edge : tree->Edges()) {
if (!edge->refcount.Decrement()) {
DeleteLeafEdge(edge);
}
}
return CordRepBtree::Delete(tree);
case 1:
return DestroyTree<1>(tree);
default:
return DestroyTree<2>(tree);
}
}
bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) {
#define NODE_CHECK_VALID(x) \
if (!(x)) { \
ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \
return false; \
}
#define NODE_CHECK_EQ(x, y) \
if ((x) != (y)) { \
ABSL_RAW_LOG(ERROR, \
"CordRepBtree::CheckValid() FAILED: %s != %s (%s vs %s)", #x, \
#y, absl::StrCat(x).c_str(), absl::StrCat(y).c_str()); \
return false; \
}
NODE_CHECK_VALID(tree != nullptr);
NODE_CHECK_VALID(tree->IsBtree());
NODE_CHECK_VALID(tree->height() <= kMaxHeight);
NODE_CHECK_VALID(tree->begin() < tree->capacity());
NODE_CHECK_VALID(tree->end() <= tree->capacity());
NODE_CHECK_VALID(tree->begin() <= tree->end());
size_t child_length = 0;
for (CordRep* edge : tree->Edges()) {
NODE_CHECK_VALID(edge != nullptr);
if (tree->height() > 0) {
NODE_CHECK_VALID(edge->IsBtree());
NODE_CHECK_VALID(edge->btree()->height() == tree->height() - 1);
} else {
NODE_CHECK_VALID(IsDataEdge(edge));
}
child_length += edge->length;
}
NODE_CHECK_EQ(child_length, tree->length);
if ((!shallow || exhaustive_validation()) && tree->height() > 0) {
for (CordRep* edge : tree->Edges()) {
if (!IsValid(edge->btree(), shallow)) return false;
}
}
return true;
#undef NODE_CHECK_VALID
#undef NODE_CHECK_EQ
}
#ifndef NDEBUG
CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, bool shallow) {
if (!IsValid(tree, shallow)) {
Dump(tree, "CordRepBtree validation failed:", false, std::cout);
ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
}
return tree;
}
const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
bool shallow) {
if (!IsValid(tree, shallow)) {
Dump(tree, "CordRepBtree validation failed:", false, std::cout);
ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
}
return tree;
}
#endif // NDEBUG
template <EdgeType edge_type>
inline OpResult CordRepBtree::AddEdge(bool owned, CordRep* edge, size_t delta) {
if (size() >= kMaxCapacity) return {New(edge), kPopped};
OpResult result = ToOpResult(owned);
result.tree->Add<edge_type>(edge);
result.tree->length += delta;
return result;
}
template <EdgeType edge_type>
OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) {
OpResult result;
const size_t idx = index(edge_type);
if (owned) {
result = {this, kSelf};
CordRep::Unref(edges_[idx]);
} else {
// Create a copy containing all unchanged edges. Unchanged edges are the
// open interval [begin, back) or [begin + 1, end) depending on `edge_type`.
// We conveniently cover both case using a constexpr `shift` being 0 or 1
// as `end :== back + 1`.
result = {CopyRaw(), kCopied};
constexpr int shift = edge_type == kFront ? 1 : 0;
for (CordRep* r : Edges(begin() + shift, back() + shift)) {
CordRep::Ref(r);
}
}
result.tree->edges_[idx] = edge;
result.tree->length += delta;
return result;
}
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::AddCordRep(CordRepBtree* tree, CordRep* rep) {
const int depth = tree->height();
const size_t length = rep->length;
StackOperations<edge_type> ops;
CordRepBtree* leaf = ops.BuildStack(tree, depth);
const OpResult result =
leaf->AddEdge<edge_type>(ops.owned(depth), rep, length);
return ops.Unwind(tree, depth, length, result);
}
template <>
CordRepBtree* CordRepBtree::NewLeaf<kBack>(absl::string_view data,
size_t extra) {
CordRepBtree* leaf = CordRepBtree::New(0);
size_t length = 0;
size_t end = 0;
const size_t cap = leaf->capacity();
while (!data.empty() && end != cap) {
auto* flat = CordRepFlat::New(data.length() + extra);
flat->length = (std::min)(data.length(), flat->Capacity());
length += flat->length;
leaf->edges_[end++] = flat;
data = Consume<kBack>(flat->Data(), data, flat->length);
}
leaf->length = length;
leaf->set_end(end);
return leaf;
}
template <>
CordRepBtree* CordRepBtree::NewLeaf<kFront>(absl::string_view data,
size_t extra) {
CordRepBtree* leaf = CordRepBtree::New(0);
size_t length = 0;
size_t begin = leaf->capacity();
leaf->set_end(leaf->capacity());
while (!data.empty() && begin != 0) {
auto* flat = CordRepFlat::New(data.length() + extra);
flat->length = (std::min)(data.length(), flat->Capacity());
length += flat->length;
leaf->edges_[--begin] = flat;
data = Consume<kFront>(flat->Data(), data, flat->length);
}
leaf->length = length;
leaf->set_begin(begin);
return leaf;
}
template <>
absl::string_view CordRepBtree::AddData<kBack>(absl::string_view data,
size_t extra) {
assert(!data.empty());
assert(size() < capacity());
AlignBegin();
const size_t cap = capacity();
do {
CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
const size_t n = (std::min)(data.length(), flat->Capacity());
flat->length = n;
edges_[fetch_add_end(1)] = flat;
data = Consume<kBack>(flat->Data(), data, n);
} while (!data.empty() && end() != cap);
return data;
}
template <>
absl::string_view CordRepBtree::AddData<kFront>(absl::string_view data,
size_t extra) {
assert(!data.empty());
assert(size() < capacity());
AlignEnd();
do {
CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
const size_t n = (std::min)(data.length(), flat->Capacity());
flat->length = n;
edges_[sub_fetch_begin(1)] = flat;
data = Consume<kFront>(flat->Data(), data, n);
} while (!data.empty() && begin() != 0);
return data;
}
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::AddData(CordRepBtree* tree, absl::string_view data,
size_t extra) {
if (ABSL_PREDICT_FALSE(data.empty())) return tree;
const size_t original_data_size = data.size();
int depth = tree->height();
StackOperations<edge_type> ops;
CordRepBtree* leaf = ops.BuildStack(tree, depth);
// If there is capacity in the last edge, append as much data
// as possible into this last edge.
if (leaf->size() < leaf->capacity()) {
OpResult result = leaf->ToOpResult(ops.owned(depth));
data = result.tree->AddData<edge_type>(data, extra);
if (data.empty()) {
result.tree->length += original_data_size;
return ops.Unwind(tree, depth, original_data_size, result);
}
// We added some data into this leaf, but not all. Propagate the added
// length to the top most node, and rebuild the stack with any newly copied
// or updated nodes. From this point on, the path (leg) from the top most
// node to the right-most node towards the leaf node is privately owned.
size_t delta = original_data_size - data.size();
assert(delta > 0);
result.tree->length += delta;
tree = ops.Propagate(tree, depth, delta, result);
ops.share_depth = depth + 1;
}
// We were unable to append all data into the existing right-most leaf node.
// This means all remaining data must be put into (a) new leaf node(s) which
// we append to the tree. To make this efficient, we iteratively build full
// leaf nodes from `data` until the created leaf contains all remaining data.
// We utilize the `Unwind` method to merge the created leaf into the first
// level towards root that has capacity. On each iteration with remaining
// data, we rebuild the stack in the knowledge that right-most nodes are
// privately owned after the first `Unwind` completes.
for (;;) {
OpResult result = {CordRepBtree::NewLeaf<edge_type>(data, extra), kPopped};
if (result.tree->length == data.size()) {
return ops.Unwind(tree, depth, result.tree->length, result);
}
data = Consume<edge_type>(data, result.tree->length);
tree = ops.Unwind(tree, depth, result.tree->length, result);
depth = tree->height();
ops.BuildOwnedStack(tree, depth);
}
}
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::Merge(CordRepBtree* dst, CordRepBtree* src) {
assert(dst->height() >= src->height());
// Capture source length as we may consume / destroy `src`.
const size_t length = src->length;
// We attempt to merge `src` at its corresponding height in `dst`.
const int depth = dst->height() - src->height();
StackOperations<edge_type> ops;
CordRepBtree* merge_node = ops.BuildStack(dst, depth);
// If there is enough space in `merge_node` for all edges from `src`, add all
// edges to this node, making a fresh copy as needed if not privately owned.
// If `merge_node` does not have capacity for `src`, we rely on `Unwind` and
// `Finalize` to merge `src` into the first level towards `root` where there
// is capacity for another edge, or create a new top level node.
OpResult result;
if (merge_node->size() + src->size() <= kMaxCapacity) {
result = merge_node->ToOpResult(ops.owned(depth));
result.tree->Add<edge_type>(src->Edges());
result.tree->length += src->length;
if (src->refcount.IsOne()) {
Delete(src);
} else {
for (CordRep* edge : src->Edges()) CordRep::Ref(edge);
CordRepBtree::Unref(src);
}
} else {
result = {src, kPopped};
}
// Unless we merged at the top level (i.e.: src and dst are equal height),
// unwind the result towards the top level, and finalize the result.
if (depth) {
return ops.Unwind(dst, depth, length, result);
}
return ops.Finalize(dst, result);
}
CopyResult CordRepBtree::CopySuffix(size_t offset) {
assert(offset < this->length);
// As long as `offset` starts inside the last edge, we can 'drop' the current
// depth. For the most extreme example: if offset references the last data
// edge in the tree, there is only a single edge / path from the top of the
// tree to that last edge, so we can drop all the nodes except that edge.
// The fast path check for this is `back->length >= length - offset`.
int height = this->height();
CordRepBtree* node = this;
size_t len = node->length - offset;
CordRep* back = node->Edge(kBack);
while (back->length >= len) {
offset = back->length - len;
if (--height < 0) {
return {MakeSubstring(CordRep::Ref(back), offset), height};
}
node = back->btree();
back = node->Edge(kBack);
}
if (offset == 0) return {CordRep::Ref(node), height};
// Offset does not point into the last edge, so we span at least two edges.
// Find the index of offset with `IndexBeyond` which provides us the edge
// 'beyond' the offset if offset is not a clean starting point of an edge.
Position pos = node->IndexBeyond(offset);
CordRepBtree* sub = node->CopyToEndFrom(pos.index, len);
const CopyResult result = {sub, height};
// `pos.n` contains a non zero value if the offset is not an exact starting
// point of an edge. In this case, `pos.n` contains the 'trailing' amount of
// bytes of the edge preceding that in `pos.index`. We need to iteratively
// adjust the preceding edge with the 'broken' offset until we have a perfect
// start of the edge.
while (pos.n != 0) {
assert(pos.index >= 1);
const size_t begin = pos.index - 1;
sub->set_begin(begin);
CordRep* const edge = node->Edge(begin);
len = pos.n;
offset = edge->length - len;
if (--height < 0) {
sub->edges_[begin] = MakeSubstring(CordRep::Ref(edge), offset, len);
return result;
}
node = edge->btree();
pos = node->IndexBeyond(offset);
CordRepBtree* nsub = node->CopyToEndFrom(pos.index, len);
sub->edges_[begin] = nsub;
sub = nsub;
}
sub->set_begin(pos.index);
return result;
}
CopyResult CordRepBtree::CopyPrefix(size_t n, bool allow_folding) {
assert(n > 0);
assert(n <= this->length);
// As long as `n` does not exceed the length of the first edge, we can 'drop'
// the current depth. For the most extreme example: if we'd copy a 1 byte
// prefix from a tree, there is only a single edge / path from the top of the
// tree to the single data edge containing this byte, so we can drop all the
// nodes except the data node.
int height = this->height();
CordRepBtree* node = this;
CordRep* front = node->Edge(kFront);
if (allow_folding) {
while (front->length >= n) {
if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1};
node = front->btree();
front = node->Edge(kFront);
}
}
if (node->length == n) return {CordRep::Ref(node), height};
// `n` spans at least two nodes, find the end point of the span.
Position pos = node->IndexOf(n);
// Create a partial copy of the node up to `pos.index`, with a defined length
// of `n`. Any 'partial last edge' is added further below as needed.
CordRepBtree* sub = node->CopyBeginTo(pos.index, n);
const CopyResult result = {sub, height};
// `pos.n` contains the 'offset inside the edge for IndexOf(n)'. As long as
// this is not zero, we don't have a 'clean cut', so we need to make a
// (partial) copy of that last edge, and repeat this until pos.n is zero.
while (pos.n != 0) {
size_t end = pos.index;
n = pos.n;
CordRep* edge = node->Edge(pos.index);
if (--height < 0) {
sub->edges_[end++] = MakeSubstring(CordRep::Ref(edge), 0, n);
sub->set_end(end);
AssertValid(result.edge->btree());
return result;
}
node = edge->btree();
pos = node->IndexOf(n);
CordRepBtree* nsub = node->CopyBeginTo(pos.index, n);
sub->edges_[end++] = nsub;
sub->set_end(end);
sub = nsub;
}
sub->set_end(pos.index);
AssertValid(result.edge->btree());
return result;
}
CordRep* CordRepBtree::ExtractFront(CordRepBtree* tree) {
CordRep* front = tree->Edge(tree->begin());
if (tree->refcount.IsOne()) {
Unref(tree->Edges(tree->begin() + 1, tree->end()));
CordRepBtree::Delete(tree);
} else {
CordRep::Ref(front);
CordRep::Unref(tree);
}
return front;
}
CordRepBtree* CordRepBtree::ConsumeBeginTo(CordRepBtree* tree, size_t end,
size_t new_length) {
assert(end <= tree->end());
if (tree->refcount.IsOne()) {
Unref(tree->Edges(end, tree->end()));
tree->set_end(end);
tree->length = new_length;
} else {
CordRepBtree* old = tree;
tree = tree->CopyBeginTo(end, new_length);
CordRep::Unref(old);
}
return tree;
}
CordRep* CordRepBtree::RemoveSuffix(CordRepBtree* tree, size_t n) {
// Check input and deal with trivial cases 'Remove all/none'
assert(tree != nullptr);
assert(n <= tree->length);
const size_t len = tree->length;
if (ABSL_PREDICT_FALSE(n == 0)) {
return tree;
}
if (ABSL_PREDICT_FALSE(n >= len)) {
CordRepBtree::Unref(tree);
return nullptr;
}
size_t length = len - n;
int height = tree->height();
bool is_mutable = tree->refcount.IsOne();
// Extract all top nodes which are reduced to size = 1
Position pos = tree->IndexOfLength(length);
while (pos.index == tree->begin()) {
CordRep* edge = ExtractFront(tree);
is_mutable &= edge->refcount.IsOne();
if (height-- == 0) return ResizeEdge(edge, length, is_mutable);
tree = edge->btree();
pos = tree->IndexOfLength(length);
}
// Repeat the following sequence traversing down the tree:
// - Crop the top node to the 'last remaining edge' adjusting length.
// - Set the length for down edges to the partial length in that last edge.
// - Repeat this until the last edge is 'included in full'
// - If we hit the data edge level, resize and return the last data edge
CordRepBtree* top = tree = ConsumeBeginTo(tree, pos.index + 1, length);
CordRep* edge = tree->Edge(pos.index);
length = pos.n;
while (length != edge->length) {
// ConsumeBeginTo guarantees `tree` is a clean, privately owned copy.
assert(tree->refcount.IsOne());
const bool edge_is_mutable = edge->refcount.IsOne();
if (height-- == 0) {
tree->edges_[pos.index] = ResizeEdge(edge, length, edge_is_mutable);
return AssertValid(top);
}
if (!edge_is_mutable) {
// We can't 'in place' remove any suffixes down this edge.
// Replace this edge with a prefix copy instead.
tree->edges_[pos.index] = edge->btree()->CopyPrefix(length, false).edge;
CordRep::Unref(edge);
return AssertValid(top);
}
// Move down one level, rinse repeat.
tree = edge->btree();
pos = tree->IndexOfLength(length);
tree = ConsumeBeginTo(edge->btree(), pos.index + 1, length);
edge = tree->Edge(pos.index);
length = pos.n;
}
return AssertValid(top);
}
CordRep* CordRepBtree::SubTree(size_t offset, size_t n) {
assert(n <= this->length);
assert(offset <= this->length - n);
if (ABSL_PREDICT_FALSE(n == 0)) return nullptr;
CordRepBtree* node = this;
int height = node->height();
Position front = node->IndexOf(offset);
CordRep* left = node->edges_[front.index];
while (front.n + n <= left->length) {
if (--height < 0) return MakeSubstring(CordRep::Ref(left), front.n, n);
node = left->btree();
front = node->IndexOf(front.n);
left = node->edges_[front.index];
}
const Position back = node->IndexBefore(front, n);
CordRep* const right = node->edges_[back.index];
assert(back.index > front.index);
// Get partial suffix and prefix entries.
CopyResult prefix;
CopyResult suffix;
if (height > 0) {
// Copy prefix and suffix of the boundary nodes.
prefix = left->btree()->CopySuffix(front.n);
suffix = right->btree()->CopyPrefix(back.n);
// If there is an edge between the prefix and suffix edges, then the tree
// must remain at its previous (full) height. If we have no edges between
// prefix and suffix edges, then the tree must be as high as either the
// suffix or prefix edges (which are collapsed to their minimum heights).
if (front.index + 1 == back.index) {
height = (std::max)(prefix.height, suffix.height) + 1;
}
// Raise prefix and suffixes to the new tree height.
for (int h = prefix.height + 1; h < height; ++h) {
prefix.edge = CordRepBtree::New(prefix.edge);
}
for (int h = suffix.height + 1; h < height; ++h) {
suffix.edge = CordRepBtree::New(suffix.edge);
}
} else {
// Leaf node, simply take substrings for prefix and suffix.
prefix = CopyResult{MakeSubstring(CordRep::Ref(left), front.n), -1};
suffix = CopyResult{MakeSubstring(CordRep::Ref(right), 0, back.n), -1};
}
// Compose resulting tree.
CordRepBtree* sub = CordRepBtree::New(height);
size_t end = 0;
sub->edges_[end++] = prefix.edge;
for (CordRep* r : node->Edges(front.index + 1, back.index)) {
sub->edges_[end++] = CordRep::Ref(r);
}
sub->edges_[end++] = suffix.edge;
sub->set_end(end);
sub->length = n;
return AssertValid(sub);
}
CordRepBtree* CordRepBtree::MergeTrees(CordRepBtree* left,
CordRepBtree* right) {
return left->height() >= right->height() ? Merge<kBack>(left, right)
: Merge<kFront>(right, left);
}
bool CordRepBtree::IsFlat(absl::string_view* fragment) const {
if (height() == 0 && size() == 1) {
if (fragment) *fragment = Data(begin());
return true;
}
return false;
}
bool CordRepBtree::IsFlat(size_t offset, const size_t n,
absl::string_view* fragment) const {
assert(n <= this->length);
assert(offset <= this->length - n);
if (ABSL_PREDICT_FALSE(n == 0)) return false;
int height = this->height();
const CordRepBtree* node = this;
for (;;) {
const Position front = node->IndexOf(offset);
const CordRep* edge = node->Edge(front.index);
if (edge->length < front.n + n) return false;
if (--height < 0) {
if (fragment) *fragment = EdgeData(edge).substr(front.n, n);
return true;
}
offset = front.n;
node = node->Edge(front.index)->btree();
}
}
char CordRepBtree::GetCharacter(size_t offset) const {
assert(offset < length);
const CordRepBtree* node = this;
int height = node->height();
for (;;) {
Position front = node->IndexOf(offset);
if (--height < 0) return node->Data(front.index)[front.n];
offset = front.n;
node = node->Edge(front.index)->btree();
}
}
Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) {
// The inlined version in `GetAppendBuffer()` deals with all heights <= 3.
assert(height() >= 4);
assert(refcount.IsOne());
// Build a stack of nodes we may potentially need to update if we find a
// non-shared FLAT with capacity at the leaf level.
const int depth = height();
CordRepBtree* node = this;
CordRepBtree* stack[kMaxDepth];
for (int i = 0; i < depth; ++i) {
node = node->Edge(kBack)->btree();
if (!node->refcount.IsOne()) return {};
stack[i] = node;
}
// Must be a privately owned, mutable flat.
CordRep* const edge = node->Edge(kBack);
if (!edge->refcount.IsOne() || edge->tag < FLAT) return {};
// Must have capacity.
const size_t avail = edge->flat()->Capacity() - edge->length;
if (avail == 0) return {};
// Build span on remaining capacity.
size_t delta = (std::min)(size, avail);
Span<char> span = {edge->flat()->Data() + edge->length, delta};
edge->length += delta;
this->length += delta;
for (int i = 0; i < depth; ++i) {
stack[i]->length += delta;
}
return span;
}
CordRepBtree* CordRepBtree::CreateSlow(CordRep* rep) {
if (rep->IsBtree()) return rep->btree();
CordRepBtree* node = nullptr;
auto consume = [&node](CordRep* r, size_t offset, size_t length) {
r = MakeSubstring(r, offset, length);
if (node == nullptr) {
node = New(r);
} else {
node = CordRepBtree::AddCordRep<kBack>(node, r);
}
};
Consume(rep, consume);
return node;
}
CordRepBtree* CordRepBtree::AppendSlow(CordRepBtree* tree, CordRep* rep) {
if (ABSL_PREDICT_TRUE(rep->IsBtree())) {
return MergeTrees(tree, rep->btree());
}
auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
r = MakeSubstring(r, offset, length);
tree = CordRepBtree::AddCordRep<kBack>(tree, r);
};
Consume(rep, consume);
return tree;
}
CordRepBtree* CordRepBtree::PrependSlow(CordRepBtree* tree, CordRep* rep) {
if (ABSL_PREDICT_TRUE(rep->IsBtree())) {
return MergeTrees(rep->btree(), tree);
}
auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
r = MakeSubstring(r, offset, length);
tree = CordRepBtree::AddCordRep<kFront>(tree, r);
};
ReverseConsume(rep, consume);
return tree;
}
CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, absl::string_view data,
size_t extra) {
return CordRepBtree::AddData<kBack>(tree, data, extra);
}
CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, absl::string_view data,
size_t extra) {
return CordRepBtree::AddData<kFront>(tree, data, extra);
}
template CordRepBtree* CordRepBtree::AddCordRep<kFront>(CordRepBtree* tree,
CordRep* rep);
template CordRepBtree* CordRepBtree::AddCordRep<kBack>(CordRepBtree* tree,
CordRep* rep);
template CordRepBtree* CordRepBtree::AddData<kFront>(CordRepBtree* tree,
absl::string_view data,
size_t extra);
template CordRepBtree* CordRepBtree::AddData<kBack>(CordRepBtree* tree,
absl::string_view data,
size_t extra);
void CordRepBtree::Rebuild(CordRepBtree** stack, CordRepBtree* tree,
bool consume) {
bool owned = consume && tree->refcount.IsOne();
if (tree->height() == 0) {
for (CordRep* edge : tree->Edges()) {
if (!owned) edge = CordRep::Ref(edge);
size_t height = 0;
size_t length = edge->length;
CordRepBtree* node = stack[0];
OpResult result = node->AddEdge<kBack>(true, edge, length);
while (result.action == CordRepBtree::kPopped) {
stack[height] = result.tree;
if (stack[++height] == nullptr) {
result.action = CordRepBtree::kSelf;
stack[height] = CordRepBtree::New(node, result.tree);
} else {
node = stack[height];
result = node->AddEdge<kBack>(true, result.tree, length);
}
}
while (stack[++height] != nullptr) {
stack[height]->length += length;
}
}
} else {
for (CordRep* rep : tree->Edges()) {
Rebuild(stack, rep->btree(), owned);
}
}
if (consume) {
if (owned) {
CordRepBtree::Delete(tree);
} else {
CordRepBtree::Unref(tree);
}
}
}
CordRepBtree* CordRepBtree::Rebuild(CordRepBtree* tree) {
// Set up initial stack with empty leaf node.
CordRepBtree* node = CordRepBtree::New();
CordRepBtree* stack[CordRepBtree::kMaxDepth + 1] = {node};
// Recursively build the tree, consuming the input tree.
Rebuild(stack, tree, /* consume reference */ true);
// Return top most node
for (CordRepBtree* parent : stack) {
if (parent == nullptr) return node;
node = parent;
}
// Unreachable
assert(false);
return nullptr;
}
CordRepBtree::ExtractResult CordRepBtree::ExtractAppendBuffer(
CordRepBtree* tree, size_t extra_capacity) {
int depth = 0;
NodeStack stack;
// Set up default 'no success' result which is {tree, nullptr}.
ExtractResult result;
result.tree = tree;
result.extracted = nullptr;
// Dive down the right side of the tree, making sure no edges are shared.
while (tree->height() > 0) {
if (!tree->refcount.IsOne()) return result;
stack[depth++] = tree;
tree = tree->Edge(kBack)->btree();
}
if (!tree->refcount.IsOne()) return result;
// Validate we ended on a non shared flat.
CordRep* rep = tree->Edge(kBack);
if (!(rep->IsFlat() && rep->refcount.IsOne())) return result;
// Verify it has at least the requested extra capacity.
CordRepFlat* flat = rep->flat();
const size_t length = flat->length;
const size_t avail = flat->Capacity() - flat->length;
if (extra_capacity > avail) return result;
// Set the extracted flat in the result.
result.extracted = flat;
// Cascading delete all nodes that become empty.
while (tree->size() == 1) {
CordRepBtree::Delete(tree);
if (--depth < 0) {
// We consumed the entire tree: return nullptr for new tree.
result.tree = nullptr;
return result;
}
rep = tree;
tree = stack[depth];
}
// Remove the edge or cascaded up parent node.
tree->set_end(tree->end() - 1);
tree->length -= length;
// Adjust lengths up the tree.
while (depth > 0) {
tree = stack[--depth];
tree->length -= length;
}
// Remove unnecessary top nodes with size = 1. This may iterate all the way
// down to the leaf node in which case we simply return the remaining last
// edge in that node and the extracted flat.
while (tree->size() == 1) {
int height = tree->height();
rep = tree->Edge(kBack);
Delete(tree);
if (height == 0) {
// We consumed the leaf: return the sole data edge as the new tree.
result.tree = rep;
return result;
}
tree = rep->btree();
}
// Done: return the (new) top level node and extracted flat.
result.tree = tree;
return result;
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl