summaryrefslogtreecommitdiff
path: root/llvm/lib/Support/SuffixTree.cpp
diff options
context:
space:
mode:
authorArthur Eubanks <aeubanks@google.com>2024-06-27 16:32:27 -0700
committershawbyoung <shawbyoung@gmail.com>2024-06-27 16:32:27 -0700
commitf5c7df12cacdb84552b36a7ac598a8db41acc680 (patch)
tree3b33e941b9bfb88c40c64fd18ee32a633423cbed /llvm/lib/Support/SuffixTree.cpp
parent608880c3a7a59c86db82728067e553a8d4665a45 (diff)
parent804415825b97e974c96a92580bcbeaf4c7ff0a04 (diff)
[𝘀𝗽𝗿] changes introduced through rebaseusers/shawbyoung/spr/main.boltnfc-refactoring-callgraph
Created using spr 1.3.4 [skip ci]
Diffstat (limited to 'llvm/lib/Support/SuffixTree.cpp')
-rw-r--r--llvm/lib/Support/SuffixTree.cpp105
1 files changed, 90 insertions, 15 deletions
diff --git a/llvm/lib/Support/SuffixTree.cpp b/llvm/lib/Support/SuffixTree.cpp
index c00c7989d1a6..5e58310e1128 100644
--- a/llvm/lib/Support/SuffixTree.cpp
+++ b/llvm/lib/Support/SuffixTree.cpp
@@ -26,7 +26,9 @@ static size_t numElementsInSubstring(const SuffixTreeNode *N) {
return N->getEndIdx() - N->getStartIdx() + 1;
}
-SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) {
+SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str,
+ bool OutlinerLeafDescendants)
+ : Str(Str), OutlinerLeafDescendants(OutlinerLeafDescendants) {
Root = insertRoot();
Active.Node = Root;
@@ -46,6 +48,11 @@ SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) {
// Set the suffix indices of each leaf.
assert(Root && "Root node can't be nullptr!");
setSuffixIndices();
+
+ // Collect all leaf nodes of the suffix tree. And for each internal node,
+ // record the range of leaf nodes that are descendants of it.
+ if (OutlinerLeafDescendants)
+ setLeafNodes();
}
SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeInternalNode &Parent,
@@ -105,6 +112,70 @@ void SuffixTree::setSuffixIndices() {
}
}
+void SuffixTree::setLeafNodes() {
+ // A stack that keeps track of nodes to visit for post-order DFS traversal.
+ SmallVector<SuffixTreeNode *> ToVisit;
+ ToVisit.push_back(Root);
+
+ // This keeps track of the index of the next leaf node to be added to
+ // the LeafNodes vector of the suffix tree.
+ unsigned LeafCounter = 0;
+
+ // This keeps track of nodes whose children have been added to the stack.
+ // The value is a pair, representing a node's first and last children.
+ DenseMap<SuffixTreeInternalNode *,
+ std::pair<SuffixTreeNode *, SuffixTreeNode *>>
+ ChildrenMap;
+
+ // Traverse the tree in post-order.
+ while (!ToVisit.empty()) {
+ SuffixTreeNode *CurrNode = ToVisit.pop_back_val();
+ if (auto *CurrInternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode)) {
+ // The current node is an internal node.
+ auto I = ChildrenMap.find(CurrInternalNode);
+ if (I == ChildrenMap.end()) {
+ // This is the first time we visit this node.
+ // Its children have not been added to the stack yet.
+ // We add current node back, and add its children to the stack.
+ // We keep track of the first and last children of the current node.
+ auto J = CurrInternalNode->Children.begin();
+ if (J != CurrInternalNode->Children.end()) {
+ ToVisit.push_back(CurrNode);
+ SuffixTreeNode *FirstChild = J->second;
+ SuffixTreeNode *LastChild = nullptr;
+ for (; J != CurrInternalNode->Children.end(); ++J) {
+ LastChild = J->second;
+ ToVisit.push_back(LastChild);
+ }
+ ChildrenMap[CurrInternalNode] = {FirstChild, LastChild};
+ }
+ } else {
+ // This is the second time we visit this node.
+ // All of its children have already been processed.
+ // Now, we can set its LeftLeafIdx and RightLeafIdx;
+ auto [FirstChild, LastChild] = I->second;
+ // Get the first child to use its RightLeafIdx.
+ // The first child is the first one added to the stack, so it is
+ // the last one to be processed. Hence, the leaf descendants
+ // of the first child are assigned the largest index numbers.
+ CurrNode->setRightLeafIdx(FirstChild->getRightLeafIdx());
+ // Get the last child to use its LeftLeafIdx.
+ CurrNode->setLeftLeafIdx(LastChild->getLeftLeafIdx());
+ assert(CurrNode->getLeftLeafIdx() <= CurrNode->getRightLeafIdx() &&
+ "LeftLeafIdx should not be larger than RightLeafIdx");
+ }
+ } else {
+ // The current node is a leaf node.
+ // We can simply set its LeftLeafIdx and RightLeafIdx.
+ CurrNode->setLeftLeafIdx(LeafCounter);
+ CurrNode->setRightLeafIdx(LeafCounter);
+ ++LeafCounter;
+ auto *CurrLeafNode = cast<SuffixTreeLeafNode>(CurrNode);
+ LeafNodes.push_back(CurrLeafNode);
+ }
+ }
+}
+
unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
SuffixTreeInternalNode *NeedsLink = nullptr;
@@ -241,30 +312,34 @@ void SuffixTree::RepeatedSubstringIterator::advance() {
// it's too short, we'll quit.
unsigned Length = Curr->getConcatLen();
- // Iterate over each child, saving internal nodes for visiting, and
- // leaf nodes' SuffixIdx in RepeatedSubstringStarts. Internal nodes
- // represent individual strings, which may repeat.
- for (auto &ChildPair : Curr->Children) {
+ // Iterate over each child, saving internal nodes for visiting.
+ // Internal nodes represent individual strings, which may repeat.
+ for (auto &ChildPair : Curr->Children)
// Save all of this node's children for processing.
if (auto *InternalChild =
- dyn_cast<SuffixTreeInternalNode>(ChildPair.second)) {
+ dyn_cast<SuffixTreeInternalNode>(ChildPair.second))
InternalNodesToVisit.push_back(InternalChild);
- continue;
- }
-
- if (Length < MinLength)
- continue;
- // Have an occurrence of a potentially repeated string. Save it.
- auto *Leaf = cast<SuffixTreeLeafNode>(ChildPair.second);
- RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
- }
+ // If length of repeated substring is below threshold, then skip it.
+ if (Length < MinLength)
+ continue;
// The root never represents a repeated substring. If we're looking at
// that, then skip it.
if (Curr->isRoot())
continue;
+ // Collect leaf children or leaf descendants by OutlinerLeafDescendants.
+ if (OutlinerLeafDescendants) {
+ for (unsigned I = Curr->getLeftLeafIdx(); I <= Curr->getRightLeafIdx();
+ ++I)
+ RepeatedSubstringStarts.push_back(LeafNodes[I]->getSuffixIdx());
+ } else {
+ for (auto &ChildPair : Curr->Children)
+ if (auto *Leaf = dyn_cast<SuffixTreeLeafNode>(ChildPair.second))
+ RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
+ }
+
// Do we have any repeated substrings?
if (RepeatedSubstringStarts.size() < 2)
continue;