Skip to content

Commit

Permalink
new filter planar
Browse files Browse the repository at this point in the history
  • Loading branch information
rwittler committed Jan 29, 2025
2 parents 8e60985 + 5a71189 commit d09aace
Show file tree
Hide file tree
Showing 25 changed files with 7,280 additions and 7 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ In: Huber, K. and Gusfield, D. (eds.) Proceedings of WABI 2019. LIPIcs. 143, Sch

## Requirements

For the main program, there are no strict dependencies other than C++ version 14.
For the main program, there are no strict dependencies other than C++ version 17.
To read in **compressed** fasta/fastq files, it could be necessary to install zlib:
```
sudo apt install libz-dev
Expand Down Expand Up @@ -129,6 +129,7 @@ Specify your input by `-i <list>` where `<list>` is either a file-of-files or in
If you want a **tree**, use `-f strict`. In this case, `-N <newick-file>` can be used to write the resulting tree into a newick file; instead or additionally to `-o <split-file>`.
If you want a **network**, use one of the following filters:
* `weakly`: a split is kept if it is weakly compatible to all previously filtered splits (see publication for definition of "weak compatibility").
* `planar`: a split is kept if the resulting set of splits can be displayed in the plane without any edges crossing (a.k.a. circular compatible, outer-labeled planar).
* `2-tree`: two sets of compatible splits (=trees) are maintained. A split is added to the first if possible (compatible); if not to the second if possible.
* `3-tree`: three sets of compatible splits (=trees) are maintained. A split is added to the first if possible (compatible); if not to the second if possible; if not to the third if possible.

Expand Down Expand Up @@ -348,6 +349,7 @@ SANS is provided as a service of the [German Network for Bioinformatics Infrastr

* The sparse-map library is licensed under the [MIT license](https://github.com/Tessil/sparse-map/blob/master/LICENSE).
* The Bifrost library is licensed under the [BSD-2 license](https://github.com/pmelsted/bifrost/blob/master/LICENSE).
* The [PC-tree library](https://github.com/N-Coder/pc-tree) is licensed under the [OGDF license](https://github.com/N-Coder/pc-tree/blob/main/LICENSE.txt).
* SANS uses gzstream, licensed under the [LGPL license](/src/gz/COPYING.LIB).
* SANS is licensed under the [GNU general public license](/LICENSE).

Expand Down
32 changes: 28 additions & 4 deletions makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# MAX. K-MER LENGTH, NUMBER OF FILES
CC = g++ -O3 -march=native -DmaxK=64 -DmaxN=64 -std=c++14
CC = g++ -O3 -march=native -DmaxK=64 -DmaxN=64 -std=c++17
XX = -lpthread -lz

## IF DEBUG
Expand All @@ -16,6 +16,7 @@ CFLAGS = gcc -O3 -march=native
SRCDIR := src
BUILDDIR := obj


# Wrap Windows / Unix commands
ifeq ($(OS), Windows_NT)
TD = $(BUILDDIR)
Expand All @@ -39,9 +40,9 @@ endif
all: makefile start SANS done

SANS: makefile $(BUILDDIR)/main.o
$(CC) -o SANS $(BUILDDIR)/nexus_color.o $(BUILDDIR)/main.o $(BUILDDIR)/graph.o $(BUILDDIR)/kmer.o $(BUILDDIR)/kmerAmino.o $(BUILDDIR)/color.o $(BUILDDIR)/util.o $(BUILDDIR)/translator.o $(BUILDDIR)/cleanliness.o $(BUILDDIR)/gzstream.o $(XX)
$(CC) -o SANS $(BUILDDIR)/nexus_color.o $(BUILDDIR)/main.o $(BUILDDIR)/graph.o $(BUILDDIR)/kmer.o $(BUILDDIR)/kmerAmino.o $(BUILDDIR)/color.o $(BUILDDIR)/util.o $(BUILDDIR)/translator.o $(BUILDDIR)/cleanliness.o $(BUILDDIR)/gzstream.o $(BUILDDIR)/PCTree_basic.o $(BUILDDIR)/PCTree_construction.o $(BUILDDIR)/PCTreeForest.o $(BUILDDIR)/PCTree_restriction.o $(BUILDDIR)/PCTree_intersect.o $(BUILDDIR)/PCNode.o $(XX)

$(BUILDDIR)/main.o: makefile $(SRCDIR)/main.cpp $(SRCDIR)/main.h $(BUILDDIR)/color.o $(BUILDDIR)/translator.o $(BUILDDIR)/graph.o $(BUILDDIR)/util.o $(BUILDDIR)/cleanliness.o $(BUILDDIR)/gzstream.o $(BUILDDIR)/nexus_color.o
$(BUILDDIR)/main.o: makefile $(SRCDIR)/main.cpp $(SRCDIR)/main.h $(BUILDDIR)/color.o $(BUILDDIR)/translator.o $(BUILDDIR)/graph.o $(BUILDDIR)/util.o $(BUILDDIR)/cleanliness.o $(BUILDDIR)/gzstream.o $(BUILDDIR)/nexus_color.o $(BUILDDIR)/PCTree_construction.o $(BUILDDIR)/PCTree_basic.o $(BUILDDIR)/PCTreeForest.o $(BUILDDIR)/PCTree_restriction.o $(BUILDDIR)/PCTree_intersect.o $(BUILDDIR)/PCNode.o
$(CC) -c $(SRCDIR)/main.cpp -o $(BUILDDIR)/main.o

$(BUILDDIR)/graph.o: makefile $(SRCDIR)/graph.cpp $(SRCDIR)/graph.h $(BUILDDIR)/kmer.o $(BUILDDIR)/kmerAmino.o $(BUILDDIR)/color.o
Expand All @@ -56,7 +57,7 @@ $(BUILDDIR)/kmerAmino.o: makefile $(SRCDIR)/kmerAmino.cpp $(SRCDIR)/kmerAmino.h
$(BUILDDIR)/color.o: makefile $(SRCDIR)/color.cpp $(SRCDIR)/color.h
$(CC) -c $(SRCDIR)/color.cpp -o $(BUILDDIR)/color.o

$(BUILDDIR)/nexus_color.o: makefile $(SRCDIR)/nexus_color.cpp $(SRCDIR)/nexus_color.h
$(BUILDDIR)/nexus_color.o: $(SRCDIR)/nexus_color.cpp $(SRCDIR)/nexus_color.h
$(CC) -c $(SRCDIR)/nexus_color.cpp -o $(BUILDDIR)/nexus_color.o

$(BUILDDIR)/util.o: $(SRCDIR)/util.cpp $(SRCDIR)/util.h
Expand All @@ -72,6 +73,29 @@ $(BUILDDIR)/gzstream.o: $(SRCDIR)/gz/gzstream.C $(SRCDIR)/gz/gzstream.h
$(CFLAGS) -c $(SRCDIR)/gz/gzstream.C -o $(BUILDDIR)/gzstream.o


# PC-Tree


$(BUILDDIR)/PCNode.o: $(SRCDIR)/pctree/PCNode.cpp $(SRCDIR)/pctree/PCNode.h $(BUILDDIR)/PCTreeForest.o
$(CC) -c $(SRCDIR)/pctree/PCNode.cpp -o $(BUILDDIR)/PCNode.o

$(BUILDDIR)/PCTree_basic.o: $(SRCDIR)/pctree/PCTree_basic.cpp $(BUILDDIR)/PCNode.o
$(CC) -c $(SRCDIR)/pctree/PCTree_basic.cpp -o $(BUILDDIR)/PCTree_basic.o

$(BUILDDIR)/PCTree_construction.o: $(SRCDIR)/pctree/PCTree_construction.cpp $(BUILDDIR)/PCNode.o
$(CC) -c $(SRCDIR)/pctree/PCTree_construction.cpp -o $(BUILDDIR)/PCTree_construction.o

$(BUILDDIR)/PCTreeForest.o: $(SRCDIR)/pctree/PCTreeForest.cpp $(SRCDIR)/pctree/PCTreeForest.h
$(CC) -c $(SRCDIR)/pctree/PCTreeForest.cpp -o $(BUILDDIR)/PCTreeForest.o

$(BUILDDIR)/PCTree_intersect.o: $(SRCDIR)/pctree/PCTree_intersect.cpp
$(CC) -c $(SRCDIR)/pctree/PCTree_intersect.cpp -o $(BUILDDIR)/PCTree_intersect.o

$(BUILDDIR)/PCTree_restriction.o: $(SRCDIR)/pctree/PCTree_restriction.cpp $(BUILDDIR)/PCNode.o
$(CC) -c $(SRCDIR)/pctree/PCTree_restriction.cpp -o $(BUILDDIR)/PCTree_restriction.o



# [Internal rules]

# Print info at compile start
Expand Down
53 changes: 53 additions & 0 deletions src/graph.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "graph.h"
#include "util.h"
#include "pctree/PCTree.h"
#include <mutex>
#include <thread>
#include <algorithm>
Expand Down Expand Up @@ -1727,6 +1728,58 @@ void graph::filter_weakly(multimap_<double, color_t>& split_list, bool& verbose)
}
}

/**
 * This function greedily filters a subset of splits that is compatible to a
 * planar graph (a.k.a. circular compatible, outer-labeled planar).
 *
 * The splits are visited in the iteration order of the given list. Each split
 * is handed to a PC-tree as a "these leaves must be consecutive" restriction;
 * a split the PC-tree rejects is erased from the list in place.
 *
 * @param split_list list of splits to be filtered (modified in place)
 * @param verbose print progress (not evaluated by this function)
 */
void graph::filter_planar(multimap_<double, color_t>& split_list, bool& verbose) {

    // PC-tree over maxN leaves; leaf_nodes is handed to the constructor and is
    // afterwards indexed by bit position (presumably filled with one leaf per
    // position by the PCTree constructor -- confirm against pctree/PCTree.h)
    vector<pc_tree::PCNode*> leaf_nodes;
    pc_tree::PCTree tree(maxN, &leaf_nodes);

    auto current = split_list.begin();
    while (current != split_list.end()) {

        // work on a copy of the split, the bits are consumed by shifting
        color_t remaining_bits = current->second;

        // leaves whose bit is set in the current split
        vector<pc_tree::PCNode*> selected;
        for (int pos = 0; pos < maxN; ++pos) {
            const int bit = (remaining_bits & 0b1u);
            remaining_bits >>= 1u;

            if (bit == 1) {
                selected.push_back(leaf_nodes.at(pos));
            }
        }

        if (tree.makeConsecutive(selected)) {
            // the split fits into the pc-tree: keep it and move on
            ++current;
        } else {
            // the split is not compatible to the pc-tree: drop it,
            // erase() yields the iterator to the next split
            current = split_list.erase(current);
        }
    }

}


/**
* This function filters a greedy maximum weight n-tree compatible subset and returns a string with all trees in newick format.
*
Expand Down
8 changes: 8 additions & 0 deletions src/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,14 @@ class graph {
*/
static void filter_weakly(multimap_<double, color_t>& split_list, bool& verbose);

/**
* This function filters a greedy subset of splits that is compatible to a planar graph
* (a.k.a. circular compatible, outer-labeled planar).
* Splits are tested one after another in the order of the given list; a split that
* cannot be added to the already accepted ones is erased from the list in place.
*
* @param split_list list of splits to be filtered (modified in place)
* @param verbose print progress (NOTE(review): not evaluated by the definition in graph.cpp)
*/
static void filter_planar(multimap_<double, color_t>& split_list, bool& verbose);


/**
* This function filters a greedy maximum weight n-tree compatible subset.
*
Expand Down
7 changes: 6 additions & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ int main(int argc, char* argv[]) {
// cout << " \t filtered splits w.r.t. original splits" << endl;
cout << " \t options: strict: compatible to a tree" << endl;
cout << " \t weakly: weakly compatible network" << endl;
cout << " \t planar: compatible to a planar graph" << endl;
cout << " \t (a.k.a. circular compatible, outer labeled planar)" << endl;
cout << " \t n-tree: compatible to a union of n trees" << endl;
cout << " \t (where n is an arbitrary number, e.g. 2-tree)" << endl;
cout << endl;
Expand Down Expand Up @@ -382,7 +384,7 @@ int main(int argc, char* argv[]) {
else if (strcmp(argv[i], "-f") == 0 || strcmp(argv[i], "--filter") == 0) {
catch_missing_dependent_args(argv[i + 1], argv[i]);
filter = argv[++i]; // Filter a greedy maximum weight subset
if (filter == "strict" || filter == "tree") {
if (filter == "strict" || filter == "tree" | filter == "planar") {
// compatible to a tree
}
else if (filter == "weakly") {
Expand Down Expand Up @@ -1737,6 +1739,9 @@ void apply_filter(string filter, string newick, std::function<string(const uint6
else if (filter == "weakly") {
graph::filter_weakly(split_list, verbose);
}
else if (filter == "planar") {
graph::filter_planar(split_list, verbose);
}
else if (filter.find("tree") != -1 && filter.substr(filter.find("tree")) == "tree") {
if (!newick.empty()) {
ofstream file(newick); // output file stream
Expand Down
2 changes: 1 addition & 1 deletion src/main.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ using namespace std;
// SANS ambages
// Symmetric Alignment-free phylogeNomic Splits
// phylogenomics with Abundance-filter, Multi-threading and Bootstrapping on Amino-acid or GEnomic Sequences
#define SANS_VERSION "2.5_1A" // SANS ambages
#define SANS_VERSION "2.5_1B" // SANS ambages

/**
* This is the entry point of the program.
Expand Down
71 changes: 71 additions & 0 deletions src/pctree/PCEnum.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/** \file
* \brief Predeclaration of various PC-tree related classes and enums.
*
* \author Simon D. Fink <ogdf@niko.fink.bayern>
*
* \par License:
* This file is part of the Open Graph Drawing Framework (OGDF).
*
* \par
* Copyright (C)<br>
* See README.md in the OGDF root directory for details.
*
* \par
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* Version 2 or 3 as published by the Free Software Foundation;
* see the file LICENSE.txt included in the packaging of this file
* for details.
*
* \par
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* \par
* You should have received a copy of the GNU General Public
* License along with this program; if not, see
* http://www.gnu.org/copyleft/gpl.html
*/

#pragma once

#include "util/RegisteredArray.h"
#include "util/RegisteredSet.h"
#include "util/defines.h"

#include <ostream>

namespace pc_tree {
//! Label of a node; \c Empty is an alias that shares the value of \c Unknown.
enum class OGDF_EXPORT NodeLabel { Unknown, Partial, Full, Empty = Unknown };

//! Kind of a PC-tree node: P-node, C-node or leaf.
enum class OGDF_EXPORT PCNodeType { PNode, CNode, Leaf };

// Forward declarations of the PC-tree classes.
class OGDF_EXPORT PCTree;

class OGDF_EXPORT PCTreeForest;

class OGDF_EXPORT PCTreeRegistry;

class OGDF_EXPORT PCNode;

// Declares PCTreeNodeArray through OGDF_DECL_REG_ARRAY. The helper macro
// OGDF_DECL_REG_ARRAY_TYPE is only needed for that single declaration
// (presumably OGDF_DECL_REG_ARRAY from util/RegisteredArray.h expands it --
// confirm there), so it is removed again right afterwards. The #undef has to
// name the macro defined here; otherwise it stays defined for every file
// that includes this header.
#define OGDF_DECL_REG_ARRAY_TYPE(v, c) pc_tree::RegisteredArray<PCTreeRegistry, v, c>
OGDF_DECL_REG_ARRAY(PCTreeNodeArray)
#undef OGDF_DECL_REG_ARRAY_TYPE

//! Set of PC-tree nodes registered with a PCTreeRegistry.
template<bool SupportFastSizeQuery = true>
using PCTreeNodeSet = pc_tree::RegisteredSet<PCTreeRegistry, SupportFastSizeQuery>;

// Stream output operators for the enums and for trees/nodes (by pointer and by reference).
OGDF_EXPORT std::ostream& operator<<(std::ostream&, pc_tree::NodeLabel);

OGDF_EXPORT std::ostream& operator<<(std::ostream&, pc_tree::PCNodeType);

OGDF_EXPORT std::ostream& operator<<(std::ostream&, const pc_tree::PCTree*);

OGDF_EXPORT std::ostream& operator<<(std::ostream&, const pc_tree::PCNode*);

OGDF_EXPORT std::ostream& operator<<(std::ostream&, const pc_tree::PCTree&);

OGDF_EXPORT std::ostream& operator<<(std::ostream&, const pc_tree::PCNode&);
}
Loading

0 comments on commit d09aace

Please sign in to comment.