Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/left-align-better' into lr-giraffe
Browse files Browse the repository at this point in the history
  • Loading branch information
adamnovak committed Aug 9, 2024
2 parents 007df92 + fd85a78 commit dbc77d7
Show file tree
Hide file tree
Showing 11 changed files with 169 additions and 136 deletions.
30 changes: 23 additions & 7 deletions src/algorithms/pad_band.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,43 @@
namespace vg {
namespace algorithms {

std::function<size_t(const Alignment&, const HandleGraph&)> pad_band_random_walk(double band_padding_multiplier, size_t band_padding_memo_size) {

std::function<size_t(const Alignment&, const HandleGraph&)> pad_band_random_walk_internal(double band_padding_multiplier, size_t band_padding_memo_size, size_t max_padding,
const std::function<size_t(const Alignment&, const HandleGraph&)> size_function) {
// We're goign to capture this vector by value into the closure
std::vector<size_t> band_padding_memo;

// Fill it in to initialize
band_padding_memo.resize(band_padding_memo_size);
for (size_t i = 0; i < band_padding_memo.size(); i++) {
band_padding_memo[i] = size_t(band_padding_multiplier * sqrt(i)) + 1;
band_padding_memo[i] = std::min<size_t>(max_padding, size_t(band_padding_multiplier * sqrt(i)) + 1);
}

function<size_t(const Alignment&, const HandleGraph&)> choose_band_padding = [band_padding_multiplier, band_padding_memo](const Alignment& seq, const HandleGraph& graph) {
size_t read_length = seq.sequence().size();
return read_length < band_padding_memo.size() ? band_padding_memo.at(read_length)
: size_t(band_padding_multiplier * sqrt(read_length)) + 1;

function<size_t(const Alignment&, const HandleGraph&)> choose_band_padding =
[band_padding_multiplier, band_padding_memo, size_function, max_padding](const Alignment& seq, const HandleGraph& graph) {
size_t size = size_function(seq, graph);
return size < band_padding_memo.size() ? band_padding_memo.at(size)
: std::min<size_t>(max_padding, size_t(band_padding_multiplier * sqrt(size)) + 1);
};

// And return the closure which now owns the memo table.
return choose_band_padding;
}

std::function<size_t(const Alignment&, const HandleGraph&)> pad_band_random_walk(double band_padding_multiplier, size_t band_padding_memo_size, size_t max_padding) {
return pad_band_random_walk_internal(band_padding_multiplier, band_padding_memo_size, max_padding,
[](const Alignment& seq, const HandleGraph&) {
return seq.sequence().size();
});
}

std::function<size_t(const Alignment&, const HandleGraph&)> pad_band_min_random_walk(double band_padding_multiplier, size_t band_padding_memo_size, size_t max_padding) {
return pad_band_random_walk_internal(band_padding_multiplier, band_padding_memo_size, max_padding,
[](const Alignment& seq, const HandleGraph& graph) {
return std::min(seq.sequence().size(), graph.get_total_length());
});
}

std::function<size_t(const Alignment&, const HandleGraph&)> pad_band_constant(size_t band_padding) {
// don't dynamically choose band padding, shim constant value into a function type
function<size_t(const Alignment&,const HandleGraph&)> constant_padding = [band_padding](const Alignment& seq, const HandleGraph& graph) {
Expand Down
14 changes: 12 additions & 2 deletions src/algorithms/pad_band.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,25 @@

#include "../handle.hpp"
#include <vg/vg.pb.h>
#include <limits>

namespace vg {
namespace algorithms {

using namespace std;

/// Get a band padding function that uses the expected distance of a random
/// Get a band padding function that scales with the expected distance of a random
/// walk, memoized out to the given length.
std::function<size_t(const Alignment&, const HandleGraph&)> pad_band_random_walk(double band_padding_multiplier = 1.0, size_t band_padding_memo_size = 2000);
std::function<size_t(const Alignment&, const HandleGraph&)> pad_band_random_walk(double band_padding_multiplier = 1.0,
size_t band_padding_memo_size = 2000,
size_t max_padding = std::numeric_limits<size_t>::max());

/// Get a band padding function that scales the expected distance of a random
/// walk, memoized out to the given length, using the minimum of graph size and
/// read size as the length.
std::function<size_t(const Alignment&, const HandleGraph&)> pad_band_min_random_walk(double band_padding_multiplier = 1.0,
size_t band_padding_memo_size = 2000,
size_t max_padding = std::numeric_limits<size_t>::max());

/// Get a band padding function that uses a constant value.
std::function<size_t(const Alignment&, const HandleGraph&)> pad_band_constant(size_t band_padding);
Expand Down
60 changes: 20 additions & 40 deletions src/aligner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1421,8 +1421,7 @@ void Aligner::align_pinned_multi(Alignment& alignment, vector<Alignment>& alt_al
}

void Aligner::align_global_banded(Alignment& alignment, const HandleGraph& g,
int32_t band_padding, bool permissive_banding,
const unordered_map<handle_t, bool>* left_align_strand) const {
int32_t band_padding, bool permissive_banding) const {

if (alignment.sequence().empty()) {
// we can save time by using a specialized deletion aligner for empty strings
Expand All @@ -1447,8 +1446,7 @@ void Aligner::align_global_banded(Alignment& alignment, const HandleGraph& g,
g,
band_padding,
permissive_banding,
false,
left_align_strand);
false);

band_graph.align(score_matrix, nt_table, gap_open, gap_extension);
} else if (best_score <= numeric_limits<int16_t>::max() && worst_score >= numeric_limits<int16_t>::min()) {
Expand All @@ -1457,8 +1455,7 @@ void Aligner::align_global_banded(Alignment& alignment, const HandleGraph& g,
g,
band_padding,
permissive_banding,
false,
left_align_strand);
false);

band_graph.align(score_matrix, nt_table, gap_open, gap_extension);
} else if (best_score <= numeric_limits<int32_t>::max() && worst_score >= numeric_limits<int32_t>::min()) {
Expand All @@ -1467,8 +1464,7 @@ void Aligner::align_global_banded(Alignment& alignment, const HandleGraph& g,
g,
band_padding,
permissive_banding,
false,
left_align_strand);
false);

band_graph.align(score_matrix, nt_table, gap_open, gap_extension);
} else {
Expand All @@ -1477,16 +1473,14 @@ void Aligner::align_global_banded(Alignment& alignment, const HandleGraph& g,
g,
band_padding,
permissive_banding,
false,
left_align_strand);
false);

band_graph.align(score_matrix, nt_table, gap_open, gap_extension);
}
}

void Aligner::align_global_banded_multi(Alignment& alignment, vector<Alignment>& alt_alignments, const HandleGraph& g,
int32_t max_alt_alns, int32_t band_padding, bool permissive_banding,
const unordered_map<handle_t, bool>* left_align_strand) const {
int32_t max_alt_alns, int32_t band_padding, bool permissive_banding) const {

if (alignment.sequence().empty()) {
// we can save time by using a specialized deletion aligner for empty strings
Expand All @@ -1511,8 +1505,7 @@ void Aligner::align_global_banded_multi(Alignment& alignment, vector<Alignment>&
max_alt_alns,
band_padding,
permissive_banding,
false,
left_align_strand);
false);

band_graph.align(score_matrix, nt_table, gap_open, gap_extension);
} else if (best_score <= numeric_limits<int16_t>::max() && worst_score >= numeric_limits<int16_t>::min()) {
Expand All @@ -1523,8 +1516,7 @@ void Aligner::align_global_banded_multi(Alignment& alignment, vector<Alignment>&
max_alt_alns,
band_padding,
permissive_banding,
false,
left_align_strand);
false);

band_graph.align(score_matrix, nt_table, gap_open, gap_extension);
} else if (best_score <= numeric_limits<int32_t>::max() && worst_score >= numeric_limits<int32_t>::min()) {
Expand All @@ -1535,8 +1527,7 @@ void Aligner::align_global_banded_multi(Alignment& alignment, vector<Alignment>&
max_alt_alns,
band_padding,
permissive_banding,
false,
left_align_strand);
false);

band_graph.align(score_matrix, nt_table, gap_open, gap_extension);
} else {
Expand All @@ -1547,8 +1538,7 @@ void Aligner::align_global_banded_multi(Alignment& alignment, vector<Alignment>&
max_alt_alns,
band_padding,
permissive_banding,
false,
left_align_strand);
false);

band_graph.align(score_matrix, nt_table, gap_open, gap_extension);
}
Expand Down Expand Up @@ -2105,8 +2095,7 @@ void QualAdjAligner::align_pinned_multi(Alignment& alignment, vector<Alignment>&
}

void QualAdjAligner::align_global_banded(Alignment& alignment, const HandleGraph& g,
int32_t band_padding, bool permissive_banding,
const unordered_map<handle_t, bool>* left_align_strand) const {
int32_t band_padding, bool permissive_banding) const {

if (alignment.sequence().empty()) {
// we can save time by using a specialized deletion aligner for empty strings
Expand All @@ -2129,8 +2118,7 @@ void QualAdjAligner::align_global_banded(Alignment& alignment, const HandleGraph
g,
band_padding,
permissive_banding,
true,
left_align_strand);
true);

band_graph.align(score_matrix, nt_table, gap_open, gap_extension);
} else if (best_score <= numeric_limits<int16_t>::max() && worst_score >= numeric_limits<int16_t>::min()) {
Expand All @@ -2139,8 +2127,7 @@ void QualAdjAligner::align_global_banded(Alignment& alignment, const HandleGraph
g,
band_padding,
permissive_banding,
true,
left_align_strand);
true);

band_graph.align(score_matrix, nt_table, gap_open, gap_extension);
} else if (best_score <= numeric_limits<int32_t>::max() && worst_score >= numeric_limits<int32_t>::min()) {
Expand All @@ -2149,8 +2136,7 @@ void QualAdjAligner::align_global_banded(Alignment& alignment, const HandleGraph
g,
band_padding,
permissive_banding,
true,
left_align_strand);
true);

band_graph.align(score_matrix, nt_table, gap_open, gap_extension);
} else {
Expand All @@ -2159,16 +2145,14 @@ void QualAdjAligner::align_global_banded(Alignment& alignment, const HandleGraph
g,
band_padding,
permissive_banding,
true,
left_align_strand);
true);

band_graph.align(score_matrix, nt_table, gap_open, gap_extension);
}
}

void QualAdjAligner::align_global_banded_multi(Alignment& alignment, vector<Alignment>& alt_alignments, const HandleGraph& g,
int32_t max_alt_alns, int32_t band_padding, bool permissive_banding,
const unordered_map<handle_t, bool>* left_align_strand) const {
int32_t max_alt_alns, int32_t band_padding, bool permissive_banding) const {

if (alignment.sequence().empty()) {
// we can save time by using a specialized deletion aligner for empty strings
Expand All @@ -2193,8 +2177,7 @@ void QualAdjAligner::align_global_banded_multi(Alignment& alignment, vector<Alig
max_alt_alns,
band_padding,
permissive_banding,
true,
left_align_strand);
true);

band_graph.align(score_matrix, nt_table, gap_open, gap_extension);
} else if (best_score <= numeric_limits<int16_t>::max() && worst_score >= numeric_limits<int16_t>::min()) {
Expand All @@ -2205,8 +2188,7 @@ void QualAdjAligner::align_global_banded_multi(Alignment& alignment, vector<Alig
max_alt_alns,
band_padding,
permissive_banding,
true,
left_align_strand);
true);

band_graph.align(score_matrix, nt_table, gap_open, gap_extension);
} else if (best_score <= numeric_limits<int32_t>::max() && worst_score >= numeric_limits<int32_t>::min()) {
Expand All @@ -2217,8 +2199,7 @@ void QualAdjAligner::align_global_banded_multi(Alignment& alignment, vector<Alig
max_alt_alns,
band_padding,
permissive_banding,
true,
left_align_strand);
true);

band_graph.align(score_matrix, nt_table, gap_open, gap_extension);
} else {
Expand All @@ -2229,8 +2210,7 @@ void QualAdjAligner::align_global_banded_multi(Alignment& alignment, vector<Alig
max_alt_alns,
band_padding,
permissive_banding,
true,
left_align_strand);
true);

band_graph.align(score_matrix, nt_table, gap_open, gap_extension);
}
Expand Down
Loading

1 comment on commit dbc77d7

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch lr-giraffe. View the full report here.

15 tests passed, 0 tests failed and 0 tests skipped in 16079 seconds

Please sign in to comment.