Skip to content

Commit

Permalink
Merge remote-tracking branch 'jean/annotate_gaf' into read-quietly
Browse files Browse the repository at this point in the history
  • Loading branch information
adamnovak committed Aug 1, 2024
2 parents 94fc9b2 + 553235d commit 23518fb
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 16 deletions.
1 change: 1 addition & 0 deletions src/alignment.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <htslib/vcf.h>
#include "handle.hpp"
#include "vg/io/alignment_io.hpp"
#include <vg/io/alignment_emitter.hpp>

namespace vg {

Expand Down
47 changes: 31 additions & 16 deletions src/subcommand/annotate_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ void help_annotate(char** argv) {
<< " -b, --bed-name FILE a BED file to convert to GAM. May repeat." << endl
<< " -f, --gff-name FILE a GFF3 file to convert to GAM. May repeat." << endl
<< " -g, --ggff output at GGFF subgraph annotation file instead of GAM (requires -s)" << endl
<< " -F, --gaf-output output in GAF format rather than GAM" << endl
<< " -s, --snarls FILE file containing snarls to expand GFF intervals into" << endl
<< "alignment annotation options:" << endl
<< " -a, --gam FILE file of Alignments to annotate (required)" << endl
Expand Down Expand Up @@ -97,6 +98,7 @@ int main_annotate(int argc, char** argv) {
size_t search_limit = 0;
bool novelty = false;
bool output_ggff = false;
bool output_gaf = false;
string snarls_name;

int c;
Expand All @@ -112,6 +114,7 @@ int main_annotate(int argc, char** argv) {
{"bed-name", required_argument, 0, 'b'},
{"gff-name", required_argument, 0, 'f'},
{"ggff", no_argument, 0, 'g'},
{"gaf-output", no_argument, 0, 'F'},
{"snarls", required_argument, 0, 's'},
{"novelty", no_argument, 0, 'n'},
{"threads", required_argument, 0, 't'},
Expand All @@ -120,7 +123,7 @@ int main_annotate(int argc, char** argv) {
};

int option_index = 0;
c = getopt_long (argc, argv, "hx:a:pml:b:f:gs:nt:h",
c = getopt_long (argc, argv, "hx:a:pml:b:f:gFs:nt:h",
long_options, &option_index);

// Detect the end of the options.
Expand Down Expand Up @@ -149,6 +152,10 @@ int main_annotate(int argc, char** argv) {
output_ggff = true;
break;

case 'F':
output_gaf = true;
break;

case 's':
snarls_name = optarg;
break;
Expand Down Expand Up @@ -293,10 +300,7 @@ int main_annotate(int argc, char** argv) {

get_input_file(bed_name, [&](istream& bed_stream) {
// Load all the BED regions as Alignments embedded in the graph.
vector<Alignment> bed_regions;
parse_bed_regions(bed_stream, xg_index, &bed_regions);

for (auto& region : bed_regions) {
parse_bed_regions(bed_stream, xg_index, [&](Alignment& region) {
// For each region in the BED

// Get the canonical copy of its name (which may be "")
Expand All @@ -309,7 +313,7 @@ int main_annotate(int argc, char** argv) {
features_on_node[mapping.position().node_id()].emplace_back(mapping_to_range(xg_index, mapping),
interned_name);
}
}
});
});
}

Expand Down Expand Up @@ -461,18 +465,28 @@ int main_annotate(int argc, char** argv) {
}
}
else {
// We are converting annotations to GAM.
// Set up GAM output.
// TODO: use AlignmentEmitter?
vector<Alignment> buffer;
// We are converting annotations to GAM/GAF.

// Open up a GAM/GAF output stream.
// TODO: Make the read parallel so we can actually use all the threads we are configuring for here.
unique_ptr<vg::io::AlignmentEmitter> alignment_emitter = vg::io::get_non_hts_alignment_emitter("-", output_gaf ? "GAF" : "GAM", {}, vg::get_thread_count(),
xg_index);
// There's some benefit to batching ourselves since the un-batched
// emit will make single-item batches to delegate to the batched
// version.
std::vector<Alignment> buffer;
auto emit_alignment = [&](Alignment& aln) {
// Buffer and possibly write each record
buffer.emplace_back(std::move(aln));
vg::io::write_buffered(cout, buffer, 1000);
// if we have enough, write them
if (buffer.size() > 1000) {
alignment_emitter->emit_singles(std::move(buffer));
// clear the buffer
buffer.clear();
}
};

for (auto& bed_name : bed_names) {
// Convert each BED file to GAM
// Convert each BED file
get_input_file(bed_name, [&](istream& bed_stream) {
parse_bed_regions(bed_stream, xg_index, emit_alignment);
});
Expand All @@ -483,9 +497,10 @@ int main_annotate(int argc, char** argv) {
parse_gff_regions(gff_stream, xg_index, emit_alignment);
});
}

// Flush the remaining stuff in the buffer
vg::io::write_buffered(cout, buffer, 0);

if (!buffer.empty()) {
alignment_emitter->emit_singles(std::move(buffer));
}
}
}

Expand Down

1 comment on commit 23518fb

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch read-quietly. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 17411 seconds

Please sign in to comment.