Skip to content

Commit

Permalink
Handle MD:Z and MD:A types in build_alignment_sequence()
Browse files Browse the repository at this point in the history
Check the type code and raise TypeError instead of segfaulting if it is
not a Z string. Also accept MD:A to work around HTSeq bug that writes
1-character strings as MD:A:v. Fixes #1226. Add tests.
  • Loading branch information
jmarshall committed Sep 17, 2023
1 parent fcb4c2d commit 56a1e86
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 1 deletion.
13 changes: 12 additions & 1 deletion pysam/libcalignedsegment.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,18 @@ cdef inline bytes build_alignment_sequence(bam1_t * src):
elif op == BAM_CHARD_CLIP:
pass # advances neither

cdef char * md_tag = <char*>bam_aux2Z(md_tag_ptr)
cdef char *md_tag, md_buffer[2];
cdef uint8_t md_typecode = md_tag_ptr[0]
if md_typecode == b'Z':
md_tag = bam_aux2Z(md_tag_ptr)
elif md_typecode == b'A':
# Work around HTSeq bug that writes 1-character strings as MD:A:v
md_buffer[0] = bam_aux2A(md_tag_ptr)
md_buffer[1] = b'\0'
md_tag = md_buffer
else:
raise TypeError('Tagged field MD:{}:<value> does not have expected type MD:Z'.format(chr(md_typecode)))

cdef int md_idx = 0
cdef char c
s_idx = 0
Expand Down
26 changes: 26 additions & 0 deletions tests/AlignedSegment_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,32 @@ def test_get_aligned_pairs_uppercase_md(self):
],
)

def test_get_aligned_pairs_1character_md(self):
a = self.build_read()
a.query_sequence = "A" * 7
a.cigarstring = "7M"
a.set_tag("MD", "7", value_type="A")
self.assertEqual(
a.get_aligned_pairs(with_seq=True),
[
(0, 20, "A"),
(1, 21, "A"),
(2, 22, "A"),
(3, 23, "A"),
(4, 24, "A"),
(5, 25, "A"),
(6, 26, "A"),
],
)

def test_get_aligned_pairs_bad_type_md(self):
a = self.build_read()
a.query_sequence = "A" * 7
a.cigarstring = "7M"
a.set_tag("MD", 7)
with self.assertRaises(TypeError):
a.get_aligned_pairs(with_seq=True)

def testNoSequence(self):
"""issue 176: retrieving length without query sequence
with soft-clipping.
Expand Down

0 comments on commit 56a1e86

Please sign in to comment.