Skip to content

Commit

Permalink
add script to plot read balance for xx and xy individuals for chr19, …
Browse files Browse the repository at this point in the history
…chrX, and chrY including fixed sites
  • Loading branch information
thw17 committed Apr 10, 2019
1 parent aedb910 commit fd74343
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 1 deletion.
14 changes: 13 additions & 1 deletion analyses/Webster_etal_2018/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ rule all:
direction=[
"postprocessing_minus_noprocessing",
"noprocessing_minus_postprocessing"]),
"xyalign_analyses/hg19/vcf/hg19_chry_readbalance_stats_per_region.txt"
"xyalign_analyses/hg19/vcf/hg19_chry_readbalance_stats_per_region.txt",
"xyalign_analyses/hg19/vcf/hg19_readbalance_stats_overall_with_fixed.txt"

rule prepare_reference_hg19:
input:
Expand Down Expand Up @@ -636,3 +637,14 @@ rule plot_regional_chy_readbalance:
conda_env = xyalign_anaconda_env
shell:
"source activate {params.conda_env} && python scripts/Plot_read_balance_by_chrY_region.py {output} {params.path_prefix}"

# Plot read balance (fixed sites included) for chr19, chrX and chrY and write
# the overall summary table via scripts/Plot_read_balance_with_fixed_overall.py.
rule plot_readbalance_with_fixed_overall:
    input:
        # The script opens BOTH of these VCFs under params.path_prefix, so
        # both are declared here; otherwise Snakemake could schedule this
        # rule before the HG00513 VCF exists and would not rerun it when
        # that file changes.
        "xyalign_analyses/hg19/vcf/HG00512_wgs_hg19.noprocessing.vcf.gz",
        "xyalign_analyses/hg19/vcf/HG00513_wgs_hg19.noprocessing.vcf.gz"
    output:
        "xyalign_analyses/hg19/vcf/hg19_readbalance_stats_overall_with_fixed.txt"
    params:
        # Directory the script reads the VCFs from and passes to the plotting
        # calls as the prefix for their output names.
        path_prefix = "xyalign_analyses/hg19/vcf",
        conda_env = xyalign_anaconda_env
    shell:
        "source activate {params.conda_env} && python scripts/Plot_read_balance_with_fixed_overall.py {output} {params.path_prefix}"
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,26 @@
"chrY", xtr_parse[2], "X-transposed", False, "{}/X_trans_chrY".format(
path_prefix))

# Second pass over the chrY regions: the same histogram call as above, but
# with include_fixed=True and a "_with_fixed" output prefix.
# Each row is (parsed result for the region, plot label, output-prefix
# template); the template is filled in with path_prefix.
_with_fixed_specs = (
    (amp_parse, "Ampliconic", "{}/Ampliconic_chrY_with_fixed"),
    (het_parse, "Heterochromatic", "{}/Heterochromatic_chrY_with_fixed"),
    (oth_parse, "Other", "{}/Other_chrY_with_fixed"),
    (par_parse, "PAR", "{}/PAR_chrY_with_fixed"),
    (xde_parse, "X-degenerate", "{}/X_degen_chrY_with_fixed"),
    (xtr_parse, "X-transposed", "{}/X_trans_chrY_with_fixed"),
)
for _region_parse, _region_label, _prefix_template in _with_fixed_specs:
    # Element [2] of each parse result is what gets plotted, exactly as in
    # the calls without fixed sites.
    xyv.hist_read_balance(
        "chrY", _region_parse[2], _region_label, False,
        _prefix_template.format(path_prefix), include_fixed=True)

# Parallel lists: parse_list_regions[i] is the short name for parse_list[i].
parse_list_regions = [
    "ampliconic", "heterochromatic", "other", "par", "xdegen", "xtr"]
parse_list = [
    amp_parse, het_parse, oth_parse, par_parse, xde_parse, xtr_parse]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from __future__ import print_function
from xyalign import variants as xyv
import numpy as np
import sys

out_file = sys.argv[1]
path_prefix = sys.argv[2]

if path_prefix[-1] == "/":
path_prefix = path_prefix[:-1]

xx = xyv.VCFFile("{}/HG00513_wgs_hg19.noprocessing.vcf.gz".format(path_prefix))
xy = xyv.VCFFile("{}/HG00512_wgs_hg19.noprocessing.vcf.gz".format(path_prefix))

xx_19 = xx.parse_platypus_VCF(30, 30, 4, "chr19")
xx_x = xx.parse_platypus_VCF(30, 30, 4, "chrX")
xy_19 = xy.parse_platypus_VCF(30, 30, 4, "chr19")
xy_x = xy.parse_platypus_VCF(30, 30, 4, "chrX")
xy_y = xy.parse_platypus_VCF(30, 30, 4, "chrY")

xyv.hist_read_balance(
"chr19", xx_19[2], "HG000513_chr19", False, "{}/HG000513_chr19_with_fixed".format(
path_prefix), include_fixed=True)

xyv.hist_read_balance(
"chrX", xx_x[2], "HG000513_chrX", False, "{}/HG000513_chrX_with_fixed".format(
path_prefix), include_fixed=True)

xyv.hist_read_balance(
"chr19", xy_19[2], "HG000512_chr19", False, "{}/HG000512_chr19_with_fixed".format(
path_prefix), include_fixed=True)

xyv.hist_read_balance(
"chrX", xy_x[2], "HG000512_chrX", False, "{}/HG000512_chrX_with_fixed".format(
path_prefix), include_fixed=True)

xyv.hist_read_balance(
"chrY", xy_y[2], "HG000512_chrY", False, "{}/HG000512_chrY_with_fixed".format(
path_prefix), include_fixed=True)

parse_list = [xx_19, xx_x, xy_19, xy_x, xy_y]
parse_list_names = ["XX_chr19", "XX_chrX", "XY_chr19", "XY_chrX", "XY_chrY"]

with open(out_file, "w") as f:
f.write("sample_chrom\tmean_read_balance\tnum_sites\n")
for idx, i in enumerate(parse_list):
f.write("{}\t{}\t{}\n".format(
parse_list_names[idx], np.mean(i[2]), len(i[2])))

0 comments on commit fd74343

Please sign in to comment.