From fd743437263806590a6df78bcb6da980ebaab70d Mon Sep 17 00:00:00 2001 From: Tim Webster Date: Wed, 10 Apr 2019 01:13:07 -0600 Subject: [PATCH] add script to plot read balance for xx and xy individuals for chr19, chrX, and chrY including fixed sites --- analyses/Webster_etal_2018/Snakefile | 14 +++++- .../Plot_read_balance_by_chrY_region.py | 20 ++++++++ .../Plot_read_balance_with_fixed_overall.py | 48 +++++++++++++++++++ 3 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 analyses/Webster_etal_2018/scripts/Plot_read_balance_with_fixed_overall.py diff --git a/analyses/Webster_etal_2018/Snakefile b/analyses/Webster_etal_2018/Snakefile index cc2adb4..cb63ab7 100644 --- a/analyses/Webster_etal_2018/Snakefile +++ b/analyses/Webster_etal_2018/Snakefile @@ -143,7 +143,8 @@ rule all: direction=[ "postprocessing_minus_noprocessing", "noprocessing_minus_postprocessing"]), - "xyalign_analyses/hg19/vcf/hg19_chry_readbalance_stats_per_region.txt" + "xyalign_analyses/hg19/vcf/hg19_chry_readbalance_stats_per_region.txt", + "xyalign_analyses/hg19/vcf/hg19_readbalance_stats_overall_with_fixed.txt" rule prepare_reference_hg19: input: @@ -636,3 +637,14 @@ rule plot_regional_chy_readbalance: conda_env = xyalign_anaconda_env shell: "source activate {params.conda_env} && python scripts/Plot_read_balance_by_chrY_region.py {output} {params.path_prefix}" + +rule plot_readbalance_with_fixed_overall: + input: + "xyalign_analyses/hg19/vcf/HG00512_wgs_hg19.noprocessing.vcf.gz" + output: + "xyalign_analyses/hg19/vcf/hg19_readbalance_stats_overall_with_fixed.txt" + params: + path_prefix = "xyalign_analyses/hg19/vcf", + conda_env = xyalign_anaconda_env + shell: + "source activate {params.conda_env} && python scripts/Plot_read_balance_with_fixed_overall.py {output} {params.path_prefix}" diff --git a/analyses/Webster_etal_2018/scripts/Plot_read_balance_by_chrY_region.py b/analyses/Webster_etal_2018/scripts/Plot_read_balance_by_chrY_region.py index 1175d2d..cda381a 100644 --- a/analyses/Webster_etal_2018/scripts/Plot_read_balance_by_chrY_region.py +++ b/analyses/Webster_etal_2018/scripts/Plot_read_balance_by_chrY_region.py @@ -42,6 +42,26 @@ "chrY", xtr_parse[2], "X-transposed", False, "{}/X_trans_chrY".format( path_prefix)) +# with fixed +xyv.hist_read_balance( + "chrY", amp_parse[2], "Ampliconic", False, "{}/Ampliconic_chrY_with_fixed".format( + path_prefix), include_fixed=True) +xyv.hist_read_balance( + "chrY", het_parse[2], "Heterochromatic", False, "{}/Heterochromatic_chrY_with_fixed".format( + path_prefix), include_fixed=True) +xyv.hist_read_balance( + "chrY", oth_parse[2], "Other", False, "{}/Other_chrY_with_fixed".format( + path_prefix), include_fixed=True) +xyv.hist_read_balance( + "chrY", par_parse[2], "PAR", False, "{}/PAR_chrY_with_fixed".format( + path_prefix), include_fixed=True) +xyv.hist_read_balance( + "chrY", xde_parse[2], "X-degenerate", False, "{}/X_degen_chrY_with_fixed".format( + path_prefix), include_fixed=True) +xyv.hist_read_balance( + "chrY", xtr_parse[2], "X-transposed", False, "{}/X_trans_chrY_with_fixed".format( + path_prefix), include_fixed=True) + parse_list = [amp_parse, het_parse, oth_parse, par_parse, xde_parse, xtr_parse] parse_list_regions = [ "ampliconic", "heterochromatic", "other", "par", "xdegen", "xtr"] diff --git a/analyses/Webster_etal_2018/scripts/Plot_read_balance_with_fixed_overall.py b/analyses/Webster_etal_2018/scripts/Plot_read_balance_with_fixed_overall.py new file mode 100644 index 0000000..661a22e --- /dev/null +++ b/analyses/Webster_etal_2018/scripts/Plot_read_balance_with_fixed_overall.py @@ -0,0 +1,48 @@ +from __future__ import print_function +from xyalign import variants as xyv +import numpy as np +import sys + +out_file = sys.argv[1] +path_prefix = sys.argv[2] + +if path_prefix[-1] == "/": + path_prefix = path_prefix[:-1] + +xx = xyv.VCFFile("{}/HG00513_wgs_hg19.noprocessing.vcf.gz".format(path_prefix)) +xy = xyv.VCFFile("{}/HG00512_wgs_hg19.noprocessing.vcf.gz".format(path_prefix)) + +xx_19 = xx.parse_platypus_VCF(30, 30, 4, "chr19") +xx_x = xx.parse_platypus_VCF(30, 30, 4, "chrX") +xy_19 = xy.parse_platypus_VCF(30, 30, 4, "chr19") +xy_x = xy.parse_platypus_VCF(30, 30, 4, "chrX") +xy_y = xy.parse_platypus_VCF(30, 30, 4, "chrY") + +xyv.hist_read_balance( + "chr19", xx_19[2], "HG000513_chr19", False, "{}/HG000513_chr19_with_fixed".format( + path_prefix), include_fixed=True) + +xyv.hist_read_balance( + "chrX", xx_x[2], "HG000513_chrX", False, "{}/HG000513_chrX_with_fixed".format( + path_prefix), include_fixed=True) + +xyv.hist_read_balance( + "chr19", xy_19[2], "HG000512_chr19", False, "{}/HG000512_chr19_with_fixed".format( + path_prefix), include_fixed=True) + +xyv.hist_read_balance( + "chrX", xy_x[2], "HG000512_chrX", False, "{}/HG000512_chrX_with_fixed".format( + path_prefix), include_fixed=True) + +xyv.hist_read_balance( + "chrY", xy_y[2], "HG000512_chrY", False, "{}/HG000512_chrY_with_fixed".format( + path_prefix), include_fixed=True) + +parse_list = [xx_19, xx_x, xy_19, xy_x, xy_y] +parse_list_names = ["XX_chr19", "XX_chrX", "XY_chr19", "XY_chrX", "XY_chrY"] + +with open(out_file, "w") as f: + f.write("sample_chrom\tmean_read_balance\tnum_sites\n") + for idx, i in enumerate(parse_list): + f.write("{}\t{}\t{}\n".format( + parse_list_names[idx], np.mean(i[2]), len(i[2])))