Skip to content

Commit

Permalink
Implement BinaryCovarion and add lphy draft #406
Browse files Browse the repository at this point in the history
  • Loading branch information
walterxie committed Feb 7, 2024
1 parent 14bb886 commit b7f75d0
Show file tree
Hide file tree
Showing 5 changed files with 230 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,24 @@ public Alignment charset(String str) {
return AlignmentUtils.getCharSetAlignment(charSetBlocks, this);
}

@MethodInfo(description="return an alignment array defined by charsets, which could be pre-defined in the nexus file.",
narrativeName = "character set",
category = GeneratorCategory.TAXA_ALIGNMENT,
examples = {"cpacific.lphy"})
public Alignment[] charsets() {
if (hasCharsets(charsetMap)) {
List<Alignment> partitions = new ArrayList<>();
for (Map.Entry<String, List<CharSetBlock>> entry : charsetMap.entrySet()) {
List<CharSetBlock> charSetBlocks = entry.getValue();
SimpleAlignment alg = AlignmentUtils.getCharSetAlignment(charSetBlocks, this);
// TODO how to set id ?
partitions.add(alg);
}
return partitions.toArray(new Alignment[0]);
}
throw new IllegalArgumentException("Not charset is found !");
}

// @MethodInfo(description="return a trait alignment, which contains the set of traits<br>" +
// "extracted from taxa names in this alignment.<br>" +
// "The <i>sepStr</i> is the substring to split the taxa names,<br>" +
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
package lphy.base.evolution.substitutionmodel;

import lphy.core.model.Value;
import lphy.core.model.ValueUtils;
import lphy.core.model.annotation.GeneratorCategory;
import lphy.core.model.annotation.GeneratorInfo;
import lphy.core.model.annotation.ParameterInfo;
import lphy.core.model.datatype.DoubleArray2DValue;

/**
* Description is copied from https://taming-the-beast.org/tutorials/LanguagePhylogenies/ :
*
* In the Binary Covarion model each character can have one of four states
* that are divided into visible states and hidden states.
* The visible ones are 0 and 1 and the hidden ones are fast and slow.
* Characters change their binary state at a rate 1 in the fast state and at rate alpha in the slow state.
* Furthermore, they are allowed to change from one of the hidden categories to another at a switch rate s.
*
* This implements the BEAST mode in BEAST 2.
*/
public class BinaryCovarion extends RateMatrix {

public static final String AlphaParamName = "alpha";
public static final String SwitchRateParamName = "s";
public static final String vfreqParamName = "vfreq";
public static final String hfreqParamName = "hfreq";

private final int NumOfStates = 4;

public BinaryCovarion(@ParameterInfo(name = AlphaParamName, description = "the rate of evolution in slow mode.") Value<Number> alpha,
@ParameterInfo(name = SwitchRateParamName, description = "the rate of flipping between slow and fast modes") Value<Number> s,
@ParameterInfo(name = vfreqParamName, description = "the frequencies of the visible states") Value<Number[]> vfreq,
@ParameterInfo(name = hfreqParamName, description = "the frequencies of the hidden rates") Value<Number[]> hfreq,
@ParameterInfo(name = meanRateParamName, description = "the mean rate of the process. default = 1.0", optional = true) Value<Number> meanRate) {

super(meanRate); // TODO this seems not used because of overwrites

setParam(AlphaParamName, alpha);
setParam(SwitchRateParamName, s);
setParam(vfreqParamName, vfreq);
setParam(hfreqParamName, hfreq);
}

@GeneratorInfo(name = "binaryCovarion", verbClause = "is", narrativeName = "Binary Covarion model",
category = GeneratorCategory.RATE_MATRIX, examples = {"cpacific.lphy"},
description = "The rate matrix of the Covarion model for Binary data. It is equivalent to the BEAST mode.")
public Value<Double[][]> apply() {

double alpha = ValueUtils.doubleValue(getParams().get(AlphaParamName));
double switchRate = ValueUtils.doubleValue(getParams().get(SwitchRateParamName));

double[] vfreq = ValueUtils.doubleArrayValue(getParams().get(vfreqParamName));
double[] hfreq = ValueUtils.doubleArrayValue(getParams().get(hfreqParamName));

Double[][] unnormalizedQ = setupUnnormalizedQMatrix(alpha, switchRate, vfreq, hfreq);
Double[] freqs = getFrequencies(vfreq, hfreq);

Double[][] Q = binaryCovarion(unnormalizedQ, freqs);

return new DoubleArray2DValue(Q, this);
}

private Double[][] binaryCovarion(Double[][] Q, Double[] freqs) {

// set up diagonal
for (int i = 0; i < NumOfStates; i++) {
double sum = 0.0;
for (int j = 0; j < NumOfStates; j++) {
if (i != j)
sum += Q[i][j];
}
Q[i][i] = -sum;
}
// normalise rate matrix to one expected substitution per unit time
normalize(freqs, Q);

return Q;
}

private Double[][] setupUnnormalizedQMatrix(double a, double s, double[] vf, double[] hf) {
double f0 = hf[0];
double f1 = hf[1];
double p0 = vf[0];
double p1 = vf[1];

assert Math.abs(1.0 - f0 - f1) < 1e-8;
assert Math.abs(1.0 - p0 - p1) < 1e-8;

Double[][] unnormalizedQ = new Double[NumOfStates][NumOfStates];

unnormalizedQ[0][1] = a * p1;
unnormalizedQ[0][2] = s;
unnormalizedQ[0][3] = 0.0;

unnormalizedQ[1][0] = a * p0;
unnormalizedQ[1][2] = 0.0;
unnormalizedQ[1][3] = s;

unnormalizedQ[2][0] = s;
unnormalizedQ[2][1] = 0.0;
unnormalizedQ[2][3] = p1;

unnormalizedQ[3][0] = 0.0;
unnormalizedQ[3][1] = s;
unnormalizedQ[3][2] = p0;

return unnormalizedQ;
}

void normalize(Double[] freqs, Double[][] Q) {
double subst = 0.0;
int dimension = freqs.length;

for (int i = 0; i < dimension; i++) {
subst += -Q[i][i] * freqs[i];
}

// normalize, including switches
for (int i = 0; i < dimension; i++) {
for (int j = 0; j < dimension; j++) {
Q[i][j] = Q[i][j] / subst;
}
}

double switchingProportion = 0.0;
switchingProportion += Q[0][2] * freqs[2];
switchingProportion += Q[2][0] * freqs[0];
switchingProportion += Q[1][3] * freqs[3];
switchingProportion += Q[3][1] * freqs[1];

//System.out.println("switchingProportion=" + switchingProportion);

// normalize, removing switches
for (int i = 0; i < dimension; i++) {
for (int j = 0; j < dimension; j++) {
Q[i][j] = Q[i][j] / (1.0 - switchingProportion);
}
}
}

private Double[] getFrequencies(double[] vf, double[] hf) {
Double[] freqs = new Double[NumOfStates];
freqs[0] = vf[0] * hf[0];
freqs[1] = vf[1] * hf[0];
freqs[2] = vf[0] * hf[1];
freqs[3] = vf[1] * hf[1];
return freqs;
}


}
6 changes: 2 additions & 4 deletions lphy-base/src/main/java/lphy/base/spi/LPhyBaseImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,8 @@ public List<Class<? extends BasicFunction>> declareFunctions() {
return Arrays.asList(ARange.class, ArgI.class,
// Substitution models
JukesCantor.class, K80.class, F81.class, HKY.class, GTR.class, WAG.class,
GeneralTimeReversible.class, LewisMK.class,
NucleotideModel.class,
BModelSetFunction.class,
bSiteModelFunction.class,
GeneralTimeReversible.class, LewisMK.class, BinaryCovarion.class,
BModelSetFunction.class, bSiteModelFunction.class, NucleotideModel.class,

// Data types
BinaryDatatypeFunction.class, NucleotidesFunction.class, StandardDatatypeFunction.class,
Expand Down
25 changes: 25 additions & 0 deletions tutorials/cpacific.lphy
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
data {
bird = "55-56 189-190 370-371 596-597 635-636 737-738 16-18 42-44 64-66 67-69 225-227 233-235 255-257 305-307 321-323 392-394 480-482 483-485 486-488 533-535 566-568 629-631 632-634 667-669 670-672 688-690 870-872 908-910 977-979 995-997 1129-1131 1228-1230 1260-1262 1275-1277 12-15 19-22 172-175 258-261 395-398 399-402 440-443 499-502 592-595 650-653 733-736 866-869 904-907 1009-1012 1072-1075 1162-1165 1224-1227 1237-1240 1250-1253 1-5 176-180 191-195 214-218 228-232 272-276 300-304 338-342 352-356 365-369 387-391 410-414 421-425 469-473 569-573 654-658 714-718 817-821 831-835 849-853 854-858 1019-1023 1067-1071 1300-1304 1335-1339";
and = "6-11 78-83 138-143 219-224 308-313 324-329 381-386 415-420 434-439 474-479 512-517 536-541 560-565 574-579 580-585 586-591 607-612 691-696 739-744 796-801 802-807 891-896 1013-1018 1061-1066 1203-1208 1231-1236 1254-1259 1305-1310 1319-1324 57-63 121-127 144-150 314-320 403-409 526-532 673-679 719-725 726-732 745-751 789-795 859-865 884-890 897-903 988-994 1046-1052 1076-1082 1209-1215 23-30 70-77 103-110 181-188 206-213 247-254 292-299 330-337 357-364 426-433 518-525 542-549 613-620 621-628 659-666 680-687 706-713 781-788 911-918 980-987 1053-1060 1216-1223 1311-1318 94-102 163-171 343-351 372-380 503-511 598-606 697-705 763-771 772-780 808-816 822-830 956-964 1241-1249 1278-1286 1351-1359 45-54 84-93 111-120 128-137 196-205 262-271 489-498 550-559 1036-1045 1094-1103 1132-1141 1142-1151 1152-1161 1325-1334";
belly = "31-41 236-246 458-468 752-762 873-883 919-929 998-1008 1083-1093 1118-1128 1166-1176 1192-1202 1340-1350 151-162 965-976 1024-1035 1263-1274 637-649 836-848 930-942 943-955 1287-1299 444-457 1104-1117 277-291 1177-1191";
D = readNexus(file="data/cpacific.nex");
taxa = D.taxa();
partitions = D.charset([bird, and, belly]); // TODO [b, a] cannot connect to b,a
// partitions = D.charsets(); // TODO returns Align[], but this cannot trigger vect
// L = nchar(partitions); // if cannot trigger vect, here will return 1 value, also nchar() is deprecated
L = partitions.nchar();
rootAge = 100;
}
model {
alpha ~ Uniform(lower=1.0E-4, upper=1.0);
switchRate ~ Gamma(shape=0.05, scale=10.0);
Q = binaryCovarion(alpha=alpha, s=switchRate, vfreq=[0.5, 0.5], hfreq=[0.5, 0.5]);

λ ~ Exp(mean=0.01);
mu ~ Exp(mean=0.01);
rho ~ Beta(alpha=22.0, beta=25.0);

ψ ~ BirthDeath(lambda=λ, mu=mu, taxa=taxa, rootAge=rootAge);

D ~ PhyloCTMC(tree=ψ, L=L, Q=Q);
}
Loading

0 comments on commit b7f75d0

Please sign in to comment.