Skip to content

Commit

Permalink
Ignore inapplicable DeprecationWarning
Browse files Browse the repository at this point in the history
  • Loading branch information
davidemms committed Sep 7, 2016
1 parent cda3449 commit 6e29cf4
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 53 deletions.
6 changes: 4 additions & 2 deletions Tests/test_orthofinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,14 @@ def setUpClass(cls):


# @unittest.skipIf(__skipLongTests__, "Only performing quick tests")
# NOTE(review): this is a rendered diff hunk with the +/- markers and the
# indentation stripped, so removed and added lines appear back to back. The hunk
# summary says "4 additions & 2 deletions"; the first line of each adjacent
# near-duplicate pair below appears to be the removed version and the second
# the added one -- confirm against the repository before running this.
def test_fromfasta(self):
def test_fromfasta_full(self):
# Results are expected in a date-stamped "Results_<MonDD>/" directory created
# under the example FASTA directory.
currentResultsDir = exampleFastaDir + "Results_%s/" % datetime.date.today().strftime("%b%d")
expectedCSVFile = currentResultsDir + "Orthogroups.csv"
# CleanUp receives the results directory -- presumably so it is deleted when the
# block exits; verify against the CleanUp helper.
with CleanUp([], [], [currentResultsDir, ]):
# Run OrthoFinder on the FASTA directory: one variant passes "-g", the other omits it.
self.stdout, self.stderr = self.RunOrthoFinder("-f %s -g" % exampleFastaDir)
self.stdout, self.stderr = self.RunOrthoFinder("-f %s" % exampleFastaDir)
# Compare the captured output and the produced Orthogroups.csv with the gold results.
self.CheckStandardRun(self.stdout, self.stderr, goldResultsDir_smallExample, expectedCSVFile)
# The run must also have written a gene tree for orthogroup OG0000000 under
# Results_<MonDD>/Orthologues_<MonDD>/Gene_Trees/.
expectedTreeFN = exampleFastaDir + ("Results_%s/" % datetime.date.today().strftime("%b%d")) + ("Orthologues_%s/" % datetime.date.today().strftime("%b%d")) + "Gene_Trees/OG0000000_tree.txt"
self.assertTrue(os.path.exists(expectedTreeFN))
# Flag the test as passed; only set if every check above succeeded.
self.test_passed = True

def test_fromfasta_threads(self):
Expand Down
36 changes: 20 additions & 16 deletions orthofinder/orthofinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,25 +475,29 @@ def ConnectCognates(seqsInfo, fileInfo, iSpecies):

# Worker loop: repeatedly take an argument tuple from cmd_queue and pass it to
# WaterfallMethod.ConnectCognates, returning once the queue has stayed empty for
# the 1-second get() timeout (Queue.Empty).
# NOTE(review): this is a rendered diff hunk with +/- markers and indentation
# stripped. The loop appears twice: first bare, then wrapped in
# warnings.catch_warnings() -- the bare copy appears to be the removed version;
# confirm against the repository.
@staticmethod
def Worker_ConnectCognates(cmd_queue):
while True:
try:
# Blocking get with a 1 second timeout; raises Queue.Empty when nothing arrives.
args = cmd_queue.get(True, 1)
WaterfallMethod.ConnectCognates(*args)
except Queue.Empty:
return
# Same loop, run with every warning category ignored for the duration of the
# context manager (the previous filter state is restored on exit).
with warnings.catch_warnings():
warnings.simplefilter("ignore")
while True:
try:
args = cmd_queue.get(True, 1)
WaterfallMethod.ConnectCognates(*args)
except Queue.Empty:
return

# Assemble the MCL graph file: write a header file, have a process pool write one
# part file per species, concatenate header + parts into fileInfo.graphFilename
# with the shell `cat` command, then delete the temporary files and call
# DeleteMatrices(fileInfo).
# NOTE(review): this is a rendered diff hunk with +/- markers and indentation
# stripped. The body appears twice: first bare, then wrapped in
# warnings.catch_warnings() -- the bare copy appears to be the removed version;
# confirm against the repository.
@staticmethod
def WriteGraphParallel(seqsInfo, fileInfo):
# Header: declares an nSeqs x nSeqs matrix and opens the "(mclmatrix" section.
with open(fileInfo.graphFilename + "_header", 'wb') as graphFile:
graphFile.write("(mclheader\nmcltype matrix\ndimensions %dx%d\n)\n" % (seqsInfo.nSeqs, seqsInfo.nSeqs))
graphFile.write("\n(mclmatrix\nbegin\n\n")
# One WriteGraph_perSpecies task per species index; the "<graphFilename>_<iSp>"
# names used below are presumably what those tasks write -- verify.
pool = mp.Pool()
pool.map(WriteGraph_perSpecies, [(seqsInfo, fileInfo, iSpec) for iSpec in xrange(seqsInfo.nSpecies)])
# NOTE(review): the command is built by string concatenation and run with
# shell=True, so a graphFilename containing spaces or shell metacharacters would
# break it; the return code of `cat` is not checked.
subprocess.call("cat " + fileInfo.graphFilename + "_header " + " ".join([fileInfo.graphFilename + "_%d" % iSp for iSp in xrange(seqsInfo.nSpecies)]) + " > " + fileInfo.graphFilename, shell=True)
# Cleanup
os.remove(fileInfo.graphFilename + "_header")
for iSp in xrange(seqsInfo.nSpecies): os.remove(fileInfo.graphFilename + "_%d" % iSp)
DeleteMatrices(fileInfo)
# Same steps, run with every warning category ignored for the duration of the
# context manager.
with warnings.catch_warnings():
warnings.simplefilter("ignore")
with open(fileInfo.graphFilename + "_header", 'wb') as graphFile:
graphFile.write("(mclheader\nmcltype matrix\ndimensions %dx%d\n)\n" % (seqsInfo.nSeqs, seqsInfo.nSeqs))
graphFile.write("\n(mclmatrix\nbegin\n\n")
pool = mp.Pool()
pool.map(WriteGraph_perSpecies, [(seqsInfo, fileInfo, iSpec) for iSpec in xrange(seqsInfo.nSpecies)])
subprocess.call("cat " + fileInfo.graphFilename + "_header " + " ".join([fileInfo.graphFilename + "_%d" % iSp for iSp in xrange(seqsInfo.nSpecies)]) + " > " + fileInfo.graphFilename, shell=True)
# Cleanup
os.remove(fileInfo.graphFilename + "_header")
for iSp in xrange(seqsInfo.nSpecies): os.remove(fileInfo.graphFilename + "_%d" % iSp)
DeleteMatrices(fileInfo)

@staticmethod
def GetMostDistant_s(RBH, B, seqsInfo, iSpec):
Expand Down
71 changes: 38 additions & 33 deletions orthofinder/scripts/get_orthologues.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import itertools
import multiprocessing as mp
import Queue
import warnings

import util
import tree
Expand Down Expand Up @@ -107,8 +108,8 @@ def SequenceDict(self):
util.Fail()
else:
print("Tried to use only the first part of the accession in order to list the sequences in each orthogroup\nmore concisely but these were not unique. The full accession line will be used instead.\n")
self.seqIDsEx = util.FullAccession(self.seqIDsFN).GetIDToNameDict()
return self.seqIDsEx
self.seqIDsEx = util.FullAccession(self.seqIDsFN)
return self.seqIDsEx.GetIDToNameDict()

def SpeciesDict(self):
d = self.speciesIDsEx.GetIDToNameDict()
Expand Down Expand Up @@ -245,18 +246,20 @@ def __init__(self, ogSet, outD, nProcesses):
# Check files exist

# Queue one task per ordered species pair (iSp, jSp), start nThreads
# Worker_BlastScores processes that consume the queue, and wait for all of them
# to finish.
# NOTE(review): this is a rendered diff hunk with +/- markers and indentation
# stripped. The body appears twice: first bare, then wrapped in
# warnings.catch_warnings() -- the bare copy appears to be the removed version;
# confirm against the repository.
def ReadAndPickle(self):
cmd_queue = mp.Queue()
# i numbers the tasks consecutively; after the loops it equals the total task count.
i = 0
for iSp in xrange(len(self.ogSet.seqsInfo.speciesToUse)):
for jSp in xrange(len(self.ogSet.seqsInfo.speciesToUse)):
cmd_queue.put((i, (iSp, jSp)))
i+=1
# NOTE(review): nThreads is not defined in this method (the hunk header shows
# __init__ taking nProcesses) -- presumably a module-level name; confirm.
# The last argument is `i` (the task counter above), not the loop variable `i_`.
runningProcesses = [mp.Process(target=Worker_BlastScores, args=(cmd_queue, self.ogSet.seqsInfo, self.ogSet.fileInfo, nThreads, i)) for i_ in xrange(nThreads)]
for proc in runningProcesses:
proc.start()
# Wait for every worker to exit.
for proc in runningProcesses:
while proc.is_alive():
proc.join()
# Same steps, run with every warning category ignored for the duration of the
# context manager.
with warnings.catch_warnings():
warnings.simplefilter("ignore")
cmd_queue = mp.Queue()
i = 0
for iSp in xrange(len(self.ogSet.seqsInfo.speciesToUse)):
for jSp in xrange(len(self.ogSet.seqsInfo.speciesToUse)):
cmd_queue.put((i, (iSp, jSp)))
i+=1
runningProcesses = [mp.Process(target=Worker_BlastScores, args=(cmd_queue, self.ogSet.seqsInfo, self.ogSet.fileInfo, nThreads, i)) for i_ in xrange(nThreads)]
for proc in runningProcesses:
proc.start()
for proc in runningProcesses:
while proc.is_alive():
proc.join()

def NumberOfSequences(self, species):
ids = self.ogSet.SequenceDict()
Expand All @@ -269,25 +272,27 @@ def GetOGMatrices(self):
ogMatrices contains matrix M for each OG where:
Mij = 0.5*max(Bij, Bmin_i)/Bmax_i
"""
ogs = self.ogSet.OGs()
ogsPerSpecies = [[[(g, i) for i, g in enumerate(og) if g.iSp == iSp] for iSp in self.species] for og in ogs]
nGenes = [len(og) for og in ogs]
nSeqs = self.NumberOfSequences(self.species)
ogMatrices = [np.zeros((n, n)) for n in nGenes]
for iiSp, sp1 in enumerate(self.species):
util.PrintTime("Processing species %d" % sp1)
Bs = [matrices.LoadMatrix("Bit", self.ogSet.fileInfo, iiSp, jjSp) for jjSp in xrange(len(self.species))]
mins = np.ones((nSeqs[sp1], 1), dtype=np.float64)*9e99
maxes = np.zeros((nSeqs[sp1], 1), dtype=np.float64)
for B, sp2 in zip(Bs, self.species):
mins = np.minimum(mins, lil_min(B))
maxes = np.maximum(maxes, lil_max(B))
for jjSp, B in enumerate(Bs):
for og, m in zip(ogsPerSpecies, ogMatrices):
for gi, i in og[iiSp]:
for gj, j in og[jjSp]:
m[i, j] = 0.5*max(B[gi.iSeq, gj.iSeq], mins[gi.iSeq]) / maxes[gi.iSeq]
return ogs, ogMatrices
with warnings.catch_warnings():
warnings.simplefilter("ignore")
ogs = self.ogSet.OGs()
ogsPerSpecies = [[[(g, i) for i, g in enumerate(og) if g.iSp == iSp] for iSp in self.species] for og in ogs]
nGenes = [len(og) for og in ogs]
nSeqs = self.NumberOfSequences(self.species)
ogMatrices = [np.zeros((n, n)) for n in nGenes]
for iiSp, sp1 in enumerate(self.species):
util.PrintTime("Processing species %d" % sp1)
Bs = [matrices.LoadMatrix("Bit", self.ogSet.fileInfo, iiSp, jjSp) for jjSp in xrange(len(self.species))]
mins = np.ones((nSeqs[sp1], 1), dtype=np.float64)*9e99
maxes = np.zeros((nSeqs[sp1], 1), dtype=np.float64)
for B, sp2 in zip(Bs, self.species):
mins = np.minimum(mins, lil_min(B))
maxes = np.maximum(maxes, lil_max(B))
for jjSp, B in enumerate(Bs):
for og, m in zip(ogsPerSpecies, ogMatrices):
for gi, i in og[iiSp]:
for gj, j in og[jjSp]:
m[i, j] = 0.5*max(B[gi.iSeq, gj.iSeq], mins[gi.iSeq]) / maxes[gi.iSeq]
return ogs, ogMatrices

def DeleteBlastMatrices(self):
for f in glob.glob(self.ogSet.fileInfo.outputDir + "Bit*_*.pic"):
Expand Down
4 changes: 2 additions & 2 deletions orthofinder/scripts/orthologues_from_recon_trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def one_to_one_efficient(orthodict, genenumbers, speciesLabels, iSpecies, output
jSpLabel,jGene = map(int,Gene.split('_'))
jSp = speciesLabelsReverse[jSpLabel]
if iSpecies > jSp:
matrixlist[jSp][iGene, jGene] = 1
matrixlist[jSp][iGene, jGene] = 1
for j, m in enumerate(matrixlist):
with open(outputDir + 'ortholog_%d_%d_matrix.pic' % (iSpecies, j), 'wb') as file:
pic.dump(m, file)
Expand Down Expand Up @@ -128,7 +128,7 @@ def WriteOrthologues(resultsDir, spec1, spec2, orthologues, ogSet):
def GetOrthologues(orig_matrix, orig_matrix_csc, index):
    """Return the row/column index sets linked through row ``index``.

    The non-zero columns of row ``index`` in ``orig_matrix`` are collected
    first; the first of those columns is then looked up in
    ``orig_matrix_csc`` to collect every row with a non-zero entry in it.

    Args:
        orig_matrix: sparse matrix providing ``getrowview`` (e.g. a scipy
            ``lil_matrix``).
        orig_matrix_csc: sparse matrix providing ``getcol``; presumably the
            same matrix in CSC format -- verify against the caller.
        index: row of ``orig_matrix`` to start from.

    Returns:
        Tuple ``(originalSpeciesGenes, orthologues)``: the row indices that
        are non-zero in the first linked column, and the column indices that
        are non-zero in row ``index``.

    Raises:
        IndexError: if row ``index`` has no non-zero entries.
    """
    orthologues = orig_matrix.getrowview(index).nonzero()[1]
    # Keep the column in its own name instead of rebinding the `index` parameter.
    first_orthologue = orthologues[0]
    # NOTE: the original author annotated this call with "SparseEfficiencyWarning:
    # changing the sparsity structure of a csc_matrix is expensive. lil_matrix is
    # more efficient". The column is extracted exactly once.
    originalSpeciesGenes = orig_matrix_csc.getcol(first_orthologue).nonzero()[0]
    return (originalSpeciesGenes, orthologues)

#Takes in the output of multiply, finds all of the orthology relationships which it writes to textfiles and returns the number of each type of orthology relationship.
Expand Down

0 comments on commit 6e29cf4

Please sign in to comment.