Update incomplete/buggy pc
CanYing0913 committed Jun 16, 2023
1 parent 178d693 commit c32003f
Showing 5 changed files with 242 additions and 52 deletions.
2 changes: 1 addition & 1 deletion main_GUI.py
@@ -189,7 +189,7 @@ def handle_events(pipe_obj, window, settings):
if do_s0 or do_s1 or do_s2:
window['-META-FIN-SELECT-'].FileTypes = [('TIFF', '*.tif'), ]
elif do_s3:
window['-META-FIN-SELECT-'].FileTypes = [('caiman obj', '*.cmnobj'), ('HDF5 file', '*.hdf5'), ]
window['-META-FIN-SELECT-'].FileTypes = [('caiman obj', '*'), ('HDF5 file', '*.hdf5'), ]
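# Assumed rationale (not stated in the commit): a bare '*' pattern keeps CaImAn
# object files selectable even when they are saved without a .cmnobj extension.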

elif event == '-META-start-':
status, msg = pipe_obj.ready()
7 changes: 5 additions & 2 deletions readme.md
@@ -1,10 +1,13 @@
## In this project, we provide several ways for you to run the pipeline:
# CalciumZero: A user-friendly preprocessing pipeline for fluorescence calcium imaging
- Features
- [Run](#running-the-pipeline)
## Running the pipeline:
- You can use our pre-built GUI application. Download it for your platform [here](#running-locally-through-our-distribution).
- If you encounter problems with our GUI application, refer here for instructions on building the application from source.
- You can run it directly on Google Colab. See [Colab Instructions](#part-i-running-on-colab).
- You can run it using our Docker image.
- **[Discouraged]** You can manually install all the dependencies to run the pipeline and develop on it further. See [Instructions on local](#part-iii-running-locally) for a detailed explanation.
## Running locally throught our distribution
## Running locally through our distribution
If you just want to interact with and use this work, the **best** way is to launch our provided application package for your platform.

| System | platform | download link |
Binary file added src/finalized_model.sav
238 changes: 206 additions & 32 deletions src/src_peak_caller.py
@@ -1,17 +1,21 @@
"""
Source file for Section 4 - Peak Caller
Last edited on Jan 7, 2023
Last edited on Jun 14, 2023
Author: Xuchen Wang (xw2747@columbia.edu), Yian Wang (canying0913@gmail.com)
Copyright Yian Wang - 2022
For all inquiries, please contact Xuchen Wang.
Copyright Yian Wang - 2023
"""
import numpy as np
from numpy.linalg import matrix_power
import h5py
from matplotlib.pyplot import figure
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from numpy import linalg as LA
from numpy import mean, absolute
from numpy.linalg import matrix_power
from scipy.signal import find_peaks


# The only functions a user would call (current version) are
@@ -25,24 +29,20 @@
# Synchronization
# Correlation
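# A minimal usage sketch of that public surface (illustrative only: the input
# file name and output prefix below are assumptions, not part of this module):
#
#     import numpy as np
#     traces = np.load('roi_traces.npy')           # (n_rois, n_frames) intensity traces
#     pc = PeakCaller(traces, 'results/session1')  # second argument is the output prefix
#     pc.Detrender_2()
#     pc.Find_Peak_2()
#     pc.Raster_Plot()
#     pc.Save_Result()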


def mad(data, axis=None):
return mean(absolute(data - mean(data, axis)), axis)
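# Quick sanity check of mad() on a toy array:
#     >>> mad(np.array([1.0, 2.0, 3.0, 4.0]))
#     1.0   # mean(|x - 2.5|) = mean([1.5, 0.5, 0.5, 1.5])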


def flatten(lst):
return [item for sublist in lst for item in sublist]


class PeakCaller:
def __init__(self, seq, filename):
"""
Constructor for the PeakCaller object.
Parameters:
seq: input 2D data sequence, shape [number of ROIs x length of image sequence]
filename: previously the input HDF5 filename; now used as the output path prefix
"""
self.num_peak_rec = 0
self.seq = seq
if '.hdf5' in filename:
self.filename = filename[:-5] # Retrieve input path
else:
self.filename = filename # Directly set path
self.filename = str(Path(filename).with_suffix(''))  # strip any extension; keep as str for later "+" path concatenation
self.obs_num = len(seq)
self.length = len(seq[0])
self.smoothed_seq = np.zeros(self.obs_num * self.length).reshape(self.obs_num, self.length)
@@ -68,9 +68,13 @@ def __init__(self, seq, filename):
self.peak_fall_time = np.zeros(self.obs_num * self.length).reshape(self.obs_num, self.length)
self.non_peak_std = [0 for _ in range(self.obs_num)]
self.series_std = [0 for _ in range(self.obs_num)]
self.series_mad = [0 for _ in range(self.obs_num)]
self.series_rel_std = [0 for _ in range(self.obs_num)]
self.series_rel_std_sorted = [[0, i] for i in range(self.obs_num)]
self.matrix_smoother = np.ones((self.length, self.length)) / self.length
self.candidate_mean_prominence = [0 for _ in range(self.obs_num)]
self.peak_mean_prominence = [0 for _ in range(self.obs_num)]
self.peak_std_prominence = [0 for _ in range(self.obs_num)]
self.TrendSmoothness = 25

def Detrender(self, mark=0, s=60):
@@ -94,6 +98,102 @@ def Detrender(self, mark=0, s=60):

def Detrender_2(self):
self.detrended_seq = self.seq  # identity pass: keep the raw traces, only compute the spread statistics below
for j in range(self.obs_num):
self.series_std[j] = np.std(self.detrended_seq[j])
self.series_mad[j] = mad(self.detrended_seq[j])
self.series_rel_std[j] = self.series_std[j] / (
np.max(self.detrended_seq[j]) - np.min(self.detrended_seq[j]))
self.series_rel_std_sorted[j][0] = self.series_rel_std[j]
self.series_rel_std_sorted.sort()
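# Worked example for the spread statistics above: a trace with std 2.0 whose
# values span 0..10 gets series_rel_std = 2.0 / (10 - 0) = 0.2, i.e. the
# standard deviation expressed as a fraction of the trace's dynamic range.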

def Find_Peak_2(self, lookafter=25, lookbefore=25, rise=16.0, fall=16.0):
rise_ratio = (rise - 1) / 100
fall_ratio = (fall - 1) / 100
candidate = [[] for _ in range(self.obs_num)]
pks = [[] for _ in range(self.obs_num)]
for i in range(self.obs_num):
candidate[i], properties = find_peaks(self.detrended_seq[i], prominence=30)  # `data` was undefined here
self.candidate_mean_prominence[i] = np.mean(properties['prominences'])
peak_prominence_lst = []
required_rise = rise_ratio
required_fall = fall_ratio
prior_peak = 0
Range = (np.max(self.detrended_seq[i]) - np.min(self.detrended_seq[i]))
for j in range(len(candidate[i])):
k = candidate[i][j]
if k - lookbefore < prior_peak:
continue
# lookbackindex=max(prior_peak,k-lookbefore)
dropit = 0
lookbackindex = max(0, k - lookbefore)
minbefore = min(self.detrended_seq[i][lookbackindex:k + 1])
min_bf_index = np.argmin(self.detrended_seq[i][lookbackindex:k + 1]) + lookbackindex
if minbefore < self.detrended_seq[i][k] - Range * required_rise:
lookaheadthresh = min(self.length - 1, k + lookafter)
lookaheadindex = lookaheadthresh
for afterindex in range(k + 1, lookaheadthresh + 1):
if self.detrended_seq[i][k] < self.detrended_seq[i][afterindex]:
lookaheadindex = afterindex
dropit = 1
break
if dropit == 1:
continue
minafter = min(self.detrended_seq[i][k:lookaheadindex + 1])
min_af_index = np.argmin(self.detrended_seq[i][k:lookaheadindex + 1]) + k
if minafter < self.detrended_seq[i][k] - Range * required_fall:  # decay side uses the fall threshold (required_fall was previously unused)
peak_prominence_lst.append(properties['prominences'][j])
self.peak_loc[i][k] = 1
self.peak_start[i][k] = min_bf_index
self.peak_half_start[i][k] = np.where(
self.detrended_seq[i][min_bf_index:k + 1] <= (minbefore + self.detrended_seq[i][k]) / 2)[0][
-1] + min_bf_index
self.peak_end[i][k] = min_af_index
self.peak_half_end[i][k] = np.where(
self.detrended_seq[i][k:min_af_index + 1] <= (minafter + self.detrended_seq[i][k]) / 2)[0][
0] + k
self.peak_rise_time[i][k] = k - self.peak_half_start[i][k]
self.peak_fall_time[i][k] = self.peak_half_end[i][k] - k
height = (2 * self.detrended_seq[i][k] - minbefore - minafter) / 2
# height=max(self.detrended_seq[i][k]-minbefore,self.detrended_seq[i][k]-minafter)
pks[i].append(height)
self.peak_height[i][k] = height
prior_peak = k
next_peak = self.length - 1
self.peak_height_std[i] = np.std(np.array(pks[i]))
self.peak_height_mean[i] = np.mean(np.array(pks[i]))
self.peak_mean_prominence[i] = np.mean(peak_prominence_lst)
self.peak_std_prominence[i] = np.std(peak_prominence_lst)
continue  # note: this `continue` makes the backward refinement pass below unreachable
for k in reversed(candidate[i]):
lookafterindex = min(next_peak, k + lookafter)
minafter = min(self.detrended_seq[i][k:lookafterindex + 1])
if minafter < (1 - required_rise) * self.detrended_seq[i][k]:
next_peak = k
else:
self.peak_loc[i][k] = 0
for num in range(self.obs_num):
loc = np.where(self.peak_height[num] > 0)[0]
# loc=np.where((self.peak_height[num]>self.peak_height_mean[num]+3*self.peak_height_std[num]))[0]
self.filterer_peak_loc[num][loc] = 1
self.filterer_peak_half_start[num][self.peak_half_start[num][loc].astype(int)] = 1
self.filterer_peak_half_end[num][self.peak_half_end[num][loc].astype(int)] = 1
self.filterer_peak_loc_2[num] = loc
heights = self.peak_height[num][loc]
rise_times = self.peak_rise_time[num][loc]
fall_times = self.peak_fall_time[num][loc]
self.filterer_peak_height_mean[num] = np.mean(heights)
self.filterer_peak_height[num] = list(heights)
self.filterer_peak_rise_time[num] = list(rise_times)
self.filterer_peak_fall_time[num] = list(fall_times)
for num in range(self.obs_num):
index_lst = [1 for _ in range(self.length)]  # one flag per time point (was sized by ROI count)
for k in np.where(self.filterer_peak_loc[num] == 1)[0]:  # positions of kept peaks (old loop read a stale index)
for i in range(int(self.peak_start[num][k]), int(self.peak_end[num][k] + 1)):
index_lst[i] = 0
real_index = np.where(np.array(index_lst) == 1)
other_points = self.detrended_seq[num][real_index]
self.non_peak_std[num] = np.std(other_points)
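# For reference, a standalone illustration of the scipy.signal.find_peaks call
# used for candidate selection above (toy data, same prominence threshold):
#
#     from scipy.signal import find_peaks
#     import numpy as np
#     trace = np.array([0, 50, 0, 10, 0, 80, 0], dtype=float)
#     peaks, props = find_peaks(trace, prominence=30)
#     # peaks -> array([1, 5]); props['prominences'] -> array([50., 80.])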

def Find_Peak(self, lookafter=25, lookbefore=25, rise=16.0, fall=16.0):
rise_ratio = (rise - 1) / 100
@@ -183,11 +283,13 @@ def Find_Peak(self, lookafter=25, lookbefore=25, rise=16.0, fall=16.0):
real_index = np.where(np.array(index_lst) == 1)
other_points = self.detrended_seq[num][real_index]
self.non_peak_std[num] = np.std(other_points)
self.num_peak_rec = [len(self.filterer_peak_loc_2[i]) for i in range(self.obs_num)]

def Print_Peak(self, num):
main_data = self.detrended_seq[num]
# loc=np.where((self.peak_height[num]>self.peak_height_mean[num]+3*self.peak_height_std[num]))[0]
loc = np.where(self.peak_height[num] > (max(self.detrended_seq[num]) - min(self.detrended_seq[num])) / 3)[0]
# loc=np.where(self.peak_height[num]>0)[0]
# print(loc)
highlight = [loc, main_data[loc]]
plt.plot(main_data)
@@ -209,18 +311,27 @@ def Find_Peak_Bood(self, thresh=0.15):

def Print_ALL_Peaks(self):
path = self.filename + "_All_Peaks"
fig, axs = plt.subplots(self.obs_num, figsize=(15, 4 * self.obs_num))
fig.tight_layout()
for j in range(self.obs_num):
main_data = self.detrended_seq[j]
# loc=np.where((self.peak_height[j]>self.peak_height_mean[j]+2*self.peak_height_std[j]))[0]
loc = np.where(self.peak_height[j] > (max(self.detrended_seq[j]) - min(self.detrended_seq[j])) / 3)[0]
highlight = [loc, main_data[loc]]
axs[j].plot(main_data)
axs[j].scatter(*highlight, marker='v', color='r')
fig.savefig(path + "_All_Peaks")
fig.clf()
plt.close()
for i in range(self.obs_num // 100 + 1):
num_left = min(self.obs_num - 100 * i, 100)
if num_left <= 0:
break
with plt.rc_context({'xtick.color': 'yellow', 'ytick.color': 'yellow'}):
fig, axs = plt.subplots(num_left, figsize=(15, 4 * num_left))
fig.tight_layout()
for j in range(100 * i, 100 * i + num_left):
main_data = self.detrended_seq[j]
# loc=np.where((self.peak_height[j]>self.peak_height_mean[j]+2*self.peak_height_std[j]))[0]
loc = np.where(self.peak_height[j] > (max(self.detrended_seq[j]) - min(self.detrended_seq[j])) / 3)[
0]
# loc=np.where(self.peak_height[j]>0)[0]
highlight = [loc, main_data[loc]]
axs[j % 100].plot(main_data)
axs[j % 100].set_xlabel('Time')
axs[j % 100].set_ylabel('Intensity')
axs[j % 100].scatter(*highlight, marker='v', color='r')
fig.savefig(path + "_All_Peaks_" + str(i))
fig.clf()
plt.close()

def Raster_Plot(self):
path = self.filename + "_Raster_Plot"
@@ -234,6 +345,7 @@ def Raster_Plot(self):
plt.scatter(x, y, color=(0, 0.8, 0))
plt.xlabel('Time(s)')
plt.ylabel('ROI(#)')
# plt.show()  # showing before savefig can leave the saved file blank
plt.savefig(path)
plt.close()

@@ -243,6 +355,7 @@ def Histogram_Height(self):
plt.hist(combined, bins=10, edgecolor='black', color=(0.6, 0.6, 0.75))
plt.xlabel('height of peak')
plt.ylabel('number of events')
# plt.show()
plt.savefig(path)
plt.close()

@@ -255,6 +368,7 @@ def Histogram_Time(self):
plt.legend(prop={'size': 10})
plt.xlabel('time (s)')
plt.ylabel('number of events')
# plt.show()
plt.savefig(path)
plt.close()

@@ -312,15 +426,25 @@ def Save_Result(self):
series_data.to_csv(path2, index=False)
temp = np.array([max(a, 0) for a in list(series_data['Number_of_Peaks'] - 1)])
val = np.dot(temp, np.array(series_data['Mean_InterEvent_Interval'])) / np.sum(temp)
interlev_lst = []
base1 = np.array(series_data['Mean_InterEvent_Interval'])
for i in range(len(temp)):
interlev_lst += [base1[i]] * int(temp[i])
details = {
'Mean_Number_of_Signal_Events': [np.mean(series_data['Number_of_Peaks'])],
'Standard_Deviation_of_the_Number_of_Signal_Events': [np.std(series_data['Number_of_Peaks'])],
'Mean_Height_All': [np.mean(flatten(self.filterer_peak_height))],
'Std_Height_All': [np.std(flatten(self.filterer_peak_height))],
'Mean_Rise_Time_All': [np.mean(flatten(self.filterer_peak_rise_time))],
'Std_Rise_Time_All': [np.std(flatten(self.filterer_peak_rise_time))],
'Mean_Fall_Time_All': [np.mean(flatten(self.filterer_peak_fall_time))],
'Std_Fall_Time_All': [np.std(flatten(self.filterer_peak_fall_time))],
'Mean_Total_Time_All': [
np.mean(flatten(self.filterer_peak_rise_time)) + np.mean(flatten(self.filterer_peak_fall_time))],
'Std_Total_Time_All': [
np.std(np.add(flatten(self.filterer_peak_rise_time), flatten(self.filterer_peak_fall_time)))],
'Mean_InterEvent_Interval_All': [val],
'Std_InterEvent_Interval_All': [np.std(interlev_lst)],
'Mean_Frequency_All': [1 / val],
}
summary_data = pd.DataFrame(details)
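# Note on `val` above: it is the count-weighted mean of the per-ROI mean
# inter-event intervals. Toy example: ROIs with (peaks, mean interval) of
# (3, 10.0) and (2, 40.0) give temp = [2, 1], so
# val = (2 * 10.0 + 1 * 40.0) / (2 + 1) = 20.0.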
@@ -354,6 +478,7 @@ def Synchronization(self, cluster=False):
if not cluster:
ax = sns.heatmap(SI, cmap='jet', linecolor='black')
ax.invert_yaxis()
# plt.show()
plt.savefig(path + "No_Cluster")
plt.close()
np.savetxt(path + "No_Cluster.csv", SI, delimiter=",")
@@ -376,6 +501,7 @@
SI = SI[idx].T[idx].T
ax = sns.heatmap(SI, cmap='jet', linecolor='black')
ax.invert_yaxis()
# plt.show()
plt.savefig(path + "With_Cluster")
np.savetxt(path + "With_Cluster.csv", SI, delimiter=",")
plt.close()
@@ -403,13 +529,13 @@ def Correlation(self):
max_cor = max(cor, max_cor)
heat_mat[i][j] = max_cor
heat_mat[j][i] = max_cor
ax = sns.heatmap(heat_mat, cmap='jet', linecolor='black')
ax = sns.heatmap(heat_mat, cmap='jet', linecolor='black', vmin=-1, vmax=1)
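# vmin/vmax pin the colour scale to [-1, 1], the full range of a normalized
# correlation, so heatmaps stay comparable across datasets.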
ax.invert_yaxis()
# plt.show()
plt.savefig(path)
np.savetxt(path + ".csv", heat_mat, delimiter=",")
plt.close()

# Not used
def Print_Peak_Good(self, thresh=0.15):
total = 0
for item in self.series_rel_std_sorted:
@@ -424,7 +550,6 @@ def Print_Peak_Good(self, thresh=0.15):
j += 1
fig.savefig("/content/drive/MyDrive/Good_Ones")

# Not used
def Print_Peak_Bad(self, thresh=0.15):
total = 0
for item in self.series_rel_std_sorted:
@@ -438,3 +563,52 @@ def Print_Peak_Bad(self, thresh=0.15):
axs[j].plot(self.detrended_seq[item[1]])
j += 1
fig.savefig("/content/drive/MyDrive/Bad_Ones")

def Filter_Series(self, model_root):
with open(model_root, 'rb') as f:
clf = pickle.load(f)  # pre-trained classifier, e.g. src/finalized_model.sav
ratio = np.array(self.filterer_peak_height_mean) / np.array(self.non_peak_std)
ratio[np.isnan(ratio)] = 1
rel_std = self.series_rel_std
X = np.array([[0.0] for _ in range(self.obs_num)])
Z = np.array([0.0 for _ in range(self.obs_num)])
for i in range(len(X)):
X[i][0] = ratio[i]
Z[i] = rel_std[i]
W = np.array(self.series_mad)
details = {
'ROI(#)': [],
'Number_of_Peaks': [],
'Mean_Height': [],
'Mean_Rise_Time': [],
'Mean_Fall_Time': [],
'Mean_Total_Time': [],
'Mean_InterEvent_Interval': [],
'Mean_Frequency': [],
}
series_data = pd.DataFrame(details)
for i in range(len(self.filterer_peak_height)):
interv = 0
freq = 0
if len(self.filterer_peak_height[i]) >= 2:
interv = (self.filterer_peak_loc_2[i][-1] - self.filterer_peak_loc_2[i][0]) / (
len(self.filterer_peak_height[i]) - 1)
freq = 1 / interv
series_data.loc[len(series_data.index)] = [i, len(self.filterer_peak_loc_2[i]),
np.mean(self.filterer_peak_height[i]),
np.mean(self.filterer_peak_rise_time[i]),
np.mean(self.filterer_peak_fall_time[i]),
np.mean(self.filterer_peak_rise_time[i]) + np.mean(
self.filterer_peak_fall_time[i]), interv, freq]
series_data = series_data.drop(columns=['ROI(#)'])
series_data['peak_mean_prominence'] = self.peak_mean_prominence
series_data['peak_height_std'] = self.peak_height_std
series_data['candidate_mean_prominence'] = self.candidate_mean_prominence
series_data['peak_std_prominence'] = self.peak_std_prominence
series_data['W'] = W
series_data['Z'] = Z
series_data['X'] = X.ravel()  # flatten the (n, 1) column vector for pandas
series_data = series_data.to_numpy()
series_data[np.isnan(series_data)] = 0
pred = clf.predict(series_data)
new_idx = np.where(pred > 0)
return self.seq[new_idx]
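# A hedged sketch of how a model file like src/finalized_model.sav could be
# produced (assumption: any scikit-learn-style classifier exposing .predict
# works; its training features must match the columns assembled above):
#
#     import pickle
#     from sklearn.ensemble import RandomForestClassifier
#     clf = RandomForestClassifier().fit(X_train, y_train)  # X_train: per-ROI feature rows
#     with open('src/finalized_model.sav', 'wb') as f:
#         pickle.dump(clf, f)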
