%matplotlib inline
%load_ext autoreload
%autoreload 2
%load_ext line_profiler
import os
import random

import imageio
import numpy as np
import pandas as pd
import scanpy as sc
import torch
from line_profiler import LineProfiler
from scipy.io import mmread

import src.scanpy_unicoord as scu
from src.visualization import *
# Session-wide scanpy configuration for the rest of this notebook.
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
# Print scanpy's logging header once at the start of the session.
sc.logging.print_header()
# Alternative figure defaults, kept for reference:
# sc.settings.set_figure_params(dpi=80, facecolor='white')
# Apply scanpy's figure defaults with the vector_friendly option switched off.
sc.settings.set_figure_params(vector_friendly=False)
# --- Load the Heart_EC inputs: expression matrix, per-cell metadata, gene table ---
mtx_file = './datasets/Heart_EC/exploratory.data.allgene.mtx'
meta_file = './datasets/Heart_EC/exploratory.metadata.csv'
mtx = mmread(mtx_file)
anno = pd.read_csv(meta_file, index_col=0)
mtx

# The gene table is indexed by its second column; the leftover unnamed
# column from the CSV export is dropped.
genes = pd.read_csv(
    './datasets/Heart_EC/gene_names.txt', index_col=1
).drop(columns=['Unnamed: 0'])

# The matrix is transposed so that its rows pair with `anno` (obs) and its
# columns with `genes` (var).
adata = sc.AnnData(X=mtx.T.tocsr(), obs=anno, var=genes)
adata

# --- Normalise, log-transform and flag highly variable genes ---
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata)

# Stash the full log-normalised gene set in `.raw`; it is restored below once
# the embedding has been computed on the highly variable subset.
adata.raw = adata
hvg_mask = adata.var.highly_variable
adata = adata[:, hvg_mask]

# --- Embedding pipeline on the highly variable genes, run in this order ---
for step in (sc.pp.scale, sc.tl.pca, sc.pp.neighbors, sc.tl.leiden, sc.tl.umap):
    step(adata)

sc.pl.umap(
    adata,
    color=['Cluster', 'Tissue', 'ACV', 'diffusion.pseudotime'],
    ncols=1,
)

# Switch back to the full gene set stored in `.raw` and persist the result.
adata = adata.raw.to_adata()
adata.write_h5ad('./datasets/Heart_EC/Heart_EC.h5ad')
adata
# Min-max rescale the diffusion pseudotime and stretch it to the range 0..20.
# Series.min()/.max() are used instead of the Python builtins: they are
# vectorised and skip NaN, whereas builtin min()/max() walk the Series element
# by element and give order-dependent results when NaN is present.
diff = adata.obs['diffusion.pseudotime']
diff = (diff - diff.min()) / (diff.max() - diff.min())
adata.obs['diff_scale'] = diff * 20

# Configure the unicoord model on `adata`, fitting the 'Tissue' and
# 'diff_scale' columns of obs, then train it.
scu.model_unicoord_in_adata(
    adata,
    n_cont=50,
    n_diff=0,
    n_clus=[],
    obs_fitting=['Tissue', 'diff_scale'],
)
scu.train_unicoord_in_adata(adata, epochs=100, chunk_size=20000, slot="cur")

# Plot the losses recorded by the trainer that training stored in adata.uns.
fig = draw_loss_curves(adata.uns['unc_stuffs']['trainer'].losses)
# if save_figs:
# fig.savefig(os.path.join(savePath, 'img', 'fig1_lossCurves.png'))
fig.show()
# Panels used to compare each fitted obs column with its inferred counterpart.
comparison_panels = [
    'Tissue', 'Tissue_unc_infered',
    'diff_scale', 'diff_scale_unc_infered',
]

# --- Prediction on the cells not flagged in obs.unc_training ---
# presumably `unc_training` marks the cells used for fitting; verify in scu
held_out = ~adata.obs.unc_training
bdata = adata[held_out, :].copy()
scu.predcit_unicoord_in_adata(bdata, adata)
sc.pl.umap(bdata, color=comparison_panels, ncols=2)

# --- Generation from the trained model ---
bdata = scu.generate_unicoord_in_adata(adata)
bdata
# Reuse the UMAP coordinates of `adata` so the generated data is drawn on the
# same layout.
bdata.obsm['X_umap'] = adata.obsm['X_umap']
sc.pl.umap(
    bdata,
    color=['Cluster', 'Tissue', 'ACV', 'diff_scale'],
    ncols=1,
)

# bdata = bdata.raw.to_adata()
# Run prediction on the generated data and compare again.
scu.predcit_unicoord_in_adata(bdata, adata)
sc.pl.umap(bdata, color=comparison_panels, ncols=2)
from src.utils import pairwise_distances


def find_nearest_neighbors(query, ref, genes):
    """Return, for every row of ``query``, the position of its closest row in ``ref``.

    Both objects are subset to ``genes`` before their ``.X`` matrices are
    compared with ``pairwise_distances``.

    Parameters
    ----------
    query, ref
        Objects that support ``obj[:, genes].X`` (AnnData in this notebook).
    genes
        Column selector applied to both ``query`` and ``ref``.

    Returns
    -------
    numpy.ndarray
        One index per query row: the argmin over axis 1 of the distance
        matrix.
        NOTE(review): assumes ``pairwise_distances`` returns a
        (n_query, n_ref) matrix -- confirm against src.utils.
    """
    from scipy import sparse  # local import: only needed for the sparse check

    def _to_float_tensor(matrix):
        # torch.FloatTensor cannot ingest scipy sparse matrices, so densify
        # them first; dense input takes the same path as before.
        if sparse.issparse(matrix):
            matrix = matrix.toarray()
        return torch.FloatTensor(matrix)

    gquery = _to_float_tensor(query[:, genes].X)
    gref = _to_float_tensor(ref[:, genes].X)
    dis = pairwise_distances(gquery, gref)
    return np.array(np.argmin(dis, axis=1))
import itertools

# Draw a random sample of cell names from every tissue, at most 500 each.
# The sample size is clamped to the number of cells available, because
# random.sample raises ValueError when asked for more items than the
# population holds (e.g. a small tissue, or a category with no cells).
cells = list(itertools.chain.from_iterable(
    random.sample(names, min(500, len(names)))
    for names in (
        list(adata.obs_names[adata.obs.Tissue == ct])
        for ct in adata.obs.Tissue.value_counts().index
    )
))
def create_figs(set_value, title):
    """Generate cells with ``set_value`` applied and highlight them on the UMAP of ``bdata``.

    Data is generated from the sampled ``cells`` of ``adata``. Each generated
    cell is mapped to its nearest neighbour in the module-level ``bdata``, and
    those neighbours are labelled 'generated' in ``bdata.obs['gen']``; every
    other cell is labelled 'original'.

    Side effects: overwrites ``bdata.obs['gen']`` and
    ``bdata.uns['gen_colors']`` on the module-level ``bdata``.

    Parameters
    ----------
    set_value : dict
        Forwarded as ``set_value`` to ``scu.generate_unicoord_in_adata``.
    title : str
        Title of the UMAP panel.

    Returns
    -------
    The figure returned by ``sc.pl.umap(..., return_fig=True)``.
    """
    cdata = scu.generate_unicoord_in_adata(adata[cells, :].copy(), adata,
                                           set_value=set_value)
    nn = find_nearest_neighbors(cdata, bdata, bdata.var_names)

    # Start with every cell 'original', then flag the matched neighbours.
    gen = pd.Series(['original'] * len(bdata))
    gen[np.unique(nn)] = 'generated'
    gen = gen.astype('category')
    gen.index = bdata.obs_names
    bdata.obs['gen'] = gen
    # Two colours for the two labels; with both labels present the categories
    # sort as 'generated', 'original'.
    bdata.uns['gen_colors'] = ['#F8E621', '#440154']

    # The original line read `title=import ostitle`, a SyntaxError caused by a
    # stray `import os` fused into the argument.
    f = sc.pl.umap(bdata, color=['gen'], return_fig=True, s=20, title=title)
    return f
# One frame per pseudotime step i in 0..19: every sampled cell gets a
# 'diff_scale' value drawn uniformly from [i, i + 1).
# `cdata` only exists inside create_figs at this point, so the vector length
# is taken from `cells`.
# NOTE(review): assumes generation yields one row per input cell -- confirm.
images = [
    create_figs({'diff_scale': np.random.random(len(cells)) + i}, 'pst=%d' % (i))
    for i in range(20)
]
images

# Round-trip each figure through a temporary PNG to obtain image arrays for
# the GIF writer. The file is removed even if reading it back fails.
gif = []
for f in images:
    f.savefig('./tmp.png')
    try:
        gif.append(imageio.imread('./tmp.png'))
    finally:
        os.remove('./tmp.png')
imageio.mimsave('./EC_generation_pst.gif', gif)

f = create_figs({'Tissue': 'brain', 'diff_scale': 5}, "'Tissue':'brain','diff_scale':5")
f = create_figs({}, "no manipulation")

# create_figs keeps its generated data local, so build an unmanipulated
# `cdata` at module scope for inspection and for the later cell that reads
# cdata.obs.
cdata = scu.generate_unicoord_in_adata(adata[cells, :].copy(), adata,
                                       set_value={})
cdata
def create_figs(set_value, title):
    """Plot which cells of ``bdata`` are nearest neighbours of freshly generated cells.

    Cells are generated from the sampled ``cells`` of ``adata`` with
    ``set_value`` applied. The ``bdata`` rows matched by
    ``find_nearest_neighbors`` are labelled 'generated' and all others
    'original'. The labels are stored in ``bdata.obs['gen']`` and the palette
    in ``bdata.uns['gen_colors']`` (both on the module-level ``bdata``), and
    the UMAP figure coloured by that label is returned.
    """
    generated = scu.generate_unicoord_in_adata(
        adata[cells, :].copy(), adata, set_value=set_value
    )
    matched_rows = find_nearest_neighbors(generated, bdata, bdata.var_names)

    # Boolean mask over the rows of bdata: True where a generated cell landed.
    hit = np.zeros(len(bdata), dtype=bool)
    hit[np.unique(matched_rows)] = True

    bdata.obs['gen'] = pd.Series(
        np.where(hit, 'generated', 'original'),
        index=bdata.obs_names,
        dtype='category',
    )
    bdata.uns['gen_colors'] = ['#F8E621', '#440154']

    return sc.pl.umap(
        bdata, color=['gen'], return_fig=True, s=20, title=title
    )
# One frame per tissue, forcing 'Tissue' to that value during generation.
# Tissue labels are read from the obs rows of the sampled `cells`; the
# original read them from a module-level `cdata`, which is only a local
# variable of create_figs up to this point.
images = [
    create_figs({'Tissue': t}, 'organ=%s' % (t))
    for t in adata.obs.loc[cells, 'Tissue'].value_counts().index
]
images

# Round-trip each figure through a temporary PNG to obtain image arrays for
# the GIF writer. The file is removed even if reading it back fails.
gif = []
for f in images:
    f.savefig('./tmp.png')
    try:
        gif.append(imageio.imread('./tmp.png'))
    finally:
        os.remove('./tmp.png')
imageio.mimsave('./EC_generation_organs.gif', gif, duration=0.5)
# --- Usage sketch: fit on one dataset, predict on another, then generate ---
# NOTE(review): this rebinds `scu` from a top-level `scanpy_unicoord` module,
# whereas the start of the file imports `src.scanpy_unicoord` -- confirm which
# import path is intended.
import scanpy_unicoord as scu
import scanpy as sc

adata = sc.read_h5ad('./Liver_cancer.h5ad')

# NOTE(review): the original call passed a bare `n_disc` after keyword
# arguments, which is a SyntaxError, and no value for it appears anywhere in
# this file. It is left out here. TODO: pass `n_disc=<value>` if the model
# needs it.
scu.model_unicoord_in_adata(
    adata,
    n_cont=50,
    n_diff=0,
    n_clus=[],
    obs_fitting=['Tissue', 'pst', 'Cell_type', 'batch'],
)
scu.train_unicoord_in_adata(adata, epochs=10, chunk_size=20000, slot="cur")
scu.embed_unicoord_in_adata(adata, only_unsup=True)

# Apply the trained model to a second dataset.
bdata = sc.read_h5ad('./Lung_cancer.h5ad')
scu.predcit_unicoord_in_adata(bdata, adata)

# Generate data with 'Cell_type' and 'pst' forced to fixed values.
# NOTE(review): `cells` must hold obs names of the `adata` loaded above; any
# selection made for an earlier dataset will not match -- confirm.
cdata = scu.generate_unicoord_in_adata(
    adata[cells, :].copy(),
    adata,
    set_value={'Cell_type': 'T cells', 'pst': 5},
)
adata.uns['unc_stuffs'].keys()