%matplotlib inline
%load_ext autoreload
%autoreload 2
%load_ext line_profiler
import scanpy as sc
import random
import src.scanpy_unicoord as scu
import torch
from src.visualization import *
from line_profiler import LineProfiler
# Scanpy session configuration.
# Verbosity scale: errors (0), warnings (1), info (2), hints (3).
scanpy_settings = sc.settings
scanpy_settings.verbosity = 3
sc.logging.print_header()
# Figure defaults; an earlier alternative used dpi=80 with facecolor='white'.
scanpy_settings.set_figure_params(vector_friendly=False)
# Load the pre-processed liver-cancer file and rebuild a full AnnData from its
# `.raw` slot (presumably the un-normalized matrix -- confirm against how the
# .pp.h5ad file was produced).
adata = sc.read_h5ad(r'D:\hECA\Liver_cancer.pp.h5ad').raw.to_adata()

# Per-cell normalization to a total of 1e4 (highly expressed genes excluded
# from the size factor), followed by a log1p transform.
sc.pp.normalize_total(adata, exclude_highly_expressed=True, target_sum=1e4)
sc.pp.log1p(adata)
adata
# Build the UniCoord model inside `adata`, supervised on the 'Type' column.
scu.model_unicoord_in_adata(
    adata,
    n_cont=50,
    n_diff=0,
    n_clus=[],
    obs_fitting=['Type'],
)

# Train for 10 epochs, feeding the data in chunks of 20000.
scu.train_unicoord_in_adata(adata, epochs=10, chunk_size=20000, slot="cur")

# The trainer stored under adata.uns['unc_stuffs'] keeps the loss history.
loss_history = adata.uns['unc_stuffs']['trainer'].losses
fig = draw_loss_curves(loss_history)
# NOTE: saving the figure to <savePath>/img/fig1_lossCurves.png was disabled
# in the original notebook (it depended on `save_figs` / `savePath`).
fig.show()
# Compute the UniCoord embedding, then build the neighbour graph on the
# 'unicoord' representation and derive Leiden clusters and a UMAP from it.
scu.embed_unicoord_in_adata(adata, chunk_size=5000)
sc.pp.neighbors(adata, use_rep='unicoord')
sc.tl.leiden(adata, resolution=0.5)
sc.tl.umap(adata)

# Overview panels: clusters, annotated type, and two sample-level columns.
overview_keys = ['leiden', 'Type', 'S_ID', 'Sample']
sc.pl.embedding(
    adata,
    'X_umap',
    color=overview_keys,
    ncols=2,
    legend_loc='on data',
    legend_fontsize=10,
)
# `accuracy_score` was only imported further down the file, so running the
# script top-to-bottom could fail here with NameError; import it explicitly.
from sklearn.metrics import accuracy_score

# Predict 'Type' for every cell in `adata`; the prediction is read back from
# the 'Type_unc_infered' obs column below.
scu.predcit_unicoord_in_adata(adata)

# Score only cells that carry a real annotation ('unclassified' is excluded).
is_annotated = adata.obs.Type != 'unclassified'
ct1 = adata.obs.Type[is_annotated]
ct2 = adata.obs.Type_unc_infered[is_annotated]
accuracy_score(ct1, ct2)
from sklearn.metrics import accuracy_score

# Evaluate on the cells flagged as not used for training (obs['unc_training']).
held_out = ~adata.obs.unc_training
bdata = adata[held_out, :].copy()
bdata

# Predict labels for the held-out cells with the model stored in `adata`.
scu.predcit_unicoord_in_adata(bdata, adata)
sc.pl.embedding(
    bdata,
    'X_umap',
    color=['Type', 'Type_unc_infered'],
    ncols=2,
    legend_loc='on data',
    legend_fontsize=10,
)

# Accuracy over annotated cells only ('unclassified' is excluded).
has_label = bdata.obs.Type != 'unclassified'
ct1 = bdata.obs.Type[has_label]
ct2 = bdata.obs.Type_unc_infered[has_label]
accuracy_score(ct1, ct2)

# Confusion table: annotated type (rows) vs. inferred type (columns).
bdata.obs.groupby(['Type','Type_unc_infered']).size().unstack()
import itertools

# Balanced subsample: up to `max_per_type` random cells from every annotated
# type ('unclassified' is skipped). The sample size is capped at the pool size
# because random.sample raises ValueError when asked for more items than the
# population holds (e.g. a rare type, or an empty categorical level listed by
# value_counts()). For types with at least `max_per_type` cells the draw is
# the same as before.
# NOTE(review): the draw is unseeded, so `cells` differs between runs.
max_per_type = 1000
cells = list(itertools.chain.from_iterable(
    random.sample(pool, min(max_per_type, len(pool)))
    for pool in (
        list(adata.obs_names[adata.obs.Type == ct])
        for ct in adata.obs.Type.value_counts().index
        if ct != 'unclassified'
    )
))
bdata = adata[cells, :].copy()
bdata

# Predict labels for the subsample with the model stored in `adata` and
# compare annotated vs. inferred types on the existing UMAP.
scu.predcit_unicoord_in_adata(bdata, adata)
sc.pl.embedding(bdata, 'X_umap', legend_loc='on data', legend_fontsize=10,
                color=['Type', 'Type_unc_infered'], ncols=2)
def _cluster_and_annotate(generated, reference):
    """Embed a generated AnnData with the scanpy workflow, then annotate it.

    This replaces the pipeline that was copy-pasted after every
    ``scu.generate_unicoord_in_adata`` call; the calls and their order are
    unchanged.

    Parameters
    ----------
    generated
        AnnData returned by ``scu.generate_unicoord_in_adata``. It is
        normalized and log-transformed in place.
    reference
        AnnData passed as second argument to
        ``scu.predcit_unicoord_in_adata`` (the object the model lives in).

    Returns
    -------
    The full-gene AnnData rebuilt from ``.raw`` after clustering, with the
    prediction written by ``scu.predcit_unicoord_in_adata``.
    """
    sc.pp.normalize_total(generated)
    sc.pp.log1p(generated)
    sc.pp.highly_variable_genes(generated)

    # Freeze the log-normalized, all-gene matrix in .raw before restricting
    # to highly variable genes; it is restored below for gene-level plots.
    generated.raw = generated
    hvg_data = generated[:, generated.var.highly_variable]

    sc.pp.scale(hvg_data)
    sc.tl.pca(hvg_data)
    sc.pp.neighbors(hvg_data)
    sc.tl.leiden(hvg_data)
    sc.tl.umap(hvg_data)
    sc.pl.umap(hvg_data, color='Type')

    # Back to all genes; the following sc.pl.umap call relies on the UMAP
    # computed above being carried over by raw.to_adata().
    annotated = hvg_data.raw.to_adata()
    scu.predcit_unicoord_in_adata(annotated, reference)
    sc.pl.umap(annotated, color=['Type', 'Type_unc_infered', 'CD3E', 'COL1A1'],
               ncols=2)
    return annotated


# 1) Generate from the current `bdata` without overriding any label.
cdata = _cluster_and_annotate(
    scu.generate_unicoord_in_adata(bdata, adata), adata)

# 2) Same cells, with 'Type' forced to 'T cells' during generation.
cdata = _cluster_and_annotate(
    scu.generate_unicoord_in_adata(bdata, adata,
                                   set_value={'Type': 'T cells'}),
    adata)

# 3) Generate from (at most) the first 6000 annotated T cells only.
bdata = adata[adata.obs.Type == 'T cells', :][:6000, :].copy()
cdata = _cluster_and_annotate(
    scu.generate_unicoord_in_adata(bdata, adata), adata)

# 4) Back to the balanced subsample, with 'Type' forced to 'CAFs'.
bdata = adata[cells, :].copy()
cdata = _cluster_and_annotate(
    scu.generate_unicoord_in_adata(bdata, adata,
                                   set_value={'Type': 'CAFs'}),
    adata)
def _embed_and_plot_merged(merged):
    """Run the scanpy embedding workflow on a concatenated AnnData and plot it.

    This replaces the pipeline that was copy-pasted after every
    ``concatenate`` call; the calls and their order are unchanged.

    Parameters
    ----------
    merged
        Result of ``AnnData.concatenate``; it is normalized and
        log-transformed in place. The 'batch' obs column plotted below is
        expected to come from that concatenation.

    Returns
    -------
    The highly-variable-gene subset of ``merged`` after scaling, PCA,
    neighbours, Leiden and UMAP (what the notebook kept as ``ddata``).
    """
    sc.pp.normalize_total(merged)
    sc.pp.log1p(merged)
    sc.pp.highly_variable_genes(merged)

    # Keep the log-normalized, all-gene matrix in .raw before subsetting.
    merged.raw = merged
    hvg_data = merged[:, merged.var.highly_variable]

    sc.pp.scale(hvg_data)
    sc.tl.pca(hvg_data)
    sc.pp.neighbors(hvg_data)
    sc.tl.leiden(hvg_data)
    sc.tl.umap(hvg_data)
    sc.pl.umap(hvg_data, color=['Type', 'batch', 'CD3E', 'COL1A1'],
               legend_loc='on data', ncols=2)
    return hvg_data


# NOTE(review): AnnData.concatenate is deprecated in recent anndata releases
# in favour of anndata.concat; it is kept here because the plots rely on the
# 'batch' column it creates.

# --- Generated cells vs. the real cells they were generated from ---------
# index_unique=None keeps the original obs names in the merged object.

# 1) No label override.
cdata = scu.generate_unicoord_in_adata(bdata, adata)
ddata = _embed_and_plot_merged(cdata.concatenate(bdata, index_unique=None))

# 2) 'Type' forced to 'T cells'.
cdata = scu.generate_unicoord_in_adata(bdata, adata,
                                       set_value={'Type': 'T cells'})
ddata = _embed_and_plot_merged(cdata.concatenate(bdata, index_unique=None))

# 3) 'Type' forced to 'CAFs'.
cdata = scu.generate_unicoord_in_adata(bdata, adata,
                                       set_value={'Type': 'CAFs'})
ddata = _embed_and_plot_merged(cdata.concatenate(bdata, index_unique=None))

# --- Label-overridden generation vs. plain generation --------------------

# 4) 'T cells' override against the unmodified generation.
cdata = scu.generate_unicoord_in_adata(bdata, adata,
                                       set_value={'Type': 'T cells'})
cdata1 = scu.generate_unicoord_in_adata(bdata, adata)
ddata = _embed_and_plot_merged(cdata.concatenate(cdata1))

# 5) 'CAFs' override against the unmodified generation.
cdata = scu.generate_unicoord_in_adata(bdata, adata,
                                       set_value={'Type': 'CAFs'})
cdata1 = scu.generate_unicoord_in_adata(bdata, adata)
ddata = _embed_and_plot_merged(cdata.concatenate(cdata1))