%matplotlib inline
%load_ext autoreload
%autoreload 2
import scanpy as sc
import random
from unicoord import scu
from unicoord.visualization import draw_loss_curves
import torch
from line_profiler import LineProfiler
# --- scanpy session configuration ------------------------------------------
# Verbosity levels: errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3
sc.logging.print_header()
# Alternative figure defaults, currently disabled:
# sc.settings.set_figure_params(dpi=80, facecolor='white')
sc.settings.set_figure_params(vector_friendly=False)

# --- reference dataset (lung cancer) ----------------------------------------
adata = sc.read_h5ad(r"D:\hECA\Lung_cancer.pp.h5ad")
adata  # bare expression: shows the object summary when run as a notebook cell

# Rebuild the AnnData from its `.raw` slot, then scale each cell to a total of
# 1e4 (highly expressed genes excluded from the size factor) and log1p it.
adata = adata.raw.to_adata()
sc.pp.normalize_total(adata, exclude_highly_expressed=True, target_sum=1e4)
sc.pp.log1p(adata)
adata

# Keep only cells whose refined cell-type label is not the literal string 'NA'.
has_refined_label = adata.obs['Cell_type.refined'] != 'NA'
adata = adata[has_refined_label, :].copy()
adata
# --- build and train the UniCoord model on the reference data ---------------
# The three obs columns listed in `obs_fitting` are the supervised targets.
scu.model_unicoord_in_adata(
    adata,
    n_diff=0,
    n_clus=[],
    n_cont=5,
    obs_fitting=['Cell_type.refined', 'Cell_subtype', 'Sample'],
)
scu.train_unicoord_in_adata(adata, epochs=10, slot='cur', chunk_size=20000)

# Plot the loss history recorded by the trainer stored under adata.uns.
loss_history = adata.uns['unc_stuffs']['trainer'].losses
fig = draw_loss_curves(loss_history)
# Optional export of the figure, currently disabled:
# if save_figs:
# fig.savefig(os.path.join(savePath, 'img', 'fig1_lossCurves.png'))
fig.show()

# --- embed, cluster and visualise -------------------------------------------
scu.embed_unicoord_in_adata(adata, only_sup=True)
adata.obsm['unicoord'].shape  # notebook display of the embedding dimensions

# Neighbour graph, Leiden clustering and UMAP all derive from the
# 'unicoord' representation written to adata.obsm above.
sc.pp.neighbors(adata, use_rep='unicoord')
sc.tl.leiden(adata, resolution=0.5)
sc.tl.umap(adata)
sc.pl.embedding(
    adata,
    'X_umap',
    color=['Cell_type.refined', 'Cell_subtype', 'Sample'],
    ncols=2,
    legend_loc='on data',
    legend_fontsize=10,
)
# --- evaluate on the cells not flagged as training cells --------------------
# `unc_training` is a boolean obs column; presumably it is written during
# model setup/training to mark the cells used for fitting -- TODO confirm.
not_in_training = ~adata.obs['unc_training']
bdata = adata[not_in_training, :].copy()
bdata

# NOTE(review): `predcit` is the function name as called throughout this
# file (presumably the library's own spelling) -- confirm before renaming.
scu.predcit_unicoord_in_adata(bdata, adata)

# Side-by-side panels: each original annotation next to its
# `*_unc_infered` counterpart.
sc.pl.embedding(
    bdata,
    'X_umap',
    color=[
        'Cell_type.refined', 'Cell_type.refined_unc_infered',
        'Cell_subtype', 'Cell_subtype_unc_infered',
        'Sample', 'Sample_unc_infered',
    ],
    ncols=2,
    legend_loc='on data',
    legend_fontsize=10,
)
# --- query dataset 1: liver cancer ------------------------------------------
cdata = sc.read_h5ad(r'D:\hECA\Liver_cancer.pp.h5ad')

# Show the dataset's own annotations on its stored UMAP first.
sc.pl.embedding(
    cdata,
    'X_umap',
    color=['leiden', 'Type', 'S_ID', 'Sample'],
    ncols=2,
    legend_loc='on data',
    legend_fontsize=10,
)

# Same preprocessing as the reference data: start from `.raw`, scale each
# cell to a total of 1e4, then log1p.
cdata = cdata.raw.to_adata()
sc.pp.normalize_total(cdata, exclude_highly_expressed=True, target_sum=1e4)
sc.pp.log1p(cdata)
cdata

# Apply the model held by `adata` to the query cells, in chunks of 20000.
scu.predcit_unicoord_in_adata(cdata, adata, chunk_size=20000)

# Pair each native annotation with an inferred (`*_unc_infered`) one.
sc.pl.embedding(
    cdata,
    'X_umap',
    color=[
        'Type', 'Cell_type.refined_unc_infered',
        'S_ID', 'Cell_subtype_unc_infered',
        'Sample', 'Sample_unc_infered',
    ],
    ncols=2,
    legend_loc='on data',
    legend_fontsize=10,
)

# Contingency table: native 'Type' (rows) vs. inferred cell type (columns).
by_type_and_inferred = cdata.obs.groupby(['Type', 'Cell_type.refined_unc_infered'])
by_type_and_inferred.size().unstack()
# --- query dataset 2: adult lung --------------------------------------------
# NOTE: this rebinds `cdata`, replacing the previously loaded query dataset.
cdata = sc.read_h5ad(r'D:\hECA\Lung.Adult.pp.h5ad')

# Show the dataset's own annotations on its stored UMAP first.
sc.pl.embedding(
    cdata,
    'X_umap',
    color=['leiden', 'cell_type', 'study_id', 'tissue_type'],
    ncols=2,
    legend_loc='on data',
    legend_fontsize=10,
)

# Same preprocessing as the reference data: start from `.raw`, scale each
# cell to a total of 1e4, then log1p.
cdata = cdata.raw.to_adata()
sc.pp.normalize_total(cdata, exclude_highly_expressed=True, target_sum=1e4)
sc.pp.log1p(cdata)
cdata

# Apply the model held by `adata` to the query cells.
scu.predcit_unicoord_in_adata(cdata, adata)

# Pair each native annotation with an inferred (`*_unc_infered`) one.
sc.pl.embedding(
    cdata,
    'X_umap',
    color=[
        'cell_type', 'Cell_type.refined_unc_infered',
        'study_id', 'Cell_subtype_unc_infered',
        'tissue_type', 'Sample_unc_infered',
    ],
    ncols=2,
    legend_loc='on data',
    legend_fontsize=10,
)

# Contingency table: native 'cell_type' (rows) vs. inferred cell type
# (columns), followed by the overall counts of the inferred labels.
by_native_and_inferred = cdata.obs.groupby(['cell_type', 'Cell_type.refined_unc_infered'])
by_native_and_inferred.size().unstack()
cdata.obs['Cell_type.refined_unc_infered'].value_counts()
# Lookup from a fine-grained cell-type label to its coarse category.
# Written grouped by coarse category and flattened into {fine: coarse};
# group order and member order fix the resulting key order.
ct_mapping = {
    fine_label: coarse_label
    for coarse_label, fine_labels in {
        "Epithelial cells": (
            "Type I alveolar cell",
            "Type I alveolar cell/Type II alveolar cell",
            "Type II alveolar cell",
            "Club cell",
            "Ciliated columnar cell",
            "Perineural epithelial cell",
            "Epithelial cell",
        ),
        "Endothelial cells": (
            "Lymphatic endothelial cell",
            "Vascular endothelial cell",
            "Endothelial cell",
        ),
        "Fibroblasts": (
            "Fibrocyte",
            "Smooth muscle cell",
        ),
        "Myeloid cells": (
            "Dendritic cell",
            "Macrophage",
            "Monocyte",
            "Neutrophilic granulocyte",
            "Myeloid cell",
        ),
        "MAST cells": (
            "Mast cell",
        ),
        "T/NK cells": (
            "NK cell",
            "T cell",
            "CD8 T cell",
        ),
        "B lymphocytes": (
            "B cell",
            "Plasma B cell",
        ),
        "rare types": (
            "Chondrocyte",
            "Megakaryocyte",
        ),
    }.items()
    for fine_label in fine_labels
}
from sklearn.metrics import accuracy_score
import seaborn as sns

# --- coarse cell-type accuracy on the query dataset -------------------------
# ct1: labels in the inferred 'Cell_type.refined_unc_infered' column.
ct1 = cdata.obs['Cell_type.refined_unc_infered']
# ct2: the dataset's own 'cell_type' labels translated through ct_mapping;
# any label missing from the mapping is counted as "rare types".
ct2 = [ct_mapping.get(c, "rare types") for c in cdata.obs['cell_type']]
accuracy_score(ct2, ct1)  # argument order is (y_true, y_pred)

# --- native cell type vs. inferred subtype ----------------------------------
# Build the contingency table in memory, save it, and plot that same table.
# The previous version read './table.csv' through an unimported `pd` before
# the file had been written, so a fresh top-to-bottom run failed (or would
# have plotted a stale file left over from an earlier session).
confusion_mtx = cdata.obs.groupby(['cell_type', 'Cell_subtype_unc_infered']).size().unstack()
confusion_mtx.to_csv('./table.csv')
hcl = sns.heatmap(confusion_mtx)
cdata.obs.cell_type.value_counts()