%matplotlib inline
%load_ext autoreload
%autoreload 2
import random

import numpy as np
import pandas as pd
import scanpy as sc
import torch
from line_profiler import LineProfiler

from unicoord import scu
from unicoord.visualization import draw_loss_curves

# NOTE(review): `sns` (seaborn) is used by the heatmap code further down but is
# never imported in this file -- add `import seaborn as sns` if the session
# does not already provide it.
# --- Session setup -----------------------------------------------------------
# Verbosity levels: errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3
sc.logging.print_header()
# sc.settings.set_figure_params(dpi=80, facecolor='white')
sc.settings.set_figure_params(vector_friendly=False)

# --- Reference data ----------------------------------------------------------
# Read the reference AnnData, then normalise every cell to a total of 1e4
# (highly expressed genes excluded from the total) and log1p-transform it.
adata = sc.read_h5ad(r"F:\h5ad\hECA_eachCT2000_pcGenes.h5ad")
sc.pp.normalize_total(
    adata,
    target_sum=1e4,
    exclude_highly_expressed=True,
)
sc.pp.log1p(adata)
adata
# Set up the UniCoord model on `adata`, fitting against three obs columns.
# (The meaning of n_diff / n_clus / n_cont is defined by unicoord.)
scu.model_unicoord_in_adata(
    adata,
    n_diff=0,
    n_clus=[],
    n_cont=20,
    obs_fitting=['seq_tech', 'organ', 'cell_type'],
)

# Train for 2 epochs in chunks of 20000.
scu.train_unicoord_in_adata(
    adata,
    epochs=2,
    slot='cur',
    chunk_size=20000,
)

# Plot the losses recorded by the trainer that is stored in adata.uns.
fig = draw_loss_curves(adata.uns['unc_stuffs']['trainer'].losses)
# if save_figs:
# fig.savefig(os.path.join(savePath, 'img', 'fig1_lossCurves.png'))
fig.show()

# Write the embedding into `adata` and inspect the shape of obsm['unicoord'].
scu.embed_unicoord_in_adata(adata, only_sup=False)
adata.obsm['unicoord'].shape
# Neighbour graph on the UniCoord representation, Leiden clustering at
# resolution 0.5, then a UMAP layout.
sc.pp.neighbors(adata, use_rep='unicoord')
sc.tl.leiden(adata, resolution=0.5)
sc.tl.umap(adata)

# Two-column grid of the UMAP coloured by the reference annotations.
sc.pl.embedding(
    adata,
    'X_umap',
    color=['seq_tech', 'study_id', 'tissue_type', 'cell_type', 'organ'],
    ncols=2,
    legend_loc='on data',
    legend_fontsize=10,
)
# Held-out evaluation: keep only the rows where `adata.obs.unc_training` is
# False and work on an independent copy.
bdata = adata[~adata.obs.unc_training, :].copy()
bdata

# Run UniCoord prediction on the held-out cells, passing `adata` as the
# trained reference.
scu.predcit_unicoord_in_adata(bdata, adata)

# Show each annotated column next to its inferred counterpart.
# Fixes: the stray `+` after legend_loc made this call a SyntaxError; it is
# replaced by legend_fontsize=10, which every other embedding plot in this
# file passes. The last colour key now follows the `<obs>_unc_infered`
# pattern used for the other inferred columns.
# NOTE(review): confirm the prediction step writes `organ_unc_infered`.
sc.pl.embedding(
    bdata,
    'X_umap',
    legend_loc='on data',
    legend_fontsize=10,
    color=[
        'seq_tech', 'seq_tech_unc_infered',
        'cell_type', 'cell_type_unc_infered',
        'organ', 'organ_unc_infered',
    ],
    ncols=2,
)
# --- Query dataset 1: lung cancer --------------------------------------------
cdata = sc.read_h5ad(r'D:\hECA\Lung_cancer.pp.h5ad')
cdata

# UMAP as stored in the file, coloured by the dataset's own annotations.
sc.pl.embedding(
    cdata,
    'X_umap',
    color=['leiden', 'Sample', 'Cell_type.refined', 'Cell_subtype'],
    ncols=2,
    legend_loc='on data',
    legend_fontsize=10,
)

# Rebuild the AnnData from `.raw`, then apply the same normalise + log1p
# steps that were applied to the reference.
cdata = cdata.raw.to_adata()
sc.pp.normalize_total(
    cdata,
    target_sum=1e4,
    exclude_highly_expressed=True,
)
sc.pp.log1p(cdata)
cdata

# Predict with the reference `adata`, then plot each of the dataset's own
# columns beside an inferred one.
scu.predcit_unicoord_in_adata(cdata, adata)
sc.pl.embedding(
    cdata,
    'X_umap',
    color=[
        'Sample', 'seq_tech_unc_infered',
        'Cell_type.refined', 'cell_type_unc_infered',
        'Cell_subtype', 'organ_unc_infered',
    ],
    ncols=2,
    legend_loc='on data',
    legend_fontsize=10,
)
# './table.csv' is read only for its column labels, which (minus the last one)
# fix the column selection and order of the exported table.
confusion_mtx = pd.read_csv('./table.csv', index_col=0)
confusion_mtx.columns

# Inferred cell type x refined cell type counts (for inspection).
cdata.obs.groupby(['cell_type_unc_infered', 'Cell_type.refined']).size().unstack()

# Inferred cell type x cell subtype counts, restricted to the template columns.
confusion_matrix = (
    cdata.obs
    .groupby(['cell_type_unc_infered', 'Cell_subtype'])
    .size()
    .unstack()
    .loc[:, list(confusion_mtx.columns)[:-1]]
)
confusion_matrix.to_csv('./hECA_pred_LUAD.csv')
# Maps fine-grained inferred cell-type labels to the coarse categories used
# when scoring against 'Cell_type.refined'. Written as coarse -> fine groups
# and inverted into a flat fine -> coarse dict (insertion order preserved).
ct_mapping = {
    fine_label: coarse_label
    for coarse_label, fine_labels in {
        "Epithelial cells": (
            "Epithelial cell",
            "Type II alveolar cell",
            "Goblet cell",
            "Basal cell",
            "Enterocyte progenitor",
        ),
        "Endothelial cells": (
            "Vascular endothelial cell",
        ),
        "Fibroblasts": (
            "Stromal cell",
            "Fibroblast",
            "Fibrocyte",
            "Smooth muscle cell",
        ),
        "Myeloid cells": (
            "Dendritic cell",
            "Macrophage",
            "Monocyte",
            "Microglia",
        ),
        "MAST cells": (
            "Mast cell",
        ),
        "T/NK cells": (
            "NK cell",
            "T cell",
        ),
        "B lymphocytes": (
            "B cell",
            "Plasma B cell",
        ),
        'other': (
            "Acinar cell",
            "Astrocyte",
            "Bipolar cell",
            "Cardiomyocyte cell",
            "Chief cell",
            "Endocardial cell",
            "Endothelial cell",
            "Erythrocyte",
            "Follicular epithelial cell",
            "Hemopoietic stem cell",
            "Inhibitory neuron",
            "Intercalated cell",
            "Loop of henle",
            "Mesenchymal cell",
            "Muller cell",
            "Neutrophilic granulocyte",
            "Neuron",
            "Oligodendrocyte",
            "Oligodendrocyte precursor cell",
            "Proliferating T cell",
            "Proximal convoluted tubule",
            "Pericyte",
            "Perineural epithelial cell",
        ),
    }.items()
    for fine_label in fine_labels
}
from sklearn.metrics import accuracy_score

# Rows whose 'Cell_type.refined' value is 'Undetermined' or 'NA' are left out
# of the comparison. The mask is a plain list of booleans, so it is applied
# positionally to both series.
keep_rows = list(~cdata.obs['Cell_type.refined'].isin(['Undetermined', 'NA']))

# ct1: the dataset's own coarse label; ct2: the inferred label collapsed via
# ct_mapping (labels missing from the mapping become "rare types").
ct1 = cdata.obs['Cell_type.refined'][keep_rows]
ct2 = pd.Series(
    [ct_mapping.get(c, "rare types") for c in cdata.obs['cell_type_unc_infered']]
)[keep_rows]

accuracy_score(ct1, ct2)

# ct2 was built with a default integer index; give it ct1's index so the two
# series line up row by row when concatenated.
ct2.index = ct1.index
a = pd.concat([ct1, ct2], axis=1)
a.columns = ['LUAD_label', 'pred_label']
a.groupby(['LUAD_label', 'pred_label']).size().unstack()
# Reload the exported count table and draw three heatmaps of it:
# log(count + 1), column-normalised ratios, and row-normalised ratios.
# NOTE(review): no new figure is created between the three heatmaps; if they
# run in the same cell/script they may all land on the same current axes --
# confirm the saved PDFs look as intended.
confusion_mtx = pd.read_csv('./hECA_pred_LUAD.csv', index_col=0)
sns.set(rc={'figure.figsize': (15, 10)})

# 1) log(count + 1), saved to PDF.
log_counts = np.log(confusion_mtx + 1)
sns.heatmap(
    log_counts,
    cmap="YlGnBu",
    xticklabels=True,
).figure.savefig('../UniCoord/hECA_pred_LUAD.pdf', bbox_inches="tight", dpi=200)

# 2) Each column divided by its column total, saved to PDF.
column_ratio = confusion_mtx / confusion_mtx.sum(axis=0)
sns.heatmap(
    column_ratio,
    cmap="YlGnBu",
    xticklabels=True,
).figure.savefig('../UniCoord/hECA_pred_LUAD_ratio.pdf', bbox_inches="tight", dpi=200)

# 3) Each row divided by its row total; only displayed, not saved.
row_ratio = (confusion_mtx.T / confusion_mtx.sum(axis=1)).T
hcl = sns.heatmap(
    row_ratio,
    cmap="YlGnBu",
    xticklabels=True,
)
# --- Query dataset 2: adult lung ---------------------------------------------
# `cdata` is rebound here; the lung-cancer object from above is replaced.
cdata = sc.read_h5ad(r'D:\hECA\Lung.Adult.pp.h5ad')
sc.pl.embedding(
    cdata,
    'X_umap',
    color=['leiden', 'cell_type', 'study_id', 'tissue_type'],
    ncols=2,
    legend_loc='on data',
    legend_fontsize=10,
)

# Rebuild from `.raw`, then normalise + log1p as for the reference.
cdata = cdata.raw.to_adata()
sc.pp.normalize_total(
    cdata,
    target_sum=1e4,
    exclude_highly_expressed=True,
)
sc.pp.log1p(cdata)
cdata

# Predict with the reference `adata` and plot annotated vs inferred columns.
scu.predcit_unicoord_in_adata(cdata, adata)
sc.pl.embedding(
    cdata,
    'X_umap',
    color=['seq_tech', 'seq_tech_unc_infered', 'cell_type', 'cell_type_unc_infered'],
    ncols=2,
    legend_loc='on data',
    legend_fontsize=10,
)
# Coarse category for each inferred ('cell_type_unc_infered') label in the
# adult-lung comparison, assembled one category at a time.
# NOTE(review): the entries are the same as `ct_mapping` defined earlier in
# this file -- consider sharing one definition so the two cannot drift.
# NOTE(review): "Endothelial cell", "Neutrophilic granulocyte" and
# "Perineural epithelial cell" map to 'other' here but to a named category in
# `ct2_mapping`, so an exact-match prediction of those labels is scored as a
# mismatch -- confirm this is intended.
ct1_mapping = {
    **dict.fromkeys(
        [
            "Epithelial cell",
            "Type II alveolar cell",
            "Goblet cell",
            "Basal cell",
            "Enterocyte progenitor",
        ],
        "Epithelial cells",
    ),
    **dict.fromkeys(
        [
            "Vascular endothelial cell",
        ],
        "Endothelial cells",
    ),
    **dict.fromkeys(
        [
            "Stromal cell",
            "Fibroblast",
            "Fibrocyte",
            "Smooth muscle cell",
        ],
        "Fibroblasts",
    ),
    **dict.fromkeys(
        [
            "Dendritic cell",
            "Macrophage",
            "Monocyte",
            "Microglia",
        ],
        "Myeloid cells",
    ),
    **dict.fromkeys(
        [
            "Mast cell",
        ],
        "MAST cells",
    ),
    **dict.fromkeys(
        [
            "NK cell",
            "T cell",
        ],
        "T/NK cells",
    ),
    **dict.fromkeys(
        [
            "B cell",
            "Plasma B cell",
        ],
        "B lymphocytes",
    ),
    **dict.fromkeys(
        [
            "Acinar cell",
            "Astrocyte",
            "Bipolar cell",
            "Cardiomyocyte cell",
            "Chief cell",
            "Endocardial cell",
            "Endothelial cell",
            "Erythrocyte",
            "Follicular epithelial cell",
            "Hemopoietic stem cell",
            "Inhibitory neuron",
            "Intercalated cell",
            "Loop of henle",
            "Mesenchymal cell",
            "Muller cell",
            "Neutrophilic granulocyte",
            "Neuron",
            "Oligodendrocyte",
            "Oligodendrocyte precursor cell",
            "Proliferating T cell",
            "Proximal convoluted tubule",
            "Pericyte",
            "Perineural epithelial cell",
        ],
        'other',
    ),
}
# Coarse category for each annotated ('cell_type') label in the adult-lung
# comparison. Written as coarse -> fine groups and inverted into a flat
# fine -> coarse dict (insertion order preserved).
ct2_mapping = {
    fine_label: coarse_label
    for coarse_label, fine_labels in {
        "Epithelial cells": (
            "Type I alveolar cell",
            "Type I alveolar cell/Type II alveolar cell",
            "Type II alveolar cell",
            "Club cell",
            "Ciliated columnar cell",
            "Perineural epithelial cell",
            "Epithelial cell",
        ),
        "Endothelial cells": (
            "Lymphatic endothelial cell",
            "Vascular endothelial cell",
            "Endothelial cell",
        ),
        "Fibroblasts": (
            "Fibrocyte",
            "Smooth muscle cell",
        ),
        "Myeloid cells": (
            "Dendritic cell",
            "Macrophage",
            "Monocyte",
            "Neutrophilic granulocyte",
            "Myeloid cell",
        ),
        "MAST cells": (
            "Mast cell",
        ),
        "T/NK cells": (
            "NK cell",
            "T cell",
            "CD8 T cell",
        ),
        "B lymphocytes": (
            "B cell",
            "Plasma B cell",
        ),
        "rare types": (
            "Chondrocyte",
            "Megakaryocyte",
        ),
    }.items()
    for fine_label in fine_labels
}
from sklearn.metrics import f1_score

# Collapse both label columns to the shared coarse categories; labels missing
# from a mapping become "rare types".
# Here ct1 holds the inferred (predicted) labels and ct2 the annotated ones.
ct1 = [
    ct1_mapping.get(c, "rare types")
    for c in cdata.obs['cell_type_unc_infered']
]
ct2 = [
    ct2_mapping.get(c, "rare types")
    for c in cdata.obs['cell_type']
]

# Micro / macro F1 and plain accuracy (accuracy_score is imported earlier in
# this file).
f1_score(ct1, ct2, average='micro')
f1_score(ct1, ct2, average='macro')
accuracy_score(ct1, ct2)

# Cross-tabulate predicted vs annotated coarse labels.
a = pd.DataFrame([ct1, ct2]).T
a.columns = ['pred', 'true']
a.groupby(['pred', 'true']).size().unstack()