%matplotlib inline
%load_ext autoreload
%autoreload 2
%load_ext line_profiler
import scanpy as sc
import random
from unicoord import scu
from unicoord.visualization import draw_loss_curves
import torch
from line_profiler import LineProfiler
# Session-wide scanpy configuration.
# Verbosity scale: errors (0), warnings (1), info (2), hints (3).
scanpy_settings = sc.settings
scanpy_settings.verbosity = 3
sc.logging.print_header()
# Alternative figure preset, kept for reference:
# sc.settings.set_figure_params(dpi=80, facecolor='white')
scanpy_settings.set_figure_params(vector_friendly=False)
# Load the reference dataset from a machine-specific absolute path.
adata = sc.read_h5ad(r'F:\h5ad\tabularMuris\TBMU.h5ad')
adata

# Inspect how a missing cell_ontology_class annotation is represented.
# `.iloc[0]` is explicit positional access (plain `[0]` on a label-indexed
# Series is deprecated), and the guard avoids an error when nothing is missing.
cell_classes = adata.obs.cell_ontology_class
unlabelled = cell_classes[cell_classes.isna()]
unlabelled.iloc[0] if len(unlabelled) else None
cell_classes.dtype.name

# Convert every categorical obs column to plain strings.
# The column list is materialised up front because the frame is modified
# inside the loop.
# NOTE(review): missing values are expected to become the literal string
# 'nan' after this cast -- confirm that is what the model fitting wants.
categorical_columns = list(adata.obs.select_dtypes(include='category').columns)
for column in categorical_columns:
    adata.obs[column] = adata.obs[column].astype(str)
# Build a UniCoord model on `adata`, supervised by the obs columns below.
obs_targets = [
    'cell_ontology_class',
    'free_annotation',
    'mouse.id',
    'mouse.sex',
    'tissue',
    'tissue_tSNE_1',
    'tissue_tSNE_2',
    'seq_tech',
]
scu.model_unicoord_in_adata(
    adata,
    n_cont=50,
    n_diff=0,
    n_clus=[],
    obs_fitting=obs_targets,
    min_obs=500,
)

# Grab the model bookkeeping stored under adata.uns before training; the
# loss history plotted below is read from this same object afterwards.
unc_stuffs = adata.uns['unc_stuffs']
scu.train_unicoord_in_adata(
    adata,
    epochs=10,
    chunk_size=20000,
    slot="cur",
)

# Plot the loss curves.
fig = draw_loss_curves(unc_stuffs['loss'])
# Optional export, disabled:
# if save_figs:
#     fig.savefig(os.path.join(savePath, 'img', 'fig1_lossCurves.png'))
fig.show()

# Write the result to disk with only_model=True.
scu.write_scu_h5ad(
    adata,
    './pretrained_models/TBMU.h5ad',
    only_model=True,
)
# Reload the saved model and prepare the query dataset.
cdata = scu.load_scu_h5ad('./pretrained_models/TBMU.h5ad')
bdata = sc.read_h5ad('./IPF.h5ad')
# Start again from the values stored in .raw, then normalise and log-transform.
bdata = bdata.raw.to_adata()
sc.pp.normalize_total(
    bdata,
    target_sum=1e4,
    exclude_highly_expressed=True,
)
sc.pp.log1p(bdata)
bdata

# Replace the integer cell-type codes with readable labels.
ct_dict = {
    0: 'Macrophage',
    1: 'Dendritic cell',
    2: 'T cell',
    3: 'B cell',
    4: 'Neutrophil',
    5: 'NK cell',
}
bdata.obs['cell.type'] = [ct_dict[code] for code in bdata.obs['cell.type']]
sc.pl.umap(bdata, color=['cell.type'])

# Run the model on the query data; the '<key>_unc_infered' obs columns are
# read right after this call.
scu.predcit_unicoord_in_adata(bdata, cdata)
inferred_keys = [
    f'{key}_unc_infered'
    for key in ['cell_ontology_class', 'mouse.id', 'mouse.sex', 'tissue', 'seq_tech']
]
sc.pl.umap(
    bdata,
    color=['cell.type'] + inferred_keys,
    ncols=1,
)
bdata.obs.cell_ontology_class_unc_infered.value_counts()

# Cross-tabulate inferred class against the original label and export it.
pair_counts = bdata.obs.groupby(['cell_ontology_class_unc_infered', 'cell.type']).size()
pair_counts.unstack().to_csv('./TBMU_ct_pred.csv')
# numpy was used below without ever being imported in this script; import it
# here, next to the plotting import this step already needs.
import matplotlib.pyplot as plt
import numpy as np

# Predict on the training data itself as a sanity check, then release cached
# GPU memory.
scu.predcit_unicoord_in_adata(adata, cdata)
torch.cuda.empty_cache()
adata

# Count cells per (observed class, inferred class) pair; absent pairs are 0.
df = adata.obs.groupby(["cell_ontology_class",
                        "cell_ontology_class_unc_infered"]).size().unstack(fill_value=0)
# Normalise each row so it sums to 1 (fraction of each observed class).
conf_mat = df.div(df.sum(axis=1), axis=0)
df.to_csv('./TBMU_ct_self.csv')

# Heat map of the row-normalised matrix; ticks sit at cell centres (+0.5).
plt.figure(figsize=(8, 8))
_ = plt.pcolor(conf_mat)
_ = plt.xticks(np.arange(0.5, len(df.columns), 1), df.columns, rotation=90)
_ = plt.yticks(np.arange(0.5, len(df.index), 1), df.index)
plt.xlabel("Predicted")
plt.ylabel("Observed")
df