In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
%load_ext line_profiler
In [2]:
import scanpy as sc
import random
from unicoord import scu
from unicoord.visualization import draw_loss_curves
import torch
from line_profiler import LineProfiler
In [3]:
sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
# sc.settings.set_figure_params(dpi=80, facecolor='white')
sc.settings.set_figure_params(vector_friendly=False)
scanpy==1.8.1 anndata==0.7.6 umap==0.5.1 numpy==1.22.3 scipy==1.7.1 pandas==1.3.3 scikit-learn==1.0 statsmodels==0.13.0 python-igraph==0.9.6 pynndescent==0.5.4

hECA total

In [4]:
adata = sc.read_h5ad(r'F:\h5ad\tabularMuris\TBMU.h5ad', )
In [5]:
adata
Out[5]:
AnnData object with n_obs × n_vars = 100605 × 23433
    obs: 'cell_ontology_class', 'cell_ontology_id', 'free_annotation', 'mouse.id', 'mouse.sex', 'tissue', 'tissue_tSNE_1', 'tissue_tSNE_2', 'seq_tech', 'n_counts', 'n_genes'
    var: 'gene_ids-1'
In [6]:
adata[adata.obs.cell_ontology_class.isna()].obs.cell_ontology_class[0]
Out[6]:
nan
In [7]:
adata.obs.cell_ontology_class.dtype.name
Out[7]:
'category'
In [8]:
for c in adata.obs.columns:
    a = adata.obs[c]
    if a.dtype.name == 'category':
        adata.obs[c] = a.astype(str)
In [31]:
scu.model_unicoord_in_adata(adata, 
                            n_cont=50, n_diff=0, n_clus = [],
                            obs_fitting=['cell_ontology_class',
                                         'free_annotation','mouse.id','mouse.sex',
                                         'tissue','tissue_tSNE_1','tissue_tSNE_2','seq_tech'], 
                            min_obs = 500)
In [32]:
unc_stuffs = adata.uns['unc_stuffs']
In [33]:
scu.train_unicoord_in_adata(adata, epochs=10, chunk_size=20000, slot = "cur")
training chunk 1 / 6 of the data
training chunk 2 / 6 of the data
training chunk 3 / 6 of the data
training chunk 4 / 6 of the data
training chunk 5 / 6 of the data
training chunk 6 / 6 of the data
In [15]:
fig = draw_loss_curves(unc_stuffs['loss'])
# if save_figs:
#     fig.savefig(os.path.join(savePath, 'img', 'fig1_lossCurves.png'))
fig.show()
<ipython-input-15-a1f761dfb391>:4: UserWarning: Matplotlib is currently using module://ipykernel.pylab.backend_inline, which is a non-GUI backend, so cannot show the figure.
  fig.show()
In [34]:
scu.write_scu_h5ad(adata, './pretrained_models/TBMU.h5ad', only_model=True)
G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\_core\anndata.py:1220: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'mouse.sex' as categorical
G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\_core\anndata.py:1220: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'tissue' as categorical
G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\_core\anndata.py:1220: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'seq_tech' as categorical

predict IPF

In [9]:
cdata = scu.load_scu_h5ad('./pretrained_models/TBMU.h5ad')
In [61]:
bdata = sc.read_h5ad('./IPF.h5ad')
G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\compat\__init__.py:180: FutureWarning: Moving element from .uns['neighbors']['distances'] to .obsp['distances'].

This is where adjacency matrices should go now.
  warn(
In [62]:
bdata = bdata.raw.to_adata()
sc.pp.normalize_total(bdata, target_sum=1e4 ,exclude_highly_expressed= True)
sc.pp.log1p(bdata)
bdata
normalizing counts per cell The following highly-expressed genes are not considered during normalization factor computation:
[]
    finished (0:00:00)
Out[62]:
AnnData object with n_obs × n_vars = 15596 × 12247
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'RNA_snn_res.0.25', 'seurat_clusters', 'RNA_snn_res.0.15', 'cell.type', 'doublet'
    var: 'mean', 'variance', 'variance.expected', 'variance.standardized'
    uns: 'neighbors', 'log1p'
    obsm: 'X_pca', 'X_tsne', 'X_umap'
In [63]:
ct_dict = {0:'Macrophage',1:'Dendritic cell',2:'T cell',3:'B cell',4:'Neutrophil',5:'NK cell'}
bdata.obs['cell.type'] = [ct_dict[i] for i in bdata.obs['cell.type']]
In [64]:
sc.pl.umap(bdata, color=['cell.type'])
G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\_core\anndata.py:1220: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'cell.type' as categorical
In [65]:
scu.predcit_unicoord_in_adata(bdata, cdata)
12778 needed genes are not exist in the query adata, filled with zeros
Trying to set attribute `.var` of view, copying.
G:\anaconda3\envs\torch_geo\lib\site-packages\pandas\core\indexing.py:1732: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
In [66]:
sc.pl.umap(bdata, 
           color=['cell.type']+[s+'_unc_infered' for s in \
                                       ['cell_ontology_class','mouse.id','mouse.sex','tissue','seq_tech']],
          ncols = 1)
G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\_core\anndata.py:1220: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'cell_ontology_class_unc_infered' as categorical
G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\_core\anndata.py:1220: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'free_annotation_unc_infered' as categorical
G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\_core\anndata.py:1220: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'mouse.id_unc_infered' as categorical
G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\_core\anndata.py:1220: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'mouse.sex_unc_infered' as categorical
G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\_core\anndata.py:1220: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'tissue_unc_infered' as categorical
G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\_core\anndata.py:1220: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'seq_tech_unc_infered' as categorical
In [60]:
bdata.obs.cell_ontology_class_unc_infered.value_counts()
Out[60]:
T cell                                             3860
myeloid cell                                       2821
B cell                                             2504
bladder urothelial cell                            1990
Slamf1-negative multipotent progenitor cell         818
natural killer cell                                 668
blood cell                                          663
immature T cell                                     579
alveolar macrophage                                 444
endothelial cell                                    390
leukocyte                                           172
skeletal muscle satellite stem cell                 125
macrophage                                           98
granulocyte                                          79
nan                                                  66
epithelial cell of large intestine                   66
hepatocyte                                           52
rare types                                           43
kidney proximal straight tubule epithelial cell      30
basal cell                                           24
basal cell of epidermis                              21
oligodendrocyte                                      20
mesenchymal stem cell                                15
granulocytopoietic cell                              10
precursor B cell                                      9
large intestine goblet cell                           6
monocyte                                              4
microglial cell                                       4
epithelial cell                                       3
hematopoietic precursor cell                          3
lung endothelial cell                                 1
bladder cell                                          1
kidney capillary endothelial cell                     1
keratinocyte                                          1
endothelial cell of hepatic sinusoid                  1
epidermal cell                                        1
fibroblast                                            1
epithelial cell of proximal tubule                    1
late pro-B cell                                       1
Name: cell_ontology_class_unc_infered, dtype: int64
In [71]:
bdata.obs.groupby(['cell_ontology_class_unc_infered','cell.type']).size().unstack().to_csv('./TBMU_ct_pred.csv')

predict it self

In [10]:
scu.predcit_unicoord_in_adata(adata, cdata)
In [11]:
torch.cuda.empty_cache()
In [12]:
adata
Out[12]:
AnnData object with n_obs × n_vars = 100605 × 23433
    obs: 'cell_ontology_class', 'cell_ontology_id', 'free_annotation', 'mouse.id', 'mouse.sex', 'tissue', 'tissue_tSNE_1', 'tissue_tSNE_2', 'seq_tech', 'n_counts', 'n_genes', 'tissue_tSNE_1_unc_infered', 'tissue_tSNE_2_unc_infered', 'cell_ontology_class_unc_infered', 'free_annotation_unc_infered', 'mouse.id_unc_infered', 'mouse.sex_unc_infered', 'tissue_unc_infered', 'seq_tech_unc_infered'
    var: 'gene_ids-1'
In [15]:
df = adata.obs.groupby(["cell_ontology_class",
                        "cell_ontology_class_unc_infered"]).size().unstack(fill_value=0)
conf_mat = df / df.sum(axis=1).values[:, np.newaxis]
In [19]:
df.to_csv('./TBMU_ct_self.csv')
In [17]:
import matplotlib.pyplot as plt
In [18]:
plt.figure(figsize=(8, 8))
_ = plt.pcolor(conf_mat)
_ = plt.xticks(np.arange(0.5, len(df.columns), 1), df.columns, rotation=90)
_ = plt.yticks(np.arange(0.5, len(df.index), 1), df.index)
plt.xlabel("Predicted")
plt.ylabel("Observed")
Out[18]:
Text(0, 0.5, 'Observed')
In [16]:
df
Out[16]:
cell_ontology_class_unc_infered B cell Slamf1-negative multipotent progenitor cell T cell alveolar macrophage astrocyte basal cell basal cell of epidermis bladder cell bladder urothelial cell blood cell ... neuron non-classical monocyte oligodendrocyte precursor B cell proerythroblast promonocyte rare types skeletal muscle satellite stem cell stromal cell type B pancreatic cell
cell_ontology_class
B cell 9706 1 156 1 1 1 7 1 51 4 ... 0 1 3 5 0 0 1 38 0 0
Bergmann glial cell 0 0 0 0 29 0 0 0 0 0 ... 0 0 1 0 0 0 1 0 3 0
Brush cell of epithelium proper of large intestine 0 3 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 3 0 0 0
DN1 thymic pro-T cell 1 0 33 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 2 0 0
Fraction A pre-pro B cell 0 0 0 6 0 2 2 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
smooth muscle cell 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 31 0 0 0
stem cell of epidermis 0 0 0 0 0 9 37 0 2 0 ... 0 0 0 0 0 0 0 0 0 0
stromal cell 0 16 62 3 6 449 3 58 17 26 ... 0 0 4 110 0 0 97 129 2316 1
type B pancreatic cell 0 0 0 0 1 0 0 0 11 0 ... 2 0 3 0 1 0 0 28 0 0
type II pneumocyte 0 0 0 1 0 0 4 0 16 29 ... 0 0 1 0 0 0 4 0 0 0

100 rows × 53 columns