%matplotlib inline
%load_ext autoreload
%autoreload 2

import random

import numpy as np
import pandas as pd
import scanpy as sc
import torch
from line_profiler import LineProfiler

from unicoord import scu
from unicoord.visualization import draw_loss_curves

# NOTE(review): `sns` is used by the heatmap cells further down but is never
# imported in the visible cells — presumably seaborn; confirm and import it.

# Verbosity scale: errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3
sc.logging.print_header()
sc.settings.set_figure_params(vector_friendly=False)

scanpy==1.8.1 anndata==0.7.6 umap==0.5.1 numpy==1.22.3 scipy==1.7.1 pandas==1.3.3 scikit-learn==1.0 statsmodels==0.13.0 python-igraph==0.9.6 pynndescent==0.5.4

load hECA data¶

# Load the hECA AnnData file, then normalise and log-transform it.
adata = sc.read_h5ad(r"F:\h5ad\hECA_eachCT2000_pcGenes.h5ad")
sc.pp.normalize_total(
    adata,
    target_sum=1e4,
    exclude_highly_expressed=True,
)
sc.pp.log1p(adata)
adata

model and training¶

# Set up the UniCoord model with 20 continuous dimensions and three
# supervised obs columns.
scu.model_unicoord_in_adata(
    adata,
    n_diff=0,
    n_clus=[],
    n_cont=20,
    obs_fitting=['seq_tech', 'organ', 'cell_type'],
)

# NOTE(review): the captured log below shows 10 epochs per chunk while this
# call asks for epochs=2 — the output may be stale; confirm before re-running.
scu.train_unicoord_in_adata(adata, epochs=2, slot='cur', chunk_size=20000)

training chunk 1 / 5 of the data

Epoch 10: 100%|████████████████████████████████████████████| 10/10 [00:21<00:00,  2.19s/it, Epoch_average_loss=1717.55]

training chunk 2 / 5 of the data

Epoch 10: 100%|████████████████████████████████████████████| 10/10 [00:21<00:00,  2.19s/it, Epoch_average_loss=1685.16]

training chunk 3 / 5 of the data

Epoch 10: 100%|████████████████████████████████████████████| 10/10 [00:21<00:00,  2.19s/it, Epoch_average_loss=1662.82]

training chunk 4 / 5 of the data

Epoch 10: 100%|████████████████████████████████████████████| 10/10 [00:22<00:00,  2.27s/it, Epoch_average_loss=1645.18]

training chunk 5 / 5 of the data

Epoch 10: 100%|████████████████████████████████████████████| 10/10 [00:17<00:00,  1.79s/it, Epoch_average_loss=1625.56]

# Plot the loss history kept by the trainer stored in adata.uns['unc_stuffs'].
fig = draw_loss_curves(adata.uns['unc_stuffs']['trainer'].losses)
# if save_figs:
#     fig.savefig(os.path.join(savePath, 'img', 'fig1_lossCurves.png'))
# NOTE(review): with `%matplotlib inline` this call only produces the
# "non-GUI backend ... cannot show the figure" UserWarning captured in the
# cell output; consider dropping it once the inline rendering is confirmed.
fig.show()

<ipython-input-31-17c745c75a86>:4: UserWarning: Matplotlib is currently using module://ipykernel.pylab.backend_inline, which is a non-GUI backend, so cannot show the figure.
  fig.show()

# Compute the UniCoord embedding for the cells in adata.
scu.embed_unicoord_in_adata(adata, only_sup=False)

# The embedding is read back from adata.obsm['unicoord'].
adata.obsm['unicoord'].shape

(96000, 20)

# Build the neighbour graph from the 'unicoord' representation.
sc.pp.neighbors(adata, use_rep='unicoord')

computing neighbors
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:05)

# Leiden clustering at resolution 0.5; labels are stored as adata.obs['leiden'].
sc.tl.leiden(adata, resolution=0.5)

running Leiden clustering
    finished: found 45 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:27)

# UMAP layout; coordinates are stored as adata.obsm['X_umap'].
sc.tl.umap(adata)

computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm) (0:01:38)

# UMAP of the reference cells, one panel per metadata column.
sc.pl.embedding(
    adata,
    'X_umap',
    legend_loc='on data',
    legend_fontsize=10,
    color=['seq_tech', 'study_id', 'tissue_type', 'cell_type', 'organ'],
    ncols=2,
)

predict test set¶

# Copy out the cells whose `unc_training` flag is False.
bdata = adata[~adata.obs['unc_training'], :].copy()
bdata

AnnData object with n_obs × n_vars = 9494 × 2000
    obs: 'user_id', 'study_id', 'cell_id', 'organ', 'region', 'subregion', 'seq_tech', 'sample_status', 'donor_id', 'donor_gender', 'donor_age', 'original_name', 'cl_name', 'hcad_name', 'tissue_type', 'cell_type', 'marker_gene', 'cid', 'RNA_snn_res.0.4', 'seurat_clusters', 'olfactory receptor activity', 'sensory perception of smell', 'nuclear-transcribed mRNA catabolic process', 'ribosome assembly', 'regulation of protein modification by small protein conjugation or removal', 'translation regulator activity', 'enzyme inhibitor activity', 'mitochondrial membrane organization', 'mitochondrial transport', 'nucleoside triphosphate metabolic process', 'negative regulation of protein ubiquitination', 'protein targeting', 'negative regulation of binding', 'negative regulation of translation', 'regulation of protein stability', 'negative regulation of hydrolase activity', 'regulation of protein catabolic process', 'positive regulation of cellular protein localization', 'regulation of ATP metabolic process', 'positive regulation of protein modification by small protein conjugation or removal', 'positive regulation of proteolysis involved in cellular protein catabolic process', 'negative regulation of cytoskeleton organization', 'phosphoprotein phosphatase activity', 'epithelial tube morphogenesis', 'maintenance of location in cell', 'cellular response to steroid hormone stimulus', 'protein polyubiquitination', 'regulation of supramolecular fiber organization', 'regulation of actin filament organization', 'protein localization to plasma membrane', 'unc_training', 'leiden', 'subtypes'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable'
    uns: 'unc_stuffs', 'neighbors', 'leiden', 'umap', 'seq_tech_colors', 'tissue_type_colors', 'cell_type_colors', 'subtypes_colors'
    obsm: 'unicoord', 'X_umap'
    obsp: 'distances', 'connectivities'

# Annotate bdata using the model held in adata.
# NOTE(review): `predcit` is the spelling used for every call in this
# notebook — presumably the library's actual function name; confirm.
scu.predcit_unicoord_in_adata(bdata, adata)

# Side-by-side UMAP panels of annotated vs. inferred labels for the cells
# excluded from training. Inferred columns follow the '<key>_unc_infered'
# naming used by the other prediction plots in this notebook.
sc.pl.embedding(bdata, 'X_umap', legend_loc='on data', legend_fontsize=10,
                color=['seq_tech', 'seq_tech_unc_infered',
                       'cell_type', 'cell_type_unc_infered',
                       'organ', 'organ_unc_infered'], ncols=2)

G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\_core\anndata.py:1220: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'seq_tech_unc_infered' as categorical
G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\_core\anndata.py:1220: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'tissue_type_unc_infered' as categorical
G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\_core\anndata.py:1220: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'cell_type_unc_infered' as categorical

predict LUAD data¶

# Load the pre-processed lung cancer (LUAD) AnnData file used as the query set.
cdata = sc.read_h5ad(r'D:\hECA\Lung_cancer.pp.h5ad')

cdata

AnnData object with n_obs × n_vars = 188954 × 1943
    obs: 'Index', 'Barcode', 'Sample', 'Sample_Origin', 'Cell_type', 'Cell_type.refined', 'Cell_subtype', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden'
    var: 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'Cell_subtype_colors', 'Cell_type.refined_colors', 'Sample_Origin_colors', 'Sample_colors', 'hvg', 'leiden', 'leiden_colors', 'neighbors', 'pca', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'

# UMAP stored with the LUAD file, coloured by its own annotations.
sc.pl.embedding(
    cdata,
    'X_umap',
    legend_loc='on data',
    legend_fontsize=10,
    color=['leiden', 'Sample', 'Cell_type.refined', 'Cell_subtype'],
    ncols=2,
)

# Switch to the matrix kept under `.raw`, then apply the same normalisation
# and log transform used for the reference data.
cdata = cdata.raw.to_adata()
sc.pp.normalize_total(
    cdata,
    target_sum=1e4,
    exclude_highly_expressed=True,
)
sc.pp.log1p(cdata)
cdata

normalizing counts per cell The following highly-expressed genes are not considered during normalization factor computation:
['ACTB', 'AGR2', 'AKR1B1', 'APOD', 'APOE', 'AREG', 'B2M', 'BPIFA1', 'BPIFB1', 'CCL18', 'CCL19', 'CCL2', 'CCL21', 'CCL3', 'CCL3L3', 'CCL4', 'CCL4L2', 'CD74', 'CFD', 'CLU', 'COL1A1', 'COL1A2', 'COL3A1', 'CST3', 'CSTB', 'CXCL10', 'CXCL13', 'CXCL2', 'CXCL8', 'DCN', 'EEF1A1', 'FDCSP', 'FTH1', 'FTL', 'GNLY', 'GZMB', 'HBA1', 'HBA2', 'HBB', 'HLA-DRA', 'HP', 'HPGD', 'HSP90AA1', 'HSPA6', 'HSPB1', 'IER2', 'IFNG', 'IGHA1', 'IGHA2', 'IGHD', 'IGHE', 'IGHG1', 'IGHG2', 'IGHG3', 'IGHG4', 'IGHGP', 'IGHM', 'IGKC', 'IGLC2', 'IGLC3', 'IGLC7', 'IGLL5', 'ITLN1', 'JCHAIN', 'JUN', 'JUNB', 'LYZ', 'MALAT1', 'MGP', 'MSMB', 'MT1G', 'MT1X', 'MT2A', 'NEAT1', 'NFKBIA', 'NTS', 'PIP', 'PPBP', 'PTGDS', 'RPL10', 'RPL13', 'RPL13A', 'RPL37', 'RPLP1', 'RPS10', 'RPS16', 'RPS18', 'RPS19', 'RPS27', 'S100A6', 'S100A8', 'S100A9', 'SCGB1A1', 'SCGB3A1', 'SCGB3A2', 'SFRP4', 'SFTPA1', 'SFTPA2', 'SFTPC', 'SLPI', 'SPP1', 'TAGLN', 'TFF3', 'TIMP1', 'TMSB10', 'TMSB4X', 'TPSAB1', 'TPSB2', 'TXN']
    finished (0:00:04)

AnnData object with n_obs × n_vars = 188954 × 27578
    obs: 'Index', 'Barcode', 'Sample', 'Sample_Origin', 'Cell_type', 'Cell_type.refined', 'Cell_subtype', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden'
    var: 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
    uns: 'Cell_subtype_colors', 'Cell_type.refined_colors', 'Sample_Origin_colors', 'Sample_colors', 'hvg', 'leiden', 'leiden_colors', 'neighbors', 'pca', 'umap', 'log1p'
    obsm: 'X_pca', 'X_umap'

# Annotate the LUAD cells using the model held in adata. The log below
# reports that reference genes missing from the query are filled with zeros.
scu.predcit_unicoord_in_adata(cdata, adata)

1589 needed genes are not exist in the query adata, filled with zeros

Trying to set attribute `.var` of view, copying.
G:\anaconda3\envs\torch_geo\lib\site-packages\pandas\core\indexing.py:1732: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)

# LUAD annotations (left panels) next to the inferred labels (right panels).
sc.pl.embedding(
    cdata,
    'X_umap',
    legend_loc='on data',
    legend_fontsize=10,
    color=[
        'Sample', 'seq_tech_unc_infered',
        'Cell_type.refined', 'cell_type_unc_infered',
        'Cell_subtype', 'organ_unc_infered',
    ],
    ncols=2,
)

# Load an existing table; its column order is reused below when building
# the confusion matrix.
confusion_mtx = pd.read_csv('./table.csv', index_col=0)

confusion_mtx.columns

Index(['AT1', 'AT2', 'Club', 'Ciliated', 'Malignant cells', 'tS1', 'tS2',
       'Lymphatic ECs', 'Stalk-like ECs', 'Tip-like ECs', 'Tumor ECs',
       'COL13A1+ matrix FBs', 'COL14A1+ matrix FBs', 'Myofibroblasts',
       'Pericytes', 'Smooth muscle cells', 'Activated DCs', 'CD1c+ DCs',
       'CD141+ DCs', 'CD163+CD14+ DCs', 'Alveolar Mac', 'Microglia/Mac',
       'Pleural Mac', 'mo-Mac', 'Monocytes', 'MAST', 'NK', 'Naive CD4+ T',
       'CD4+ Th', 'Treg', 'Exhausted Tfh', 'Naive CD8+ T', 'CD8 low T',
       'CD8+/CD4+ Mixed Th', 'Cytotoxic CD8+ T', 'Exhausted CD8+ T',
       'Follicular B cells', 'GC B cells in the DZ', 'GC B cells in the LZ',
       'GrB-secreting B cells', 'MALT B cells', 'Plasma cells', 'Undetermined',
       'rare types'],
      dtype='object')

# Cross-tabulate inferred cell types against the coarse LUAD annotation.
(
    cdata.obs
    .groupby(['cell_type_unc_infered', 'Cell_type.refined'])
    .size()
    .unstack()
)

# Same cross-tabulation against the LUAD subtypes, with columns ordered like
# the loaded table minus its last column, then written to CSV.
confusion_matrix = (
    cdata.obs
    .groupby(['cell_type_unc_infered', 'Cell_subtype'])
    .size()
    .unstack()
    .loc[:, list(confusion_mtx.columns)[:-1]]
)
confusion_matrix.to_csv('./hECA_pred_LUAD.csv')

# Lookup from inferred cell-type name to the coarse label it is scored as.
# Written group-by-group; the resulting dict has one entry per cell type, in
# the order listed here.
ct_mapping = {
    cell_type: coarse_label
    for coarse_label, cell_types in (
        ("Epithelial cells", (
            "Epithelial cell",
            "Type II alveolar cell",
            "Goblet cell",
            "Basal cell",
            "Enterocyte progenitor",
        )),
        ("Endothelial cells", (
            "Vascular endothelial cell",
        )),
        ("Fibroblasts", (
            "Stromal cell",
            "Fibroblast",
            "Fibrocyte",
            "Smooth muscle cell",
        )),
        ("Myeloid cells", (
            "Dendritic cell",
            "Macrophage",
            "Monocyte",
            "Microglia",
        )),
        ("MAST cells", (
            "Mast cell",
        )),
        ("T/NK cells", (
            "NK cell",
            "T cell",
        )),
        ("B lymphocytes", (
            "B cell",
            "Plasma B cell",
        )),
        ("other", (
            "Acinar cell",
            "Astrocyte",
            "Bipolar cell",
            "Cardiomyocyte cell",
            "Chief cell",
            "Endocardial cell",
            "Endothelial cell",
            "Erythrocyte",
            "Follicular epithelial cell",
            "Hemopoietic stem cell",
            "Inhibitory neuron",
            "Intercalated cell",
            "Loop of henle",
            "Mesenchymal cell",
            "Muller cell",
            "Neutrophilic granulocyte",
            "Neuron",
            "Oligodendrocyte",
            "Oligodendrocyte precursor cell",
            "Proliferating T cell",
            "Proximal convoluted tubule",
            "Pericyte",
            "Perineural epithelial cell",
        )),
    )
    for cell_type in cell_types
}

# ct1: LUAD coarse annotation; ct2: inferred cell type translated through
# ct_mapping (names missing from the mapping become "rare types").
ct1 = cdata.obs['Cell_type.refined']
ct2 = pd.Series([ct_mapping.get(inferred, "rare types")
                 for inferred in cdata.obs['cell_type_unc_infered']])
# Drop the cells annotated 'Undetermined' or 'NA' from both series.
ct1, ct2 = [
    labels[list(~cdata.obs['Cell_type.refined'].isin(['Undetermined', 'NA']))]
    for labels in (ct1, ct2)
]

from sklearn.metrics import accuracy_score

# Agreement between the LUAD annotation (ct1) and the mapped prediction (ct2).
accuracy_score(ct1, ct2)

0.8549266273439782

# Give the predictions the same index as the annotations so the two series
# line up when placed side by side.
ct2.index = ct1.index
a = pd.concat([ct1, ct2], axis=1, keys=['LUAD_label', 'pred_label'])

# Coarse-label confusion table: rows are LUAD labels, columns are predictions.
a.groupby(['LUAD_label', 'pred_label']).size().unstack()

# Reload the confusion table from the CSV file.
confusion_mtx = pd.read_csv('./hECA_pred_LUAD.csv', index_col=0)

sns.set(rc={'figure.figsize': (15, 10)})

# Heatmap of log(count + 1), saved to PDF.
# Note: `hcl` is bound to whatever savefig returns, not to the heatmap axes.
hcl = sns.heatmap(
    np.log(confusion_mtx + 1),
    cmap="YlGnBu",
    xticklabels=True,
).figure.savefig('../UniCoord/hECA_pred_LUAD.pdf', bbox_inches="tight", dpi=200)

# Heatmap of counts divided by their column totals, saved to PDF.
hcl = sns.heatmap(
    confusion_mtx / confusion_mtx.sum(axis=0),
    cmap="YlGnBu",
    xticklabels=True,
).figure.savefig('../UniCoord/hECA_pred_LUAD_ratio.pdf', bbox_inches="tight", dpi=200)

# Heatmap of counts divided by their row totals; displayed only, not saved.
hcl = sns.heatmap(
    confusion_mtx.div(confusion_mtx.sum(axis=1), axis=0),
    cmap="YlGnBu",
    xticklabels=True,
)

predict lung ECA data¶

# Load the adult lung file as the next query set (rebinding `cdata`).
cdata = sc.read_h5ad(r'D:\hECA\Lung.Adult.pp.h5ad')

# UMAP stored with the file, coloured by its own annotations.
sc.pl.embedding(
    cdata,
    'X_umap',
    legend_loc='on data',
    legend_fontsize=10,
    color=['leiden', 'cell_type', 'study_id', 'tissue_type'],
    ncols=2,
)

G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\_core\anndata.py:1220: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'seq_tech_unc_infered' as categorical
G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\_core\anndata.py:1220: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'organ_unc_infered' as categorical
G:\anaconda3\envs\torch_geo\lib\site-packages\anndata\_core\anndata.py:1220: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'cell_type_unc_infered' as categorical

# Switch to the matrix kept under `.raw`, then apply the same normalisation
# and log transform used for the reference data.
cdata = cdata.raw.to_adata()
sc.pp.normalize_total(
    cdata,
    target_sum=1e4,
    exclude_highly_expressed=True,
)
sc.pp.log1p(cdata)
cdata

normalizing counts per cell The following highly-expressed genes are not considered during normalization factor computation:
['CCL21', 'FTL', 'SCGB1A1', 'SCGB3A1', 'SCGB3A2', 'SFTPA2', 'SFTPC']
    finished (0:00:00)

AnnData object with n_obs × n_vars = 54615 × 20770
    obs: 'user_id', 'study_id', 'cell_id', 'organ', 'region', 'subregion', 'seq_tech', 'sample_status', 'donor_id', 'donor_gender', 'donor_age', 'original_name', 'cl_name', 'hcad_name', 'tissue_type', 'cell_type', 'marker_gene', 'cid', 'RNA_snn_res.0.4', 'seurat_clusters', 'nCount_RNA', 'nFeature_RNA', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden'
    var: 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
    uns: 'cell_type_colors', 'hvg', 'leiden', 'leiden_colors', 'neighbors', 'pca', 'study_id_colors', 'tissue_type_colors', 'umap', 'log1p'
    obsm: 'X_pca', 'X_umap'

# Annotate the adult lung cells using the model held in adata. The log below
# reports that reference genes missing from the query are filled with zeros.
scu.predcit_unicoord_in_adata(cdata, adata)

2024 needed genes are not exist in the query adata, filled with zeros

Trying to set attribute `.var` of view, copying.
G:\anaconda3\envs\torch_geo\lib\site-packages\pandas\core\indexing.py:1732: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)

# Annotated labels next to their inferred counterparts.
sc.pl.embedding(
    cdata,
    'X_umap',
    legend_loc='on data',
    legend_fontsize=10,
    color=[
        'seq_tech', 'seq_tech_unc_infered',
        'cell_type', 'cell_type_unc_infered',
    ],
    ncols=2,
)

# Lookup applied to the inferred cell types before scoring. Written
# group-by-group; the resulting dict has one entry per cell type, in the
# order listed here.
# NOTE(review): several names are bucketed differently from ct2_mapping
# (e.g. "Endothelial cell", "Neutrophilic granulocyte" and
# "Perineural epithelial cell" are 'other' here), and 'other' is never
# produced on the annotation side — confirm this is intended, since such
# predictions can never be scored as matches.
ct1_mapping = {
    cell_type: coarse_label
    for coarse_label, cell_types in (
        ("Epithelial cells", (
            "Epithelial cell",
            "Type II alveolar cell",
            "Goblet cell",
            "Basal cell",
            "Enterocyte progenitor",
        )),
        ("Endothelial cells", (
            "Vascular endothelial cell",
        )),
        ("Fibroblasts", (
            "Stromal cell",
            "Fibroblast",
            "Fibrocyte",
            "Smooth muscle cell",
        )),
        ("Myeloid cells", (
            "Dendritic cell",
            "Macrophage",
            "Monocyte",
            "Microglia",
        )),
        ("MAST cells", (
            "Mast cell",
        )),
        ("T/NK cells", (
            "NK cell",
            "T cell",
        )),
        ("B lymphocytes", (
            "B cell",
            "Plasma B cell",
        )),
        ("other", (
            "Acinar cell",
            "Astrocyte",
            "Bipolar cell",
            "Cardiomyocyte cell",
            "Chief cell",
            "Endocardial cell",
            "Endothelial cell",
            "Erythrocyte",
            "Follicular epithelial cell",
            "Hemopoietic stem cell",
            "Inhibitory neuron",
            "Intercalated cell",
            "Loop of henle",
            "Mesenchymal cell",
            "Muller cell",
            "Neutrophilic granulocyte",
            "Neuron",
            "Oligodendrocyte",
            "Oligodendrocyte precursor cell",
            "Proliferating T cell",
            "Proximal convoluted tubule",
            "Pericyte",
            "Perineural epithelial cell",
        )),
    )
    for cell_type in cell_types
}

# Lookup applied to the annotated `cell_type` column before scoring. Written
# group-by-group; the resulting dict has one entry per cell type, in the
# order listed here.
ct2_mapping = {
    cell_type: coarse_label
    for coarse_label, cell_types in (
        ("Epithelial cells", (
            "Type I alveolar cell",
            "Type I alveolar cell/Type II alveolar cell",
            "Type II alveolar cell",
            "Club cell",
            "Ciliated columnar cell",
            "Perineural epithelial cell",
            "Epithelial cell",
        )),
        ("Endothelial cells", (
            "Lymphatic endothelial cell",
            "Vascular endothelial cell",
            "Endothelial cell",
        )),
        ("Fibroblasts", (
            "Fibrocyte",
            "Smooth muscle cell",
        )),
        ("Myeloid cells", (
            "Dendritic cell",
            "Macrophage",
            "Monocyte",
            "Neutrophilic granulocyte",
            "Myeloid cell",
        )),
        ("MAST cells", (
            "Mast cell",
        )),
        ("T/NK cells", (
            "NK cell",
            "T cell",
            "CD8 T cell",
        )),
        ("B lymphocytes", (
            "B cell",
            "Plasma B cell",
        )),
        ("rare types", (
            "Chondrocyte",
            "Megakaryocyte",
        )),
    )
    for cell_type in cell_types
}

# ct1: inferred cell types translated through ct1_mapping.
# ct2: annotated cell types translated through ct2_mapping.
# Names missing from a mapping become "rare types".
ct1 = [ct1_mapping.get(inferred, "rare types")
       for inferred in cdata.obs['cell_type_unc_infered']]
ct2 = [ct2_mapping.get(annotated, "rare types")
       for annotated in cdata.obs['cell_type']]

from sklearn.metrics import f1_score

# ct1 holds the mapped predictions and ct2 the mapped annotations.
# NOTE(review): scikit-learn documents the argument order as
# (y_true, y_pred); predictions are passed first here — confirm the order
# is intended.
f1_score(ct1, ct2, average='micro')

0.8633342488327382

# Macro-averaged F1 over the coarse labels (ct1: predictions, ct2: annotations).
f1_score(ct1, ct2, average='macro')

0.6409801311060411

# Agreement between mapped predictions (ct1) and mapped annotations (ct2).
accuracy_score(ct1, ct2)

0.8633342488327382

# One row per cell: mapped prediction alongside mapped annotation.
a = pd.DataFrame({'pred': ct1, 'true': ct2})

# Confusion table: rows are predictions, columns are annotations.
a.groupby(['pred', 'true']).size().unstack()

pred_label	B lymphocytes	Endothelial cells	Epithelial cells	Fibroblasts	MAST cells	Myeloid cells	T/NK cells	other
LUAD_label
B lymphocytes	12591	80	73	0	829	285	12838	669
Endothelial cells	0	1933	0	0	0	18	18	7
Epithelial cells	619	124	18867	406	620	540	140	618
Fibroblasts	3	1668	140	1169	274	84	21	142
MAST cells	1	1	0	0	3332	17	5	0
Myeloid cells	651	62	20	27	288	36706	784	81
NA	0	0	0	0	0	0	0	0
T/NK cells	415	33	1	0	84	96	62952	559

true	B lymphocytes	Endothelial cells	Epithelial cells	Fibroblasts	MAST cells	Myeloid cells	T/NK cells	rare types
pred
B lymphocytes	925.0	NaN	7.0	7.0	NaN	28.0	348.0	NaN
Endothelial cells	50.0	3727.0	115.0	1801.0	8.0	42.0	144.0	11.0
Epithelial cells	63.0	36.0	7678.0	830.0	23.0	63.0	206.0	3.0
Fibroblasts	2.0	1.0	34.0	830.0	NaN	1.0	2.0	1.0
MAST cells	79.0	9.0	28.0	93.0	3169.0	70.0	224.0	NaN
Myeloid cells	76.0	15.0	78.0	201.0	38.0	12007.0	345.0	12.0
T/NK cells	570.0	4.0	15.0	13.0	20.0	42.0	18803.0	NaN
other	22.0	644.0	297.0	415.0	2.0	230.0	89.0	7.0
rare types	NaN	NaN	NaN	NaN	NaN	NaN	NaN	12.0

Cell_type.refined	B lymphocytes	Endothelial cells	Epithelial cells	Fibroblasts	MAST cells	Myeloid cells	NA	T/NK cells
cell_type_unc_infered
Acinar cell	0	0	15	0	0	0	0	1
Astrocyte	0	0	1	2	0	0	12	0
B cell	12190	0	611	3	1	651	509	415
Basal cell	0	0	217	1	0	0	0	0
Bipolar cell	0	0	2	0	0	0	0	0
Cardiomyocyte cell	0	0	1	0	0	0	0	0
Chief cell	0	0	3	0	0	0	0	0
Dendritic cell	117	2	450	32	2	1740	91	87
Endocardial cell	0	0	1	0	0	0	0	0
Endothelial cell	2	6	46	48	0	6	4	0
Enterocyte progenitor	0	0	312	0	0	0	3	0
Epithelial cell	3	0	2231	7	0	8	13	1
Erythrocyte	27	0	30	0	0	3	69	1
Fibroblast	0	0	14	271	0	7	1	0
Fibrocyte	0	0	3	104	0	0	0	0
Follicular epithelial cell	0	0	5	0	0	0	0	0
Goblet cell	0	0	548	13	0	0	0	0
Hemopoietic stem cell	555	0	136	1	0	64	769	549
Inhibitory neuron	0	0	3	0	0	0	52	0
Intercalated cell	0	0	5	0	0	0	0	0
Loop of henle	0	0	1	0	0	0	0	0
Macrophage	7	12	52	51	15	15373	5	0
Mast cell	829	0	620	274	3332	288	88	84
Mesenchymal cell	80	0	39	36	0	2	16	5
Microglia	86	0	24	0	0	4215	37	8
Monocyte	75	4	14	1	0	15378	12	1
Muller cell	0	0	36	0	0	0	30	0
NK cell	112	11	58	10	3	42	8848	18630
Neuron	0	0	4	0	0	2	4	3
Neutrophilic granulocyte	0	0	12	2	0	3	5	0
Oligodendrocyte	0	0	0	0	0	0	488	0
Oligodendrocyte precursor cell	0	0	0	0	0	0	2	0
Pericyte	0	1	31	50	0	0	2	0
Perineural epithelial cell	0	0	22	0	0	0	3	0
Plasma B cell	401	0	8	0	0	0	1	0
Proliferating T cell	5	0	221	2	0	1	1	0
Proximal convoluted tubule	0	0	4	1	0	0	0	0
Smooth muscle cell	0	0	386	169	0	4	13	0
Stromal cell	0	0	3	625	0	16	3	0
T cell	12726	7	82	11	2	742	16744	44322
Type II alveolar cell	70	0	15559	119	0	12	21	0
Vascular endothelial cell	80	1933	124	1668	1	62	217	33