%config InlineBackend.figure_format = 'retina'
%pylab inline
from glob import glob
import anndata
import pandas as pd
Populating the interactive namespace from numpy and matplotlib
# Names of the processed datasets to load; each one is read from
# '../Data/output/<name>.h5ad'. The order here is the order of `datasets`.
dataset_names = [
    '10x v3 HEK293T',
    '10x v3 NIH3T3',
    'Klein et al 2015',
    'Macosko et al 2015',
    'Svensson et al 2017 (1)',
    'Svensson et al 2017 (2)',
    'Zheng et al 2017',
    'Padovan-Merhar et al 2015 (SMARTer)',
    '10x v3 PBMC',
]

# `anndata.read` is a deprecated alias; `anndata.read_h5ad` is the supported
# reader for .h5ad files and takes the same path argument.
datasets = [
    anndata.read_h5ad(f'../Data/output/{dataset_name}.h5ad')
    for dataset_name in dataset_names
]
# Subtitle shown under each dataset's name in its figure panel, keyed by the
# dataset name (looked up via adata.uns['name'] when the panels are drawn).
# Spelling of "endogenous", "homogeneous" and "heterogeneous" is corrected
# here because these strings are rendered verbatim into the saved figure.
annotation = {
    'Macosko et al 2015': 'Solution (ERCC spike-ins)',
    'Svensson et al 2017 (1)': 'Solution (brain endogenous RNA & ERCC spike-ins)',
    'Zheng et al 2017': 'Solution (ERCC spike-ins)',
    'Svensson et al 2017 (2)': 'Solution (brain endogenous RNA & ERCC spike-ins)',
    '10x v3 HEK293T': 'Single cells (homogeneous)',
    '10x v3 NIH3T3': 'Single cells (homogeneous)',
    '10x v3 PBMC': 'Single cells (heterogeneous)',
    'Klein et al 2015': 'Solution (K562 endogenous RNA & ERCC spike-ins)',
    'Padovan-Merhar et al 2015 (SMARTer)': 'Single cells (homogeneous, full length)',
}
# Panel slot for each dataset: the value is the flat index used to pick a
# cell of the 3x3 outer grid of the figure, keyed by dataset name.
location = dict([
    ('Macosko et al 2015', 6),
    ('Svensson et al 2017 (1)', 3),
    ('Zheng et al 2017', 7),
    ('Svensson et al 2017 (2)', 5),
    ('10x v3 HEK293T', 2),
    ('10x v3 NIH3T3', 1),
    ('10x v3 PBMC', 0),
    ('Klein et al 2015', 4),
    ('Padovan-Merhar et al 2015 (SMARTer)', 8),
])
# For every dataset, record the smallest and largest per-gene value of
# 'empirical_zero_fraction' minus 'global_zero_fraction' (columns of
# adata.var), so the overall range across datasets can be inspected.
# NOTE(review): this cell reads 'global_zero_fraction', whereas the figure
# drawn later in this file uses 'poisson_zero_fraction' - confirm which
# column is intended here.
mins = []
maxs = []
for adata in datasets:
    difference1 = adata.var['empirical_zero_fraction'] - adata.var['global_zero_fraction']
    # A stray trailing comma used to follow this call, wrapping its result
    # in a throwaway tuple; it had no effect and has been dropped.
    mins.append(difference1.min())
    maxs.append(difference1.max())

# Final expression of the notebook cell: displays the overall (min, max).
min(mins), max(maxs)
(-0.3979434284180565, 0.8805231468904677)
# Build one figure with a 3x3 grid of panels. Each dataset gets one panel,
# and each panel is split into two vertically stacked axes.
fig = plt.figure(figsize=(15, 18))
outer_grid = fig.add_gridspec(3, 3, hspace=0.4, wspace=0.6)

for adata in datasets:
    dataset_name = adata.uns['name']
    genes = adata.var

    # Pick this dataset's panel and split it into an upper and lower row.
    panel = outer_grid[location[dataset_name]]
    rows = panel.subgridspec(2, 1)

    # -- Upper axes: observed and Poisson zero fractions against the mean --
    ax = fig.add_subplot(rows[0])
    ax.set_title('Poisson')
    ax.set_xscale('log')
    ax.set_xlim(left=1e-9, right=1e-1)

    ax.scatter(
        genes['scaled_count_mean'],
        genes['empirical_zero_fraction'],
        c='k',
        label='Observed',
        rasterized=True,
    )
    ax.scatter(
        genes['scaled_count_mean'],
        genes['poisson_zero_fraction'],
        ec='w',
        c='grey',
        label='Expected',
        rasterized=True,
    )

    ax.set_ylabel('Fraction zeros')
    ax.legend(title='Genes', loc='lower left', scatterpoints=3, fontsize=8)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    # -- Lower axes: per-gene difference between the two zero fractions --
    ax = fig.add_subplot(rows[1])
    ax.set_xscale('log')
    ax.set_xlim(left=1e-9, right=1e-1)
    ax.set_ylim(top=1., bottom=-0.4)

    zero_fraction_gap = genes['empirical_zero_fraction'] - genes['poisson_zero_fraction']
    ax.scatter(
        genes['scaled_count_mean'],
        zero_fraction_gap,
        c='k',
        marker='.',
        label='Genes',
        rasterized=True,
    )

    ax.set_ylabel('Difference \n(Observed - Expected)')
    ax.set_xlabel('Mean (of scaled counts)')
    ax.legend(loc='lower left', scatterpoints=3)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    # -- Panel heading: dataset name plus its annotation, centred
    #    horizontally over the panel and placed slightly above its top --
    panel_box = panel.get_position(fig)
    heading_x = (panel_box.x0 + panel_box.x1) / 2
    heading_y = panel_box.y1 + 0.02
    heading = '\n'.join([dataset_name, annotation[dataset_name]])
    fig.text(heading_x, heading_y, heading, ha='center', fontsize=12)

# Write the finished figure to disk once all panels are drawn.
fig.savefig('../Figures/poisson_plot.pdf', dpi=500, bbox_inches='tight')