%pylab inline
from glob import glob
import anndata
import pandas as pd
from scipy import optimize
from scipy.special import gammaln
from scipy.special import psi
from scipy.special import factorial
from scipy.optimize import fmin_l_bfgs_b as optim
from tqdm import tqdm
# [cell output] Populating the interactive namespace from numpy and matplotlib
# Load every .h5ad file in the output directory.
# glob() returns paths in arbitrary, filesystem-dependent order; sorting makes
# positional lookups such as datasets[4] below reproducible between runs.
# read_h5ad is called directly because anndata.read is only a deprecated alias.
datasets = [
    anndata.read_h5ad(fname)
    for fname in sorted(glob('../Data/output/*.h5ad'))
]

# Preview the per-gene annotation table of the fifth dataset, transposed so
# that each column is one gene.
datasets[4].var.head().T
# For each dataset, store on the AnnData object:
#   var['scaled_count_mean'] - each gene's total divided by the grand total of X
#   obs['total_counts']      - the row sums of X (one value per observation)
for adata in datasets:
    # X.sum(axis) returns a 2-D np.matrix for scipy sparse matrices but a 1-D
    # array for dense or sparse-array inputs. Flatten explicitly so the
    # result is always one value per gene / per cell, instead of indexing [0],
    # which would silently pick a single scalar in the 1-D case.
    gene_totals = np.asarray(adata.X.sum(0)).ravel()
    cell_totals = np.asarray(adata.X.sum(1)).ravel()

    adata.var['scaled_count_mean'] = gene_totals / adata.X.sum()
    adata.obs['total_counts'] = cell_totals
def prob_zero_fun(mu, counts):
    """Return the expected fraction of zeros for each entry of *mu*.

    For every pair (mu_i, counts_j) the Poisson probability of observing a
    zero with rate ``mu_i * counts_j`` is ``exp(-mu_i * counts_j)``.  These
    probabilities are averaged over all entries of *counts*.

    Parameters
    ----------
    mu : array-like
        Rates per unit of count (one per gene in the calling code).  May be
        a pandas Series, a list or a NumPy array; it is flattened to 1-D.
    counts : array-like
        Scaling totals (one per cell in the calling code).  Also flattened
        to 1-D, so an (n, 1) column vector is accepted.

    Returns
    -------
    numpy.ndarray
        1-D array with the same number of elements as *mu*.

    Notes
    -----
    A dense ``len(mu) x len(counts)`` intermediate matrix is allocated.
    """
    # Convert up front: multi-dimensional indexing of a pandas Series
    # (``mu[:, np.newaxis]``) is rejected by pandas >= 2.0, and ``.values``
    # would require *counts* to be a pandas object.
    mu = np.asarray(mu, dtype=float).ravel()
    counts = np.asarray(counts, dtype=float).ravel()

    # rates[i, j] = mu[i] * counts[j]
    rates = np.outer(mu, counts)

    return np.exp(-rates).sum(1) / len(counts)
# Show the first rows of the fifth dataset's per-gene table again, transposed
# so that each column is one gene.
datasets[4].var.head().transpose()
# Store, for every gene of every dataset, the zero fraction returned by
# prob_zero_fun for that gene's scaled mean and the per-cell total counts.
for adata in datasets:
    gene_rates = adata.var['scaled_count_mean']
    cell_totals = adata.obs['total_counts']
    adata.var['poisson_zero_fraction'] = prob_zero_fun(gene_rates, cell_totals)
# Write each annotated dataset to the output directory, using the name
# stored in its `uns` mapping as the file stem.
for adata in datasets:
    out_path = '../Data/output/' + adata.uns['name'] + '.h5ad'
    adata.write(out_path)