In [1]:
import numpy as np
import pandas as pd
import pickle

### Load in read count data (Qiime2 taxa barplot csv files)

Samples were subsampled to 45,386 reads. Samples with fewer than this number of reads after DADA2 processing were removed.

In [2]:
# Species-level read counts (Qiime2 taxa barplot export), one row per sample keyed by the 'index' column
df_seq_orig_species = pd.read_csv('data_files/species_counts_duodenum_45386.csv').set_index('index')

# All samples were subsampled to the same depth, so the first row's total is used as the read depth.
# numeric_only=True states explicitly that the text metadata columns are excluded (newer pandas no
# longer drops them silently), and .iloc[0] takes the first row by position instead of relying on
# the deprecated integer fallback of [0] on a label-indexed Series.
read_depth = df_seq_orig_species.sum(axis=1, numeric_only=True).iloc[0]

In [3]:
# Show the read depth taken from the first sample's row total
read_depth

45386.0

### Set the number of metadata columns in the sequencing data

In [4]:
# Number of trailing non-count columns in the sequencing table
# (the displayed table ends with the Description and Body_Site columns)
num_metadata_cols = 2

### Remove the duplicate sequencing samples and rename the index labels containing '_Duo' to just the sample ID

In [5]:
# Duplicate sequencing samples that are removed from the table
duplicate_samples = ['387_Duo', '388_Duo', '390_Duo', '391_Duo', '392_Duo', '394_Duo',
                     '409_Duo', '410_Duo', '418_Duo', '423_Duo', '425_Duo', '433_Duo']

# Remaining '_Duo' labels are reduced to the bare sample ID
duo_sample_ids = ['417', '434', '437', '438', '441', '446', '447', '448', '449', '451']
duo_relabel = {sample_id + '_Duo': sample_id for sample_id in duo_sample_ids}

df_seq_orig_species = (
    df_seq_orig_species
    .drop(duplicate_samples)
    .rename(duo_relabel, axis='index')
    .sort_index()
)

# Sample IDs become integers so they can be matched against the dPCR 'Sample' column later on
df_seq_orig_species.index = df_seq_orig_species.index.astype(int)
df_seq_orig_species

Unnamed: 0_level_0,D_0__Archaea;D_1__Euryarchaeota;D_2__Halobacteria;D_3__Halobacteriales;D_4__Haloferacaceae;D_5__Halopenitus;D_6__Halopenitus persicus,D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__Methanobacterium formicicum,D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__uncultured archaeon,D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobrevibacter;__,D_0__Archaea;D_1__Nanoarchaeaeota;D_2__Woesearchaeia;__;__;__;__,D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__Blastocatella;__,D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__uncultured;__,D_0__Bacteria;D_1__Acidobacteria;D_2__Holophagae;D_3__Subgroup 7;__;__;__,D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__Microtrichaceae;D_5__IMCC26207;D_6__uncultured bacterium,D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__uncultured;D_5__uncultured Acidimicrobidae bacterium;D_6__uncultured Acidimicrobidae bacterium,...,D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;D_5__Opitutus;D_6__uncultured Verrucomicrobia bacterium,D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;__;__,D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacterium,D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;__,D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Verrucomicrobiaceae;D_5__uncultured;D_6__uncultured 
bacterium,D_0__Bacteria;D_1__WPS-2;D_2__metagenome;D_3__metagenome;D_4__metagenome;D_5__metagenome;D_6__metagenome,D_0__Bacteria;__;__;__;__;__;__,Unassigned;__;__;__;__;__;__,Description,Body_Site
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
141,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,141,Duodenum
142,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,17.0,0.0,142,Duodenum
144,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,25.0,0.0,144,Duodenum
145,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,145,Duodenum
146,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,146,Duodenum
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
446,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,70.0,0.0,446_Duo,Duodenum
447,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,447_Duo,Duodenum
448,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,448_Duo,Duodenum
449,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,449_Duo,Duodenum


In [6]:
# This taxon was only found in the second batch of sequenced duodenum samples, which likely indicates
# it is a contaminant. It is removed because it interferes with a plot comparing saliva to duodenum samples.
contaminant_taxon = 'D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3__Bacillales;D_4__Paenibacillaceae;D_5__Paenibacillus;D_6__Paenibacillus darwinianus'
df_seq_orig_species = df_seq_orig_species.drop(columns=[contaminant_taxon])

### Load in absolute abundance data (dPCR)

In [7]:
# dPCR total-load measurements and the per-sample weight table
df_total_load = pd.read_excel('dPCR data/dPCR_total_loads_duodenum.xlsx', index_col=0)
df_weights = pd.read_csv('data_files/sample weights.csv')

# Merge the two dataframes together based on the sample ID
df_total_load = df_total_load.merge(df_weights, left_on='Sample', right_on='Study ID')


def _parse_weight(value):
    """Return ``value`` as a float, or NaN when it is not a number (e.g. 'no data', 'no weight', None)."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return np.nan


# Parse every weight once. The whole cell is converted instead of testing only its first character,
# so empty cells and values such as '.75' are classified correctly rather than raising or being
# treated as missing.
parsed_weight = df_total_load['Weight (mL)'].map(_parse_weight)

# Add a column saying whether the sample weight is present (True) or missing (False)
df_total_load['Weight (True/False)'] = parsed_weight.notna()

# Determine the average sample weight over the samples that have one
mean_weight = parsed_weight.dropna().mean()

# Create new column where any sample with a missing weight is set to the average weight of all samples
df_total_load['Corrected Weight (mL)'] = parsed_weight.fillna(mean_weight)

In [8]:
# List the samples that have no recorded weight, for reference
# (the original note gave N=11; the saved output lists 10 rows -- TODO confirm which is current)
df_total_load.loc[~df_total_load['Weight (True/False)']]

Unnamed: 0,Well,Concentration,PoissonConfMax,PoissonConfMin,Total,Positives,Primer,Sample,Dilution,Corrected Concentration,Study ID,Weight (mL),Weight (True/False),Corrected Weight (mL)
74,B08,1430.0,1456.0,1417.0,18706,13158,mod_Caporaso,215,10,1430000.0,215,no data,False,0.884958
88,E03,671.0,686.0,663.0,17883,7771,mod_Caporaso,233,500,33550000.0,233,no weight,False,0.884958
119,A10,23.3,25.7,22.0,17629,345,mod_Caporaso,280,10,23300.0,280,no data,False,0.884958
142,B06,268.0,277.0,264.0,19925,4064,mod_Caporaso,318,10,268000.0,318,no data,False,0.884958
145,F06,782.0,799.0,774.0,18020,8753,mod_Caporaso,324,10,782000.0,324,no data,False,0.884958
178,H02,286.0,294.0,282.0,20243,4368,mod_Caporaso,360,10,286000.0,360,no data,False,0.884958
189,C10,3079.0,3139.0,3050.0,19684,18247,mod_Caporaso,372,10,3079000.0,372,no sample,False,0.884958
201,G11,3910.0,4010.0,3860.0,16826,16220,mod_Caporaso,384,10,3910000.0,384,no data,False,0.884958
214,F04,16.7,18.7,15.7,19006,268,mod_Caporaso,400,10,16700.0,400,no data,False,0.884958
242,B09,90.1,94.9,87.7,18334,1352,mod_Caporaso,430,10,90100.0,430,no data,False,0.884958


### Normalize concentration to the input volume

In [9]:
## Set the lower dPCR threshold. 95% CI is +-1X and the dPCR blanks are <1cp/uL with +3std dev of ~1 cp/uL. 
## This means we would have ~2X resolution at 2 cp/uL.
## .copy() makes the filtered table an independent frame, so the column assignments below act on
## df_total_load itself rather than on a slice of the unfiltered table (avoids SettingWithCopyWarning).
df_total_load = df_total_load[(df_total_load['Concentration']>2)].copy()

## Calculate Copies/mL: dilution-corrected concentration divided by the (mean-imputed) sample weight
df_total_load['Copies/mL'] = df_total_load['Corrected Concentration']/df_total_load['Corrected Weight (mL)']
df_total_load['Log Copies/mL'] = np.log10(df_total_load['Copies/mL'])

In [10]:
# Write the per-sample total loads (copies/mL) to an Excel file
df_total_load.loc[:, ['Sample', 'Copies/mL']].to_excel('duodenum_total_loads.xlsx')

### Calculate LOD in terms of absolute abundance and relative abundance, 95% confidence of the template being added to the sample (3 copy input)

In [11]:
# These samples were diluted before placing sample in library reaction due to inhibitors preventing amplification in undiluted sample
# Keys are sample IDs, values are the dilution factor applied before library prep.
# NOTE(review): the original literal listed sample 423 twice (423:100 and later 423:50). Python keeps
# only the last value for a repeated key, so 50 is what was actually in effect and is retained here;
# the shadowed 423:100 entry has been removed. Please confirm which dilution is correct for sample 423.
diluted_samples = {437:10, 438:10, 441:10, 446:10, 447:10, 448:10, 449:10, 451:10,
                   395:100, 198:50, 423:50, 427:50, 373:10, 321:10, 169:10, 375:10, 353:10,
                   242:10, 411:10, 312:10, 433:2, 366:2}

# Create column to account for the fact that some samples were diluted before input into library prep reaction
# (samples not listed above were not diluted, i.e. factor 1)
df_total_load['Seq_Dilution'] = df_total_load['Sample'].map(lambda sample: diluted_samples.get(sample, 1))

In [12]:
# Volume (uL) added to the amplification rxn
seq_volume = 3.5
# Average number of template copies that must be loaded for a taxon to count as detectable
copy_input_threshold = 3

# Copies that actually entered the amplification reaction, after undoing the dPCR dilution
# and applying any extra dilution made before library prep
copies_in_rxn = (
    df_total_load['Concentration']
    .mul(df_total_load['Dilution'])
    .div(df_total_load['Seq_Dilution'])
    .mul(seq_volume)
)

# LOD as a percentage of the community, then converted to an absolute abundance
rel_lod_pct = (copy_input_threshold / copies_in_rxn) * 100

df_total_load['Copies in Amp Rxn'] = copies_in_rxn
df_total_load['Rel. Abundance LOD (%)'] = rel_lod_pct
df_total_load['Abs. Abundance LOD'] = rel_lod_pct.mul(df_total_load['Copies/mL']).div(100)
df_total_load

Unnamed: 0,Well,Concentration,PoissonConfMax,PoissonConfMin,Total,Positives,Primer,Sample,Dilution,Corrected Concentration,Study ID,Weight (mL),Weight (True/False),Corrected Weight (mL),Copies/mL,Log Copies/mL,Seq_Dilution,Copies in Amp Rxn,Rel. Abundance LOD (%),Abs. Abundance LOD
0,A02,400.0,411.0,394.0,17179,4950,mod_Caporaso,141,10,400000.0,141,1.6259,True,1.6259,2.460176e+05,5.390966,1,14000.0,0.021429,52.718055
1,B02,24.3,26.9,23.1,17527,359,mod_Caporaso,142,10,24300.0,142,0.2091,True,0.2091,1.162123e+05,5.065252,1,850.5,0.352734,409.920066
2,A05,373.0,383.0,368.0,19427,5276,mod_Caporaso,145,500,18650000.0,145,1.77,True,1.7700,1.053672e+07,7.022706,1,652750.0,0.000460,48.426150
3,D02,1234.0,1258.0,1222.0,17000,11044,mod_Caporaso,146,10,1234000.0,146,1.6973,True,1.6973,7.270371e+05,5.861557,1,43190.0,0.006946,50.500375
4,E02,642.0,657.0,634.0,17180,7226,mod_Caporaso,147,10,642000.0,147,1.7476,True,1.7476,3.673610e+05,5.565093,1,22470.0,0.013351,49.046856
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255,D01,5810.0,6060.0,5590.0,13577,13480,mod_Caporaso,451,500,290500000.0,451,0.605,True,0.6050,4.801653e+08,8.681391,10,1016750.0,0.000295,1416.765053
256,A07,154.0,161.0,150.0,14056,1720,mod_Caporaso,207,25000,385000000.0,207,1.8095,True,1.8095,2.127660e+08,8.327902,1,13475000.0,0.000022,47.369044
257,B07,170.0,177.0,167.0,16105,2169,mod_Caporaso,274,25000,425000000.0,274,1.083,True,1.0830,3.924284e+08,8.593760,1,14875000.0,0.000020,79.145231
258,C07,405.0,417.0,399.0,15574,4539,mod_Caporaso,322,5000,202500000.0,322,0.219,True,0.2190,9.246575e+08,8.965981,1,7087500.0,0.000042,391.389432


### Generate dictionary for easier downstream conversion of relative to absolute abundances

In [13]:
# Map each sample ID to its total load (copies/mL); if a sample ID occurs more than once,
# the value from its last row is the one kept
total_load_dict = dict(zip(df_total_load['Sample'].values, df_total_load['Copies/mL'].values))
len(total_load_dict)

256

### Determine LOD thresholds. The larger (more conservative) of the two LODs is used: if the LOD from Poisson loading is below the LOD from sequencing, use the sequencing value. LOD from sequencing is based on a 50% CV from replicates (Fig 2d from quant-seq paper).

In [14]:
# Sequencing-based lower limit, computed as a power law of the read depth and compared against the
# relative LOD (%) below (coefficients presumably come from the quant-seq paper fit -- TODO confirm)
seq_lloq = 7.115*(read_depth**(-0.556))

# Keep the Poisson-loading LOD where it exceeds the sequencing limit; otherwise use the sequencing limit
poisson_lod = df_total_load['Rel. Abundance LOD (%)']
df_total_load['Rel. Abundance LOD (%) Corrected'] = np.where(poisson_lod > seq_lloq, poisson_lod, seq_lloq)

In [15]:
# Per-sample LOD expressed in reads: the corrected relative LOD (%) scaled by the read depth
lod_in_reads = (df_total_load['Rel. Abundance LOD (%) Corrected'] * read_depth / 100).values
lod_dict = dict(zip(df_total_load['Sample'].values, lod_in_reads))

### Filter out samples without accurate total loads and store metadata in a separate dataframe

In [16]:
# Keep only the sequenced samples that have a total load, and only the taxa (non-metadata) columns
has_total_load = df_seq_orig_species.index.isin(list(total_load_dict))
taxa_columns = df_seq_orig_species.columns[:-num_metadata_cols]
df_seq_samples = df_seq_orig_species.loc[has_total_load, taxa_columns]

# Only the last (num_metadata_cols - 1) columns are kept as metadata, because the description
# column is not needed: it is already stored as the index
seq_metadata = df_seq_orig_species.iloc[:, -(num_metadata_cols - 1):]
df_seq_samples

Unnamed: 0_level_0,D_0__Archaea;D_1__Euryarchaeota;D_2__Halobacteria;D_3__Halobacteriales;D_4__Haloferacaceae;D_5__Halopenitus;D_6__Halopenitus persicus,D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__Methanobacterium formicicum,D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__uncultured archaeon,D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobrevibacter;__,D_0__Archaea;D_1__Nanoarchaeaeota;D_2__Woesearchaeia;__;__;__;__,D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__Blastocatella;__,D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__uncultured;__,D_0__Bacteria;D_1__Acidobacteria;D_2__Holophagae;D_3__Subgroup 7;__;__;__,D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__Microtrichaceae;D_5__IMCC26207;D_6__uncultured bacterium,D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__uncultured;D_5__uncultured Acidimicrobidae bacterium;D_6__uncultured Acidimicrobidae bacterium,...,D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Chthoniobacterales;D_4__Chthoniobacteraceae;D_5__Chthoniobacter;D_6__uncultured bacterium,D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Chthoniobacterales;D_4__Xiphinematobacteraceae;D_5__Candidatus Xiphinematobacter;D_6__metagenome,D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;D_5__Opitutus;D_6__uncultured Verrucomicrobia bacterium,D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;__;__,D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured 
bacterium,D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;__,D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Verrucomicrobiaceae;D_5__uncultured;D_6__uncultured bacterium,D_0__Bacteria;D_1__WPS-2;D_2__metagenome;D_3__metagenome;D_4__metagenome;D_5__metagenome;D_6__metagenome,D_0__Bacteria;__;__;__;__;__;__,Unassigned;__;__;__;__;__;__
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
141,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
142,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0,0.0
144,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,0.0
145,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
146,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
446,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,70.0,0.0
447,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
448,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
449,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Set abundance to zero for taxa below LOD defined by # molecules input into amplification rxn or sequencing 50% CV threshold
This is defined as the load at which there should be a 95% chance of one copy being loaded into the amplification reaction (3 copy average).

In [17]:
# Per-sample LOD (in reads) in the same order as the rows of the count table.
# A plain dict lookup is used so that a sample without an LOD still raises KeyError.
lod_per_sample = np.array([lod_dict[sample] for sample in df_seq_samples.index], dtype=float)

# Zero every count that does not exceed its own sample's LOD. This is done in one vectorised pass
# instead of growing a DataFrame column by column with a row-wise apply per taxon.
above_lod = df_seq_samples.values > lod_per_sample[:, None]
df_species_lod_filter = df_seq_samples.where(above_lod, 0)

# Remove rows (samples) that have zero counts after filtering
df_species_lod_filter = df_species_lod_filter[df_species_lod_filter.sum(axis=1)>0]

# Remove columns (taxa) that have zero counts after filtering
df_species_lod_filter = df_species_lod_filter.loc[:, (df_species_lod_filter != 0).any(axis=0)]
df_species_lod_filter

Unnamed: 0_level_0,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Actinomycetales;D_4__Actinomycetaceae;D_5__F0332;D_6__uncultured bacterium,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Alloscardovia;D_6__Bifidobacterium longum subsp. longum,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Scardovia;D_6__unidentified,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium kroppenstedtii,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium pseudodiphtheriticum,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;__,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;D_6__Corynebacterium durum,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;__,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Lawsonella;D_6__uncultured bacterium,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;__;__,...,D_0__Bacteria;D_1__Synergistetes;D_2__Synergistia;D_3__Synergistales;D_4__Synergistaceae;D_5__Pyramidobacter;D_6__Pyramidobacter piscolens,D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__Firmicutes oral clone FM046;D_5__Firmicutes oral clone FM046;D_6__Firmicutes oral clone FM046,D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__gut metagenome;D_5__gut metagenome;D_6__gut metagenome,D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__uncultured bacterium;D_5__uncultured bacterium;D_6__uncultured 
bacterium,D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;__;__;__,D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;D_6__Mycoplasma salivarium ATCC 23064,D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;__,D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacterium,D_0__Bacteria;__;__;__;__;__;__,Unassigned;__;__;__;__;__;__
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
141,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
142,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,328.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
144,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,0.0
145,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
146,0.0,11.0,93.0,0.0,0.0,0.0,75.0,18.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
446,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
447,0.0,0.0,13.0,0.0,0.0,0.0,18.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
448,0.0,0.0,13.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
449,0.0,0.0,715.0,0.0,0.0,0.0,1306.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Determine which samples (if any) were filtered out

In [18]:
# Sample IDs before and after the LOD filter
orig_indexes = df_seq_samples.index.tolist()
filter_indexes = df_species_lod_filter.index.tolist()

# Samples present before filtering but absent afterwards (order is not meaningful)
lost = list(set(orig_indexes).difference(filter_indexes))

In [19]:
# dPCR rows of any sample that was removed by the LOD filter
df_total_load.loc[df_total_load['Sample'].isin(lost)]

Unnamed: 0,Well,Concentration,PoissonConfMax,PoissonConfMin,Total,Positives,Primer,Sample,Dilution,Corrected Concentration,...,Weight (mL),Weight (True/False),Corrected Weight (mL),Copies/mL,Log Copies/mL,Seq_Dilution,Copies in Amp Rxn,Rel. Abundance LOD (%),Abs. Abundance LOD,Rel. Abundance LOD (%) Corrected


### Generate dataframes for each taxonomy level

In [20]:
def collapse_taxonomy(_df, level):
    """Sum the columns of ``_df`` that share the same lineage once truncated to ``level``.

    Each column name is a ';'-separated lineage. A fixed number of trailing fields is dropped
    from every name ('Genus' drops the last one, 'Family' the last two, ... 'Phylum' the last
    five) and columns whose truncated names coincide are added together element-wise.

    Parameters
    ----------
    _df : pandas.DataFrame
        One column per lineage. The frame is not modified.
    level : str
        One of 'Phylum', 'Class', 'Order', 'Family', 'Genus'.

    Returns
    -------
    pandas.DataFrame
        Frame with the same index as ``_df`` and one column per distinct truncated lineage,
        in order of first appearance. A frame without columns yields a frame without columns.

    Raises
    ------
    ValueError
        If ``level`` is not one of the supported names.
    """
    # Number of trailing ';'-separated fields to drop for each target level
    fields_to_drop = {'Genus': 1, 'Family': 2, 'Order': 3, 'Class': 4, 'Phylum': 5}
    try:
        cut = -fields_to_drop[level]
    except (KeyError, TypeError):
        raise ValueError('Could not interpret taxonomy level. Please use (Phylum, Class, Order, Family, Genus)')

    # Iterate through columns adding values together for each sample if the new column name already exists
    collapsed_dict = {}
    for col in _df:
        new_col = ";".join(col.split(';')[:cut])
        values = np.array(_df[col])

        if new_col in collapsed_dict:
            # Out-of-place addition: an in-place `+=` cannot upcast, so an integer column followed
            # by a float column in the same group would raise a casting error.
            collapsed_dict[new_col] = collapsed_dict[new_col] + values
        else:
            collapsed_dict[new_col] = values

    # Passing the index to the constructor also works when there are no columns at all
    return pd.DataFrame(collapsed_dict, index=_df.index)

In [21]:
# Positions 0-4 hold the tables collapsed to each level, from least to most specific
collapse_levels = ['Phylum', 'Class', 'Order', 'Family', 'Genus']
df_lod_list = [collapse_taxonomy(df_species_lod_filter, level) for level in collapse_levels]

# Position 5 is the un-collapsed species-level table itself
df_lod_list.append(df_species_lod_filter)

### Generate relative and absolute abundance tables

In [22]:
df_rel_lod_list = []
df_abs_lod_list = []

df_pseudo_rel_lod_list = []
df_pseudo_abs_lod_list = []

# Pseudocount of 0.1 reads expressed as a relative abundance (%)
pseudo_rel_pct = (0.1/read_depth)*100

for counts in df_lod_list:
    # Total load (copies/mL) of each sample, in the row order of the current table
    sample_loads = np.array([total_load_dict[sample] for sample in counts.index])

    # Relative abundance (%) and the matching absolute abundance
    rel_pct = counts.div(read_depth, axis=0).multiply(100)
    df_rel_lod_list.append(rel_pct)
    df_abs_lod_list.append(rel_pct.mul(sample_loads, axis=0).div(100))

    # Same tables with the pseudocount added to every entry
    pseudo_pct = rel_pct + pseudo_rel_pct
    df_pseudo_rel_lod_list.append(pseudo_pct)
    df_pseudo_abs_lod_list.append(pseudo_pct.mul(sample_loads, axis=0).div(100))

df_pseudo_abs_lod_list[5]

Unnamed: 0_level_0,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Actinomycetales;D_4__Actinomycetaceae;D_5__F0332;D_6__uncultured bacterium,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Alloscardovia;D_6__Bifidobacterium longum subsp. longum,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Scardovia;D_6__unidentified,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium kroppenstedtii,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium pseudodiphtheriticum,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;__,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;D_6__Corynebacterium durum,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;__,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Lawsonella;D_6__uncultured bacterium,D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;__;__,...,D_0__Bacteria;D_1__Synergistetes;D_2__Synergistia;D_3__Synergistales;D_4__Synergistaceae;D_5__Pyramidobacter;D_6__Pyramidobacter piscolens,D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__Firmicutes oral clone FM046;D_5__Firmicutes oral clone FM046;D_6__Firmicutes oral clone FM046,D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__gut metagenome;D_5__gut metagenome;D_6__gut metagenome,D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__uncultured bacterium;D_5__uncultured bacterium;D_6__uncultured 
bacterium,D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;__;__;__,D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;D_6__Mycoplasma salivarium ATCC 23064,D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;__,D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacterium,D_0__Bacteria;__;__;__;__;__;__,Unassigned;__;__;__;__;__;__
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
141,0.542056,0.542056,0.542056,0.542056,0.542056,0.542056,5.420561e-01,0.542056,0.542056,0.542056,...,0.542056,0.542056,0.542056,0.542056,0.542056,0.542056,0.542056,0.542056,0.542056,0.542056
142,0.256053,0.256053,0.256053,0.256053,0.256053,0.256053,2.560533e-01,0.256053,0.256053,0.256053,...,0.256053,840.110790,0.256053,0.256053,0.256053,0.256053,0.256053,0.256053,0.256053,0.256053
144,2.677622,2.677622,2.677622,2.677622,2.677622,2.677622,2.677622e+00,2.677622,2.677622,2.677622,...,2.677622,2.677622,2.677622,2.677622,2.677622,2.677622,2.677622,2.677622,672.083146,2.677622
145,23.215800,23.215800,23.215800,23.215800,23.215800,23.215800,2.321580e+01,23.215800,23.215800,23.215800,...,23.215800,23.215800,23.215800,23.215800,23.215800,23.215800,23.215800,23.215800,23.215800,23.215800
146,1.601897,177.810588,1491.366284,1.601897,1.601897,1.601897,1.203025e+03,289.943391,1.601897,1.601897,...,1.601897,1.601897,1.601897,1.601897,1.601897,1.601897,1.601897,1.601897,1.601897,1.601897
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
446,1.573802,1.573802,1.573802,1.573802,1.573802,1.573802,1.573802e+00,1.573802,1.573802,1.573802,...,1.573802,1.573802,1.573802,1.573802,1.573802,1.573802,1.573802,1.573802,1.573802,1.573802
447,28.031160,28.031160,3672.081944,28.031160,28.031160,28.031160,5.073640e+03,28.031160,28.031160,28.031160,...,28.031160,28.031160,28.031160,28.031160,28.031160,28.031160,28.031160,28.031160,28.031160,28.031160
448,9.311661,9.311661,1219.827534,9.311661,9.311661,9.311661,1.312944e+03,9.311661,9.311661,9.311661,...,9.311661,9.311661,9.311661,9.311661,9.311661,9.311661,9.311661,9.311661,9.311661,9.311661
449,1.855765,1.855765,13270.577008,1.855765,1.855765,1.855765,2.423815e+04,1.855765,1.855765,1.855765,...,1.855765,1.855765,1.855765,1.855765,1.855765,1.855765,1.855765,1.855765,1.855765,1.855765


### Transform column taxa names into unique IDs.
This overcomes downstream issue when multiple columns have the same name

In [23]:
df_col_names_lod_list = []

# NOTE: this cell overwrites the column names in place, so running it a second time would record
# the ASV IDs (not the lineages) in the 'taxonomy' column.
for level, rel_table in enumerate(df_rel_lod_list):
    asv_ids = ['ASV{}'.format(position) for position in range(len(rel_table.columns))]

    # Lookup table from ASV ID back to the full lineage, captured before the rename
    df_col_names_lod_list.append(pd.DataFrame({'taxonomy': rel_table.columns.tolist()}, index=asv_ids))

    # The four tables of a level share the same column order, so they share the same IDs
    for tables in (df_rel_lod_list, df_abs_lod_list, df_pseudo_rel_lod_list, df_pseudo_abs_lod_list):
        tables[level].columns = asv_ids

df_col_names_lod_list[0]

Unnamed: 0,taxonomy
ASV0,D_0__Bacteria;D_1__Actinobacteria
ASV1,D_0__Bacteria;D_1__Bacteroidetes
ASV2,D_0__Bacteria;D_1__Chloroflexi
ASV3,D_0__Bacteria;D_1__Cyanobacteria
ASV4,D_0__Bacteria;D_1__Epsilonbacteraeota
ASV5,D_0__Bacteria;D_1__Firmicutes
ASV6,D_0__Bacteria;D_1__Fusobacteria
ASV7,D_0__Bacteria;D_1__Patescibacteria
ASV8,D_0__Bacteria;D_1__Proteobacteria
ASV9,D_0__Bacteria;D_1__Spirochaetes


### Generate shorter taxonomy names for plotting purposes

In [24]:
# Names that carry no taxonomic information; a rank holding one of these is skipped when labelling
exclusion_list = ['', 'uncultured bacterium', 'metagenome', 'uncultured',
                  'gut metagenome', 'uncultured organism', 'unidentified',
                  'uncultured Bacteroidales bacterium', 'uncultured Mollicutes bacterium', 'uncultured archaeon']

# Ranks in the order they appear in a ';'-separated lineage
taxonomy_ranks = ['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']


def _strip_rank_prefix(name):
    """Return ``name`` without its 'D_<n>__' prefix.

    The bare '__' placeholder (and any non-string value) becomes ''. Names without the prefix,
    such as 'Unassigned', are returned unchanged; a blind ``[5:]`` slice would truncate them
    (e.g. 'Unassigned' -> 'igned').
    """
    if not isinstance(name, str) or name == '__':
        return ''
    if name.startswith('D_') and '__' in name:
        return name.split('__', 1)[1]
    return name


def _short_label(rank_names, ranks):
    """Build '<name>(<rank initial>)' from the most specific rank whose name is informative.

    ``rank_names`` and ``ranks`` are parallel sequences ordered from Kingdom downwards. If every
    rank is in ``exclusion_list`` the top rank is used anyway.
    """
    for rank, raw_name in zip(reversed(ranks), reversed(rank_names)):
        short_name = _strip_rank_prefix(raw_name)
        if short_name not in exclusion_list:
            return short_name + '(' + rank[0].lower() + ')'
    return _strip_rank_prefix(rank_names[0]) + '(' + ranks[0][0].lower() + ')'


for i in range(6):
    names_df = df_col_names_lod_list[i]

    # Level i holds lineages truncated to i + 2 ranks (0 -> Kingdom;Phylum ... 5 -> down to Species)
    ranks = taxonomy_ranks[:i + 2]
    split_lineage = names_df['taxonomy'].str.split(';', expand=True)
    split_lineage.columns = ranks
    for rank in ranks:
        names_df[rank] = split_lineage[rank]

    # Rank columns are selected by name, not by position from the end of the row, so the result does
    # not depend on which other columns the table holds and the cell can safely be re-run.
    names_df['label'] = [
        _short_label(rank_names, ranks)
        for rank_names in names_df[ranks].itertuples(index=False, name=None)
    ]

### Sort the columns by the mean absolute abundance of taxa across all samples

In [25]:
# One sorted DataFrame is appended per list position (0-5) in the loop below.
df_rel_sort_lod_list, df_abs_sort_lod_list = [], []
df_pseudo_rel_sort_lod_list, df_pseudo_abs_sort_lod_list = [], []

for i in range(6):
    # Column order for this position: columns of the absolute table ranked by
    # their mean over all rows, largest first.
    taxa_sorted = df_abs_lod_list[i].mean().sort_values(ascending=False).index

    # Apply that single ordering to every table so their columns stay aligned.
    df_rel_sort_lod_list.append(df_rel_lod_list[i].loc[:, taxa_sorted])
    df_abs_sort_lod_list.append(df_abs_lod_list[i].loc[:, taxa_sorted])
    df_pseudo_rel_sort_lod_list.append(df_pseudo_rel_lod_list[i].loc[:, taxa_sorted])
    df_pseudo_abs_sort_lod_list.append(df_pseudo_abs_lod_list[i].loc[:, taxa_sorted])

# Preview the sorted absolute table stored at position 4.
df_abs_sort_lod_list[4]

Unnamed: 0_level_0,ASV76,ASV259,ASV255,ASV191,ASV247,ASV29,ASV188,ASV241,ASV61,ASV81,...,ASV256,ASV111,ASV262,ASV89,ASV226,ASV269,ASV151,ASV229,ASV180,ASV31
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
141,2.577477e+04,7252.710875,0.000000e+00,1.036953e+04,0.000000e+00,76511.221224,3.498972e+04,3908.224619,6.016823e+02,0.000000e+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,54.205612
142,3.738122e+04,0.000000,0.000000e+00,1.258758e+04,0.000000e+00,6001.888725,1.603150e+04,0.000000,3.774225e+03,0.000000e+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
144,1.877013e+04,267.762210,2.583905e+04,0.000000e+00,1.155207e+06,0.000000,2.677622e+02,0.000000,9.103915e+02,2.945384e+02,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
145,1.950127e+04,808142.011487,5.256522e+06,2.321580e+03,0.000000e+00,0.000000,1.085107e+06,3018.054050,2.785896e+03,1.798064e+06,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
146,3.716241e+05,17124.280966,5.446450e+02,1.728447e+04,0.000000e+00,8538.112025,3.996733e+04,20087.790768,2.745652e+04,0.000000e+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
446,3.187421e+05,13817.980372,0.000000e+00,5.738082e+04,0.000000e+00,38778.477945,6.306224e+04,19121.692655,3.322296e+04,0.000000e+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
447,8.643969e+06,22424.927902,0.000000e+00,9.110127e+04,0.000000e+00,58865.435744,7.904787e+05,15977.761130,5.894953e+05,0.000000e+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
448,1.287523e+06,290989.392571,0.000000e+00,8.101145e+04,0.000000e+00,772774.710062,3.945351e+05,112671.092803,4.614859e+05,0.000000e+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
449,4.342119e+05,83991.933352,0.000000e+00,2.551677e+04,0.000000e+00,0.000000,3.099128e+03,27780.805176,3.305118e+04,0.000000e+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000


### Save the working files to allow use in individual analysis workbooks

In [26]:
# Persist the working objects so the individual analysis notebooks can load them.
# Each file is written inside a `with` block so the handle is flushed and closed
# deterministically (the previous inline `open(...)` calls never closed the file).
# NOTE: the 'pickle_files/' directory must already exist.
# NOTE: only unpickle these files from a trusted location; pickle.load can
# execute arbitrary code.
pickle_targets = [
    ('pickle_files/rel_sort_lod_list.pkl', df_rel_sort_lod_list),
    ('pickle_files/abs_sort_lod_list.pkl', df_abs_sort_lod_list),
    ('pickle_files/pseudo_rel_sort_lod_list.pkl', df_pseudo_rel_sort_lod_list),
    ('pickle_files/pseudo_abs_sort_lod_list.pkl', df_pseudo_abs_sort_lod_list),
    ('pickle_files/col_names_lod_list.pkl', df_col_names_lod_list),
    ('pickle_files/total_load_duodenum.pkl', df_total_load),
    ('pickle_files/seq_duodenum_metadata.pkl', seq_metadata),
]

for pkl_path, pkl_obj in pickle_targets:
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(pkl_obj, pkl_file)

In [34]:
# Column labels of the first 10 columns of the sorted absolute table at position 5,
# used to pull the matching rows (taxonomy split + label) from the column-name table.
top10_taxa = list(df_abs_sort_lod_list[5].columns[:10])
df_col_names_lod_list[5].loc[top10_taxa]

Unnamed: 0,taxonomy,Kingdom,Phylum,Class,Order,Family,Genus,Species,label
ASV193,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,D_0__Bacteria,D_1__Firmicutes,D_2__Bacilli,D_3__Lactobacillales,D_4__Streptococcaceae,D_5__Streptococcus,__,Streptococcus(g)
ASV501,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,D_0__Bacteria,D_1__Proteobacteria,D_2__Gammaproteobacteria,D_3__Pasteurellales,D_4__Pasteurellaceae,D_5__Haemophilus,__,Haemophilus(g)
ASV494,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,D_0__Bacteria,D_1__Proteobacteria,D_2__Gammaproteobacteria,D_3__Enterobacteriales,D_4__Enterobacteriaceae,__,__,Enterobacteriaceae(f)
ASV404,D_0__Bacteria;D_1__Fusobacteria;D_2__Fusobacte...,D_0__Bacteria,D_1__Fusobacteria,D_2__Fusobacteriia,D_3__Fusobacteriales,D_4__Fusobacteriaceae,D_5__Fusobacterium,__,Fusobacterium(g)
ASV486,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,D_0__Bacteria,D_1__Proteobacteria,D_2__Gammaproteobacteria,D_3__Enterobacteriales,D_4__Enterobacteriaceae,D_5__Escherichia-Shigella,__,Escherichia-Shigella(g)
ASV397,D_0__Bacteria;D_1__Firmicutes;D_2__Negativicut...,D_0__Bacteria,D_1__Firmicutes,D_2__Negativicutes,D_3__Selenomonadales,D_4__Veillonellaceae,D_5__Veillonella,__,Veillonella(g)
ASV477,D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...,D_0__Bacteria,D_1__Proteobacteria,D_2__Gammaproteobacteria,D_3__Betaproteobacteriales,D_4__Neisseriaceae,D_5__Neisseria,D_6__uncultured bacterium,Neisseria(g)
ASV67,D_0__Bacteria;D_1__Bacteroidetes;D_2__Bacteroi...,D_0__Bacteria,D_1__Bacteroidetes,D_2__Bacteroidia,D_3__Bacteroidales,D_4__Prevotellaceae,D_5__Prevotella 7,D_6__Prevotella melaninogenica,Prevotella melaninogenica(s)
ASV155,D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...,D_0__Bacteria,D_1__Firmicutes,D_2__Bacilli,D_3__Bacillales,D_4__Family XI,D_5__Gemella,__,Gemella(g)
ASV206,D_0__Bacteria;D_1__Firmicutes;D_2__Clostridia;...,D_0__Bacteria,D_1__Firmicutes,D_2__Clostridia,D_3__Clostridiales,D_4__Clostridiaceae 1,D_5__Clostridium sensu stricto 1,__,Clostridium sensu stricto 1(g)
