import numpy as np
import pandas as pd
import pickle
num_metadata_cols = 3
Samples were subsampled to 43,922 reads. Samples with fewer than this number of reads after DADA2 processing were removed.
df_seq_orig_species = pd.read_csv('data_files/species_counts_paired_43922.csv').set_index('index')
# All samples share the same subsampled read depth, so take the first row's total
read_depth = df_seq_orig_species[df_seq_orig_species.columns[:-num_metadata_cols]].sum(axis=1).iloc[0]
read_depth
43922
# This taxon was present only in the second batch of sequenced duodenum samples, likely indicating that it is a contaminant.
# It is removed because it interferes with a plot comparing saliva and duodenum samples.
df_seq_orig_species.drop(['D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3__Bacillales;D_4__Paenibacillaceae;D_5__Paenibacillus;D_6__Paenibacillus darwinianus'], axis=1, inplace=True)
df_total_load = pd.read_excel('dPCR data/dPCR_total_loads_paired.xlsx', index_col=0)
df_total_load['Sample'] = df_total_load['Sample'].astype(str)
Several saliva volumes (weights) were missing. The volume was set to the average of all other saliva samples.
Stool samples did not have recorded weights, so they were approximated: the median collected stool weight is 0.5 g, and stool is collected in Omnigut tubes containing 2 mL of fluid, of which 1/8 is used for extraction. The stool weight was therefore set to 0.5/8 g (0.0625 g).
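A sketch of that imputation (the 'Type' column name and its labels are hypothetical; the actual values may already be filled in upstream in the Excel file):
# Hypothetical sketch; 'Type' and its labels are assumptions, not from the data file
saliva = df_total_load['Type'] == 'Saliva'
df_total_load.loc[saliva, 'Weight'] = df_total_load.loc[saliva, 'Weight'].fillna(df_total_load.loc[saliva, 'Weight'].mean())
df_total_load.loc[df_total_load['Type'] == 'Stool', 'Weight'] = 0.5/8  # 0.0625 g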
## Filter out samples below LLOQ
df_total_load = df_total_load[df_total_load['Concentration']>2]
## Calculate Copies/mL
df_total_load['Copies/mL'] = df_total_load['Corrected Concentration']/df_total_load['Weight']
df_total_load['Log Copies/mL'] = np.log10(df_total_load['Copies/mL'])
# These samples were diluted before being added to the library prep reaction because inhibitors prevented amplification in the undiluted sample
seq_dilutions = pd.read_csv('dPCR data/seq_dilution_paired.csv')
seq_dilutions_dict = dict(zip(seq_dilutions['mod_ID'], seq_dilutions['Seq_Dilution']))
# Create column to account for the fact that some samples were diluted before input into library prep reaction
df_total_load['Seq_Dilution'] = df_total_load.apply(lambda x: seq_dilutions_dict[x['mod_ID']], axis=1)
# Volume (uL) of sample added to the amplification reaction
seq_volume = 3.5
# Minimum average copy input considered reliably detectable (see the LOD definition below)
copy_input_threshold = 3
df_total_load['Copies in Amp Rxn'] = df_total_load['Concentration']*df_total_load['Dilution']/df_total_load['Seq_Dilution']*seq_volume
df_total_load['Rel. Abundance LOD (%)'] = copy_input_threshold/df_total_load['Copies in Amp Rxn']*100
df_total_load['Abs. Abundance LOD'] = df_total_load['Rel. Abundance LOD (%)']*df_total_load['Copies/mL']/100
df_total_load
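To make the unit bookkeeping concrete, here is the same chain evaluated with illustrative numbers (not taken from the dataset):
# Illustrative values only, not from the data
concentration = 100.0  # measured dPCR concentration, copies/uL
dilution = 10.0        # dilution factor applied for the dPCR measurement (assumed meaning of 'Dilution')
seq_dilution = 2.0     # extra dilution before library prep
copies_in_rxn = concentration * dilution / seq_dilution * seq_volume  # 100*10/2*3.5 = 1750 copies
rel_lod_pct = copy_input_threshold / copies_in_rxn * 100              # 3/1750*100 ~ 0.17 %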
total_load_dict = dict(zip(df_total_load['mod_ID'], df_total_load['Copies/mL']))
len(total_load_dict)
42
# Sequencing LLOQ (%), a power-law function of read depth
seq_lloq = 7.115*(read_depth**(-0.556))
# Floor each sample's relative-abundance LOD at the sequencing LLOQ
df_total_load['Rel. Abundance LOD (%) Corrected'] = df_total_load['Rel. Abundance LOD (%)'].where(df_total_load['Rel. Abundance LOD (%)']>seq_lloq, seq_lloq)
# Express each sample's LOD as a read-count threshold
lod_dict = dict(zip(df_total_load['mod_ID'], df_total_load['Rel. Abundance LOD (%) Corrected']*read_depth/100))
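For scale at this read depth:
print(seq_lloq)                 # ~0.019 %, the sequencing LLOQ floor
print(0.17 / 100 * read_depth)  # ~75 reads, the illustrative 0.17 % LOD above as a count threshold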
df_seq_samples = df_seq_orig_species.loc[df_seq_orig_species.index.isin(total_load_dict.keys()), df_seq_orig_species.columns[:-num_metadata_cols]]
# This is num_metadata_cols-1 because we don't need the description column since it is already stored as the index
seq_metadata = df_seq_orig_species[df_seq_orig_species.columns[-1*(num_metadata_cols-1):]]
df_seq_samples
42 rows × 755 columns
Counts below each sample's limit of detection (LOD) are now zeroed out. The LOD is defined as the abundance at which there is a 95% chance of at least one copy being loaded into the amplification reaction, i.e. a 3-copy average, since under Poisson loading 1 - e^-3 ≈ 0.95.
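A quick numerical check of that Poisson claim:
import math
# Probability of loading at least one copy when the mean input is 3 copies
print(1 - math.exp(-3))  # 0.9502...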
df_species_lod_filter = pd.DataFrame()
for col in df_seq_samples.columns:
df_species_lod_filter[col] = df_seq_samples.apply(lambda x: x[col] if x[col]>lod_dict[x.name] else 0, axis=1)
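An equivalent vectorized form of this filter (a sketch over the same inputs) avoids the per-column apply:
# Vectorized equivalent: pd.Series(lod_dict) aligns per-sample thresholds on the row index,
# and where() zeroes out every count at or below its sample's LOD
lod_series = pd.Series(lod_dict)
df_species_lod_filter = df_seq_samples.where(df_seq_samples.gt(lod_series, axis=0), 0)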
# Remove rows (samples) that have zero counts after filtering
df_species_lod_filter = df_species_lod_filter[df_species_lod_filter.sum(axis=1)>0]
# Remove columns (taxa) that have zero counts after filtering
df_species_lod_filter = df_species_lod_filter.loc[:, (df_species_lod_filter != 0).any(axis=0)]
df_species_lod_filter
42 rows × 222 columns
orig_indexes = df_seq_samples.index.tolist()
filter_indexes = df_species_lod_filter.index.tolist()
lost = list(set(orig_indexes) - set(filter_indexes))
df_total_load[df_total_load['Sample'].isin(lost)]
0 rows × 21 columns
def collapse_taxonomy(_df, level):
collapsed_dict = {}
index=0
# Evaluate the selected taxonomy level to collapse to
if level == 'Genus':
index = -1
elif level == 'Family':
index = -2
elif level == 'Order':
index = -3
elif level == 'Class':
index = -4
elif level == 'Phylum':
index = -5
else:
        raise ValueError('Could not interpret taxonomy level. Please use (Phylum, Class, Order, Family, Genus)')
# Iterate through columns adding values together for each sample if the new column name already exists
for col in _df:
new_col = ";".join(col.split(';')[:index])
if new_col in collapsed_dict.keys():
collapsed_dict[new_col] += np.array(_df[col])
else:
collapsed_dict[new_col] = np.array(_df[col])
df_collapsed = pd.DataFrame.from_dict(collapsed_dict).set_index(_df.index)
return df_collapsed
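A more compact equivalent uses transpose-and-groupby (a sketch; note that groupby sorts the collapsed taxa alphabetically instead of preserving first-seen column order):
def collapse_taxonomy_groupby(_df, level):
    # Assumes the same semicolon-delimited 'D_0__...;D_1__...' taxonomy strings
    offsets = {'Genus': -1, 'Family': -2, 'Order': -3, 'Class': -4, 'Phylum': -5}
    new_cols = [';'.join(c.split(';')[:offsets[level]]) for c in _df.columns]
    # Group the transposed taxa rows by their truncated names and sum counts
    return _df.T.groupby(new_cols).sum().T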
df_lod_list = [None]*6
df_lod_list[0] = collapse_taxonomy(df_species_lod_filter, 'Phylum')
df_lod_list[1] = collapse_taxonomy(df_species_lod_filter, 'Class')
df_lod_list[2] = collapse_taxonomy(df_species_lod_filter, 'Order')
df_lod_list[3] = collapse_taxonomy(df_species_lod_filter, 'Family')
df_lod_list[4] = collapse_taxonomy(df_species_lod_filter, 'Genus')
df_lod_list[5] = df_species_lod_filter
df_rel_lod_list = [None]*6
df_abs_lod_list = [None]*6
df_pseudo_rel_lod_list = [None]*6
df_pseudo_abs_lod_list = [None]*6
for index, df in enumerate(df_lod_list):
    # Relative abundance (%) of each taxon
    df_rel_lod_list[index] = df.div(read_depth, axis=0).multiply(100)
    # Absolute abundance (copies/mL): relative abundance scaled by each sample's total load
    df_abs_lod_list[index] = df_rel_lod_list[index].apply(lambda x: x*total_load_dict[x.name], axis=1).div(100)
    # Add a pseudocount equivalent to 0.01 reads (~2.3e-5 % at this depth) so that
    # zero values remain defined under downstream log transforms
    df_pseudo_rel_lod_list[index] = df_rel_lod_list[index]+(0.01/read_depth)*100
    df_pseudo_abs_lod_list[index] = df_pseudo_rel_lod_list[index].apply(lambda x: x*total_load_dict[x.name], axis=1).div(100)
df_rel_lod_list[0]
Renaming columns to unique ASV identifiers avoids a downstream issue in which multiple collapsed taxa share the same column name; the original taxonomy strings are kept in a lookup table.
df_col_names_lod_list = [None]*6
for index, df in enumerate(df_rel_lod_list):
num_cols = len(df.columns)
col_names = ['ASV' + str(x) for x in range(num_cols)]
df_col_names_lod_list[index] = pd.DataFrame(index=col_names, data={'taxonomy':df.columns.tolist()})
df_rel_lod_list[index].columns = col_names
df_abs_lod_list[index].columns = col_names
df_pseudo_rel_lod_list[index].columns = col_names
df_pseudo_abs_lod_list[index].columns = col_names
df_col_names_lod_list[0]
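The lookup table maps each generated identifier back to its full taxonomy string, e.g.:
# Recover the taxonomy behind an identifier (phylum-level table shown)
df_col_names_lod_list[0].loc['ASV0', 'taxonomy']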
exclusion_list = ['', 'uncultured bacterium', 'metagenome', 'uncultured',
'gut metagenome', 'uncultured organism', 'unidentified',
'uncultured Bacteroidales bacterium', 'uncultured Mollicutes bacterium', 'uncultured archaeon']
rank_names = ['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']
for i in range(6):
    # Split the taxonomy string into one column per rank; the table at level i has
    # i+2 ranks (Phylum-level taxa are 'Kingdom;Phylum', species-level taxa have all 7)
    df_col_names_lod_list[i][rank_names[:i + 2]] = df_col_names_lod_list[i]['taxonomy'].str.split(';', expand=True)
    labels_list = []
    for index, row in df_col_names_lod_list[i].iterrows():
        # Walk from the most specific rank upward and keep the first informative name,
        # suffixed with the lowercase initial of its rank (e.g. '(g)' for Genus).
        # The Kingdom name is used unconditionally if every lower rank is uninformative.
        for depth in range(1, len(row)):
            name = row.iloc[-depth][5:]  # strip the 'D_k__' prefix
            if name not in exclusion_list or depth == len(row) - 1:
                labels_list.append(name + '(' + df_col_names_lod_list[i].columns[-depth][0].lower() + ')')
                break
df_col_names_lod_list[i]['label'] = labels_list
df_rel_sort_lod_list = [None]*6
df_abs_sort_lod_list = [None]*6
df_pseudo_rel_sort_lod_list = [None]*6
df_pseudo_abs_sort_lod_list = [None]*6
for i in range(6):
    # Order taxa by mean absolute abundance, most abundant first
    taxa_sorted = df_abs_lod_list[i].mean().sort_values(ascending=False).index
df_rel_sort_lod_list[i] = df_rel_lod_list[i].loc[:, taxa_sorted]
df_abs_sort_lod_list[i] = df_abs_lod_list[i].loc[:, taxa_sorted]
df_pseudo_rel_sort_lod_list[i] = df_pseudo_rel_lod_list[i].loc[:, taxa_sorted]
df_pseudo_abs_sort_lod_list[i] = df_pseudo_abs_lod_list[i].loc[:, taxa_sorted]
df_abs_sort_lod_list[4]
42 rows × 104 columns
# Persist the processed tables for downstream notebooks, closing each file promptly
outputs = {
    'pickle_files/rel_sort_lod_list_paired.pkl': df_rel_sort_lod_list,
    'pickle_files/abs_sort_lod_list_paired.pkl': df_abs_sort_lod_list,
    'pickle_files/pseudo_rel_sort_lod_list_paired.pkl': df_pseudo_rel_sort_lod_list,
    'pickle_files/pseudo_abs_sort_lod_list_paired.pkl': df_pseudo_abs_sort_lod_list,
    'pickle_files/col_names_lod_list_paired.pkl': df_col_names_lod_list,
    'pickle_files/total_load_paired.pkl': df_total_load,
    'pickle_files/seq_paired_metadata_paired.pkl': seq_metadata,
}
for path, obj in outputs.items():
    with open(path, 'wb') as f:
        pickle.dump(obj, f)
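These pickles can then be loaded in the downstream analysis notebooks, e.g.:
with open('pickle_files/abs_sort_lod_list_paired.pkl', 'rb') as f:
    df_abs_sort_lod_list = pickle.load(f)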