{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import pickle"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load in read count data (Qiime2 taxa barplot csv files)\n",
"\n",
"Samples were subsampled to 45,386 reads. Samples with less than this number of reads after DADA2 processing were removed."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Species-level read counts from the Qiime2 taxa barplot export; the 'index' column holds the sample labels.\n",
"df_seq_orig_species = pd.read_csv('data_files/species_counts_duodenum_45386.csv').set_index('index')\n",
"\n",
"# Samples were subsampled to a common depth, so the first row's total is used as the read depth.\n",
"# numeric_only=True skips the text metadata columns (summing them raises a TypeError on pandas >= 2.0),\n",
"# and .iloc[0] selects the first row by position because the index holds sample labels, not 0..N.\n",
"read_depth = df_seq_orig_species.sum(axis=1, numeric_only=True).iloc[0]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"45386.0"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity check: this should equal the 45,386-read subsampling depth\n",
"read_depth"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set the number of metadata columns in the sequencing data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Number of non-count columns in the sequencing table (Description and Body_Site)\n",
"num_metadata_cols = 2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Remove the duplicate sequencing samples and rename the remaining samples (rows) with '_Duo' in their name to just the sample ID"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" D_0__Archaea;D_1__Euryarchaeota;D_2__Halobacteria;D_3__Halobacteriales;D_4__Haloferacaceae;D_5__Halopenitus;D_6__Halopenitus persicus | \n",
" D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__Methanobacterium formicicum | \n",
" D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__uncultured archaeon | \n",
" D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobrevibacter;__ | \n",
" D_0__Archaea;D_1__Nanoarchaeaeota;D_2__Woesearchaeia;__;__;__;__ | \n",
" D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__Blastocatella;__ | \n",
" D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__uncultured;__ | \n",
" D_0__Bacteria;D_1__Acidobacteria;D_2__Holophagae;D_3__Subgroup 7;__;__;__ | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__Microtrichaceae;D_5__IMCC26207;D_6__uncultured bacterium | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__uncultured;D_5__uncultured Acidimicrobidae bacterium;D_6__uncultured Acidimicrobidae bacterium | \n",
" ... | \n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;D_5__Opitutus;D_6__uncultured Verrucomicrobia bacterium | \n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;__;__ | \n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacterium | \n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;__ | \n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Verrucomicrobiaceae;D_5__uncultured;D_6__uncultured bacterium | \n",
" D_0__Bacteria;D_1__WPS-2;D_2__metagenome;D_3__metagenome;D_4__metagenome;D_5__metagenome;D_6__metagenome | \n",
" D_0__Bacteria;__;__;__;__;__;__ | \n",
" Unassigned;__;__;__;__;__;__ | \n",
" Description | \n",
" Body_Site | \n",
"
\n",
" \n",
" index | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 141 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 141 | \n",
" Duodenum | \n",
"
\n",
" \n",
" 142 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 17.0 | \n",
" 0.0 | \n",
" 142 | \n",
" Duodenum | \n",
"
\n",
" \n",
" 144 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 25.0 | \n",
" 0.0 | \n",
" 144 | \n",
" Duodenum | \n",
"
\n",
" \n",
" 145 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 145 | \n",
" Duodenum | \n",
"
\n",
" \n",
" 146 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 146 | \n",
" Duodenum | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 446 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 70.0 | \n",
" 0.0 | \n",
" 446_Duo | \n",
" Duodenum | \n",
"
\n",
" \n",
" 447 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 447_Duo | \n",
" Duodenum | \n",
"
\n",
" \n",
" 448 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 448_Duo | \n",
" Duodenum | \n",
"
\n",
" \n",
" 449 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 449_Duo | \n",
" Duodenum | \n",
"
\n",
" \n",
" 451 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 451_Duo | \n",
" Duodenum | \n",
"
\n",
" \n",
"
\n",
"
254 rows × 1068 columns
\n",
"
"
],
"text/plain": [
" D_0__Archaea;D_1__Euryarchaeota;D_2__Halobacteria;D_3__Halobacteriales;D_4__Haloferacaceae;D_5__Halopenitus;D_6__Halopenitus persicus \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__Methanobacterium formicicum \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__uncultured archaeon \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobrevibacter;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Archaea;D_1__Nanoarchaeaeota;D_2__Woesearchaeia;__;__;__;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__Blastocatella;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__uncultured;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Acidobacteria;D_2__Holophagae;D_3__Subgroup 7;__;__;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__Microtrichaceae;D_5__IMCC26207;D_6__uncultured bacterium \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__uncultured;D_5__uncultured Acidimicrobidae bacterium;D_6__uncultured Acidimicrobidae bacterium \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" ... \\\n",
"index ... \n",
"141 ... \n",
"142 ... \n",
"144 ... \n",
"145 ... \n",
"146 ... \n",
"... ... \n",
"446 ... \n",
"447 ... \n",
"448 ... \n",
"449 ... \n",
"451 ... \n",
"\n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;D_5__Opitutus;D_6__uncultured Verrucomicrobia bacterium \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;__;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacterium \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Verrucomicrobiaceae;D_5__uncultured;D_6__uncultured bacterium \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__WPS-2;D_2__metagenome;D_3__metagenome;D_4__metagenome;D_5__metagenome;D_6__metagenome \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;__;__;__;__;__;__ Unassigned;__;__;__;__;__;__ \\\n",
"index \n",
"141 0.0 0.0 \n",
"142 17.0 0.0 \n",
"144 25.0 0.0 \n",
"145 0.0 0.0 \n",
"146 0.0 0.0 \n",
"... ... ... \n",
"446 70.0 0.0 \n",
"447 0.0 0.0 \n",
"448 0.0 0.0 \n",
"449 0.0 0.0 \n",
"451 0.0 0.0 \n",
"\n",
" Description Body_Site \n",
"index \n",
"141 141 Duodenum \n",
"142 142 Duodenum \n",
"144 144 Duodenum \n",
"145 145 Duodenum \n",
"146 146 Duodenum \n",
"... ... ... \n",
"446 446_Duo Duodenum \n",
"447 447_Duo Duodenum \n",
"448 448_Duo Duodenum \n",
"449 449_Duo Duodenum \n",
"451 451_Duo Duodenum \n",
"\n",
"[254 rows x 1068 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Duplicate sequencing runs to discard\n",
"duplicate_samples = ['387_Duo', '388_Duo', '390_Duo', '391_Duo', '392_Duo', '394_Duo',\n",
"                     '409_Duo', '410_Duo', '418_Duo', '423_Duo', '425_Duo', '433_Duo']\n",
"\n",
"# Remaining '_Duo' labels are mapped back to the bare sample ID\n",
"duo_to_sample_id = {'417_Duo':'417', '434_Duo':'434', '437_Duo':'437', '438_Duo':'438', '441_Duo':'441',\n",
"                    '446_Duo':'446', '447_Duo':'447', '448_Duo':'448', '449_Duo':'449', '451_Duo':'451'}\n",
"\n",
"df_seq_orig_species = (\n",
"    df_seq_orig_species\n",
"    .drop(duplicate_samples)\n",
"    .rename(duo_to_sample_id, axis='index')\n",
")\n",
"\n",
"# Cast the labels to int *before* sorting so the row order is numeric rather than lexicographic\n",
"# (sorting the string labels would put e.g. '1000' ahead of '141').\n",
"df_seq_orig_species.index = df_seq_orig_species.index.astype(int)\n",
"df_seq_orig_species = df_seq_orig_species.sort_index()\n",
"df_seq_orig_species"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# This taxon was only in the second batch of sequenced duodenum samples, likely indicating it is a contaminant.\n",
"# It is removed because it interferes with a plot comparing saliva to duodenum samples.\n",
"contaminant_taxon = 'D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3__Bacillales;D_4__Paenibacillaceae;D_5__Paenibacillus;D_6__Paenibacillus darwinianus'\n",
"df_seq_orig_species = df_seq_orig_species.drop(columns=[contaminant_taxon])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load in absolute abundance data (dPCR)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"df_total_load = pd.read_excel('dPCR data/dPCR_total_loads_duodenum.xlsx', index_col=0)\n",
"df_weights = pd.read_csv('data_files/sample weights.csv')\n",
"\n",
"# Merge the two dataframes together based on the sample ID\n",
"df_total_load = df_total_load.merge(df_weights, left_on='Sample', right_on='Study ID')\n",
"\n",
"# Parse the weights once. Missing weights are recorded as free text (e.g. 'no data', 'no weight', 'no sample'),\n",
"# which pd.to_numeric(errors='coerce') turns into NaN. This replaces a first-character isdigit() test that\n",
"# fails on blank/NaN cells and on a column that pandas already read as numeric.\n",
"weight_numeric = pd.to_numeric(df_total_load['Weight (mL)'], errors='coerce')\n",
"\n",
"# Add a column saying whether the sample weight is present (True) or missing (False)\n",
"df_total_load['Weight (True/False)'] = weight_numeric.notna()\n",
"\n",
"# Determine the average sample weight over the samples that have a recorded weight\n",
"mean_weight = weight_numeric[df_total_load['Weight (True/False)']].mean()\n",
"\n",
"# Create new column where any sample with a missing weight is set to the average weight of all samples\n",
"df_total_load['Corrected Weight (mL)'] = weight_numeric.fillna(mean_weight)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Well | \n",
" Concentration | \n",
" PoissonConfMax | \n",
" PoissonConfMin | \n",
" Total | \n",
" Positives | \n",
" Primer | \n",
" Sample | \n",
" Dilution | \n",
" Corrected Concentration | \n",
" Study ID | \n",
" Weight (mL) | \n",
" Weight (True/False) | \n",
" Corrected Weight (mL) | \n",
"
\n",
" \n",
" \n",
" \n",
" 74 | \n",
" B08 | \n",
" 1430.0 | \n",
" 1456.0 | \n",
" 1417.0 | \n",
" 18706 | \n",
" 13158 | \n",
" mod_Caporaso | \n",
" 215 | \n",
" 10 | \n",
" 1430000.0 | \n",
" 215 | \n",
" no data | \n",
" False | \n",
" 0.884958 | \n",
"
\n",
" \n",
" 88 | \n",
" E03 | \n",
" 671.0 | \n",
" 686.0 | \n",
" 663.0 | \n",
" 17883 | \n",
" 7771 | \n",
" mod_Caporaso | \n",
" 233 | \n",
" 500 | \n",
" 33550000.0 | \n",
" 233 | \n",
" no weight | \n",
" False | \n",
" 0.884958 | \n",
"
\n",
" \n",
" 119 | \n",
" A10 | \n",
" 23.3 | \n",
" 25.7 | \n",
" 22.0 | \n",
" 17629 | \n",
" 345 | \n",
" mod_Caporaso | \n",
" 280 | \n",
" 10 | \n",
" 23300.0 | \n",
" 280 | \n",
" no data | \n",
" False | \n",
" 0.884958 | \n",
"
\n",
" \n",
" 142 | \n",
" B06 | \n",
" 268.0 | \n",
" 277.0 | \n",
" 264.0 | \n",
" 19925 | \n",
" 4064 | \n",
" mod_Caporaso | \n",
" 318 | \n",
" 10 | \n",
" 268000.0 | \n",
" 318 | \n",
" no data | \n",
" False | \n",
" 0.884958 | \n",
"
\n",
" \n",
" 145 | \n",
" F06 | \n",
" 782.0 | \n",
" 799.0 | \n",
" 774.0 | \n",
" 18020 | \n",
" 8753 | \n",
" mod_Caporaso | \n",
" 324 | \n",
" 10 | \n",
" 782000.0 | \n",
" 324 | \n",
" no data | \n",
" False | \n",
" 0.884958 | \n",
"
\n",
" \n",
" 178 | \n",
" H02 | \n",
" 286.0 | \n",
" 294.0 | \n",
" 282.0 | \n",
" 20243 | \n",
" 4368 | \n",
" mod_Caporaso | \n",
" 360 | \n",
" 10 | \n",
" 286000.0 | \n",
" 360 | \n",
" no data | \n",
" False | \n",
" 0.884958 | \n",
"
\n",
" \n",
" 189 | \n",
" C10 | \n",
" 3079.0 | \n",
" 3139.0 | \n",
" 3050.0 | \n",
" 19684 | \n",
" 18247 | \n",
" mod_Caporaso | \n",
" 372 | \n",
" 10 | \n",
" 3079000.0 | \n",
" 372 | \n",
" no sample | \n",
" False | \n",
" 0.884958 | \n",
"
\n",
" \n",
" 201 | \n",
" G11 | \n",
" 3910.0 | \n",
" 4010.0 | \n",
" 3860.0 | \n",
" 16826 | \n",
" 16220 | \n",
" mod_Caporaso | \n",
" 384 | \n",
" 10 | \n",
" 3910000.0 | \n",
" 384 | \n",
" no data | \n",
" False | \n",
" 0.884958 | \n",
"
\n",
" \n",
" 214 | \n",
" F04 | \n",
" 16.7 | \n",
" 18.7 | \n",
" 15.7 | \n",
" 19006 | \n",
" 268 | \n",
" mod_Caporaso | \n",
" 400 | \n",
" 10 | \n",
" 16700.0 | \n",
" 400 | \n",
" no data | \n",
" False | \n",
" 0.884958 | \n",
"
\n",
" \n",
" 242 | \n",
" B09 | \n",
" 90.1 | \n",
" 94.9 | \n",
" 87.7 | \n",
" 18334 | \n",
" 1352 | \n",
" mod_Caporaso | \n",
" 430 | \n",
" 10 | \n",
" 90100.0 | \n",
" 430 | \n",
" no data | \n",
" False | \n",
" 0.884958 | \n",
"
\n",
" \n",
" 251 | \n",
" D07 | \n",
" 3740.0 | \n",
" 3830.0 | \n",
" 3700.0 | \n",
" 15466 | \n",
" 14823 | \n",
" mod_Caporaso | \n",
" 448 | \n",
" 10 | \n",
" 3740000.0 | \n",
" 448 | \n",
" no data | \n",
" False | \n",
" 0.884958 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Well Concentration PoissonConfMax PoissonConfMin Total Positives \\\n",
"74 B08 1430.0 1456.0 1417.0 18706 13158 \n",
"88 E03 671.0 686.0 663.0 17883 7771 \n",
"119 A10 23.3 25.7 22.0 17629 345 \n",
"142 B06 268.0 277.0 264.0 19925 4064 \n",
"145 F06 782.0 799.0 774.0 18020 8753 \n",
"178 H02 286.0 294.0 282.0 20243 4368 \n",
"189 C10 3079.0 3139.0 3050.0 19684 18247 \n",
"201 G11 3910.0 4010.0 3860.0 16826 16220 \n",
"214 F04 16.7 18.7 15.7 19006 268 \n",
"242 B09 90.1 94.9 87.7 18334 1352 \n",
"251 D07 3740.0 3830.0 3700.0 15466 14823 \n",
"\n",
" Primer Sample Dilution Corrected Concentration Study ID \\\n",
"74 mod_Caporaso 215 10 1430000.0 215 \n",
"88 mod_Caporaso 233 500 33550000.0 233 \n",
"119 mod_Caporaso 280 10 23300.0 280 \n",
"142 mod_Caporaso 318 10 268000.0 318 \n",
"145 mod_Caporaso 324 10 782000.0 324 \n",
"178 mod_Caporaso 360 10 286000.0 360 \n",
"189 mod_Caporaso 372 10 3079000.0 372 \n",
"201 mod_Caporaso 384 10 3910000.0 384 \n",
"214 mod_Caporaso 400 10 16700.0 400 \n",
"242 mod_Caporaso 430 10 90100.0 430 \n",
"251 mod_Caporaso 448 10 3740000.0 448 \n",
"\n",
" Weight (mL) Weight (True/False) Corrected Weight (mL) \n",
"74 no data False 0.884958 \n",
"88 no weight False 0.884958 \n",
"119 no data False 0.884958 \n",
"142 no data False 0.884958 \n",
"145 no data False 0.884958 \n",
"178 no data False 0.884958 \n",
"189 no sample False 0.884958 \n",
"201 no data False 0.884958 \n",
"214 no data False 0.884958 \n",
"242 no data False 0.884958 \n",
"251 no data False 0.884958 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# For reference, list the samples that have no recorded weight (N=11)\n",
"has_weight = df_total_load['Weight (True/False)']\n",
"df_total_load.loc[~has_weight]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Normalize concentration to the input volume"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"## Set the lower dPCR threshold. 95% CI is +-1X and the dPCR blanks are <1cp/uL with +3std dev of ~1 cp/uL. \n",
"## This means we would have ~2X resolution at 2 cp/uL.\n",
"above_threshold = df_total_load['Concentration'] > 2\n",
"df_total_load = df_total_load.loc[above_threshold]\n",
"\n",
"## Calculate Copies/mL: corrected concentration divided by the (measured or mean-imputed) sample weight\n",
"copies_per_ml = df_total_load['Corrected Concentration'] / df_total_load['Corrected Weight (mL)']\n",
"df_total_load['Copies/mL'] = copies_per_ml\n",
"df_total_load['Log Copies/mL'] = np.log10(copies_per_ml)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Export the per-sample total load (Copies/mL) to a spreadsheet\n",
"total_load_export = df_total_load[['Sample', 'Copies/mL']]\n",
"total_load_export.to_excel('duodenum_total_loads.xlsx')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Calculate the LOD in terms of absolute abundance and relative abundance. The LOD is set at a 3-copy input, which gives ~95% confidence that at least one template copy is added to the reaction (Poisson loading)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# These samples were diluted before placing sample in library reaction due to inhibitors preventing amplification in undiluted sample\n",
"# (sample ID -> dilution factor).\n",
"# NOTE(review): the original literal listed sample 423 twice (423:100 and, later, 423:50). Python keeps the last\n",
"# duplicate key, so 50 has always been the value in effect; it is kept here so results do not change.\n",
"# Confirm against the lab records which dilution is correct for sample 423.\n",
"diluted_samples = {437:10, 438:10, 441:10, 446:10, 447:10, 448:10, 449:10, 451:10,\n",
"                   395:100, 198:50, 423:50, 427:50, 373:10, 321:10, 169:10, 375:10, 353:10,\n",
"                   242:10, 411:10, 312:10, 433:2, 366:2}\n",
"\n",
"# Create column to account for the fact that some samples were diluted before input into library prep reaction.\n",
"# Samples not listed above get a factor of 1.\n",
"df_total_load['Seq_Dilution'] = df_total_load['Sample'].map(diluted_samples).fillna(1).astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Well | \n",
" Concentration | \n",
" PoissonConfMax | \n",
" PoissonConfMin | \n",
" Total | \n",
" Positives | \n",
" Primer | \n",
" Sample | \n",
" Dilution | \n",
" Corrected Concentration | \n",
" Study ID | \n",
" Weight (mL) | \n",
" Weight (True/False) | \n",
" Corrected Weight (mL) | \n",
" Copies/mL | \n",
" Log Copies/mL | \n",
" Seq_Dilution | \n",
" Copies in Amp Rxn | \n",
" Rel. Abundance LOD (%) | \n",
" Abs. Abundance LOD | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" A02 | \n",
" 400.0 | \n",
" 411.0 | \n",
" 394.0 | \n",
" 17179 | \n",
" 4950 | \n",
" mod_Caporaso | \n",
" 141 | \n",
" 10 | \n",
" 400000.0 | \n",
" 141 | \n",
" 1.6259 | \n",
" True | \n",
" 1.6259 | \n",
" 2.460176e+05 | \n",
" 5.390966 | \n",
" 1 | \n",
" 14000.0 | \n",
" 0.021429 | \n",
" 52.718055 | \n",
"
\n",
" \n",
" 1 | \n",
" B02 | \n",
" 24.3 | \n",
" 26.9 | \n",
" 23.1 | \n",
" 17527 | \n",
" 359 | \n",
" mod_Caporaso | \n",
" 142 | \n",
" 10 | \n",
" 24300.0 | \n",
" 142 | \n",
" 0.2091 | \n",
" True | \n",
" 0.2091 | \n",
" 1.162123e+05 | \n",
" 5.065252 | \n",
" 1 | \n",
" 850.5 | \n",
" 0.352734 | \n",
" 409.920066 | \n",
"
\n",
" \n",
" 2 | \n",
" A05 | \n",
" 373.0 | \n",
" 383.0 | \n",
" 368.0 | \n",
" 19427 | \n",
" 5276 | \n",
" mod_Caporaso | \n",
" 145 | \n",
" 500 | \n",
" 18650000.0 | \n",
" 145 | \n",
" 1.77 | \n",
" True | \n",
" 1.7700 | \n",
" 1.053672e+07 | \n",
" 7.022706 | \n",
" 1 | \n",
" 652750.0 | \n",
" 0.000460 | \n",
" 48.426150 | \n",
"
\n",
" \n",
" 3 | \n",
" D02 | \n",
" 1234.0 | \n",
" 1258.0 | \n",
" 1222.0 | \n",
" 17000 | \n",
" 11044 | \n",
" mod_Caporaso | \n",
" 146 | \n",
" 10 | \n",
" 1234000.0 | \n",
" 146 | \n",
" 1.6973 | \n",
" True | \n",
" 1.6973 | \n",
" 7.270371e+05 | \n",
" 5.861557 | \n",
" 1 | \n",
" 43190.0 | \n",
" 0.006946 | \n",
" 50.500375 | \n",
"
\n",
" \n",
" 4 | \n",
" E02 | \n",
" 642.0 | \n",
" 657.0 | \n",
" 634.0 | \n",
" 17180 | \n",
" 7226 | \n",
" mod_Caporaso | \n",
" 147 | \n",
" 10 | \n",
" 642000.0 | \n",
" 147 | \n",
" 1.7476 | \n",
" True | \n",
" 1.7476 | \n",
" 3.673610e+05 | \n",
" 5.565093 | \n",
" 1 | \n",
" 22470.0 | \n",
" 0.013351 | \n",
" 49.046856 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 255 | \n",
" D01 | \n",
" 5810.0 | \n",
" 6060.0 | \n",
" 5590.0 | \n",
" 13577 | \n",
" 13480 | \n",
" mod_Caporaso | \n",
" 451 | \n",
" 500 | \n",
" 290500000.0 | \n",
" 451 | \n",
" 0.605 | \n",
" True | \n",
" 0.6050 | \n",
" 4.801653e+08 | \n",
" 8.681391 | \n",
" 10 | \n",
" 1016750.0 | \n",
" 0.000295 | \n",
" 1416.765053 | \n",
"
\n",
" \n",
" 256 | \n",
" A07 | \n",
" 154.0 | \n",
" 161.0 | \n",
" 150.0 | \n",
" 14056 | \n",
" 1720 | \n",
" mod_Caporaso | \n",
" 207 | \n",
" 25000 | \n",
" 385000000.0 | \n",
" 207 | \n",
" 1.8095 | \n",
" True | \n",
" 1.8095 | \n",
" 2.127660e+08 | \n",
" 8.327902 | \n",
" 1 | \n",
" 13475000.0 | \n",
" 0.000022 | \n",
" 47.369044 | \n",
"
\n",
" \n",
" 257 | \n",
" B07 | \n",
" 170.0 | \n",
" 177.0 | \n",
" 167.0 | \n",
" 16105 | \n",
" 2169 | \n",
" mod_Caporaso | \n",
" 274 | \n",
" 25000 | \n",
" 425000000.0 | \n",
" 274 | \n",
" 1.083 | \n",
" True | \n",
" 1.0830 | \n",
" 3.924284e+08 | \n",
" 8.593760 | \n",
" 1 | \n",
" 14875000.0 | \n",
" 0.000020 | \n",
" 79.145231 | \n",
"
\n",
" \n",
" 258 | \n",
" C07 | \n",
" 405.0 | \n",
" 417.0 | \n",
" 399.0 | \n",
" 15574 | \n",
" 4539 | \n",
" mod_Caporaso | \n",
" 322 | \n",
" 5000 | \n",
" 202500000.0 | \n",
" 322 | \n",
" 0.219 | \n",
" True | \n",
" 0.2190 | \n",
" 9.246575e+08 | \n",
" 8.965981 | \n",
" 1 | \n",
" 7087500.0 | \n",
" 0.000042 | \n",
" 391.389432 | \n",
"
\n",
" \n",
" 259 | \n",
" D07 | \n",
" 428.0 | \n",
" 440.0 | \n",
" 422.0 | \n",
" 15624 | \n",
" 4766 | \n",
" mod_Caporaso | \n",
" 395 | \n",
" 5000 | \n",
" 214000000.0 | \n",
" 395 | \n",
" 0.491 | \n",
" True | \n",
" 0.4910 | \n",
" 4.358452e+08 | \n",
" 8.639332 | \n",
" 100 | \n",
" 74900.0 | \n",
" 0.004005 | \n",
" 17457.084667 | \n",
"
\n",
" \n",
"
\n",
"
256 rows × 20 columns
\n",
"
"
],
"text/plain": [
" Well Concentration PoissonConfMax PoissonConfMin Total Positives \\\n",
"0 A02 400.0 411.0 394.0 17179 4950 \n",
"1 B02 24.3 26.9 23.1 17527 359 \n",
"2 A05 373.0 383.0 368.0 19427 5276 \n",
"3 D02 1234.0 1258.0 1222.0 17000 11044 \n",
"4 E02 642.0 657.0 634.0 17180 7226 \n",
".. ... ... ... ... ... ... \n",
"255 D01 5810.0 6060.0 5590.0 13577 13480 \n",
"256 A07 154.0 161.0 150.0 14056 1720 \n",
"257 B07 170.0 177.0 167.0 16105 2169 \n",
"258 C07 405.0 417.0 399.0 15574 4539 \n",
"259 D07 428.0 440.0 422.0 15624 4766 \n",
"\n",
" Primer Sample Dilution Corrected Concentration Study ID \\\n",
"0 mod_Caporaso 141 10 400000.0 141 \n",
"1 mod_Caporaso 142 10 24300.0 142 \n",
"2 mod_Caporaso 145 500 18650000.0 145 \n",
"3 mod_Caporaso 146 10 1234000.0 146 \n",
"4 mod_Caporaso 147 10 642000.0 147 \n",
".. ... ... ... ... ... \n",
"255 mod_Caporaso 451 500 290500000.0 451 \n",
"256 mod_Caporaso 207 25000 385000000.0 207 \n",
"257 mod_Caporaso 274 25000 425000000.0 274 \n",
"258 mod_Caporaso 322 5000 202500000.0 322 \n",
"259 mod_Caporaso 395 5000 214000000.0 395 \n",
"\n",
" Weight (mL) Weight (True/False) Corrected Weight (mL) Copies/mL \\\n",
"0 1.6259 True 1.6259 2.460176e+05 \n",
"1 0.2091 True 0.2091 1.162123e+05 \n",
"2 1.77 True 1.7700 1.053672e+07 \n",
"3 1.6973 True 1.6973 7.270371e+05 \n",
"4 1.7476 True 1.7476 3.673610e+05 \n",
".. ... ... ... ... \n",
"255 0.605 True 0.6050 4.801653e+08 \n",
"256 1.8095 True 1.8095 2.127660e+08 \n",
"257 1.083 True 1.0830 3.924284e+08 \n",
"258 0.219 True 0.2190 9.246575e+08 \n",
"259 0.491 True 0.4910 4.358452e+08 \n",
"\n",
" Log Copies/mL Seq_Dilution Copies in Amp Rxn Rel. Abundance LOD (%) \\\n",
"0 5.390966 1 14000.0 0.021429 \n",
"1 5.065252 1 850.5 0.352734 \n",
"2 7.022706 1 652750.0 0.000460 \n",
"3 5.861557 1 43190.0 0.006946 \n",
"4 5.565093 1 22470.0 0.013351 \n",
".. ... ... ... ... \n",
"255 8.681391 10 1016750.0 0.000295 \n",
"256 8.327902 1 13475000.0 0.000022 \n",
"257 8.593760 1 14875000.0 0.000020 \n",
"258 8.965981 1 7087500.0 0.000042 \n",
"259 8.639332 100 74900.0 0.004005 \n",
"\n",
" Abs. Abundance LOD \n",
"0 52.718055 \n",
"1 409.920066 \n",
"2 48.426150 \n",
"3 50.500375 \n",
"4 49.046856 \n",
".. ... \n",
"255 1416.765053 \n",
"256 47.369044 \n",
"257 79.145231 \n",
"258 391.389432 \n",
"259 17457.084667 \n",
"\n",
"[256 rows x 20 columns]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# uL added to the amplification rxn, and the copy input used as the detection threshold\n",
"seq_volume = 3.5\n",
"copy_input_threshold = 3\n",
"\n",
"# Copies entering the amplification rxn: Concentration x Dilution / Seq_Dilution x input volume\n",
"copies_in_rxn = (\n",
"    df_total_load['Concentration']\n",
"    .mul(df_total_load['Dilution'])\n",
"    .div(df_total_load['Seq_Dilution'])\n",
"    .mul(seq_volume)\n",
")\n",
"df_total_load['Copies in Amp Rxn'] = copies_in_rxn\n",
"\n",
"# LOD as a percentage of the copies in the rxn, then scaled by Copies/mL for the absolute LOD\n",
"rel_lod_pct = copy_input_threshold / copies_in_rxn * 100\n",
"df_total_load['Rel. Abundance LOD (%)'] = rel_lod_pct\n",
"df_total_load['Abs. Abundance LOD'] = rel_lod_pct * df_total_load['Copies/mL'] / 100\n",
"df_total_load"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Generate dictionary for easier downstream conversion of relative to absolute abundances"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"256"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Map each sample ID to its total load (Copies/mL); if a sample ID repeats, the last row wins\n",
"total_load_dict = dict(zip(df_total_load['Sample'].values, df_total_load['Copies/mL'].values))\n",
"len(total_load_dict)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Determine LOD thresholds. Use the larger of the two limits: if the LOD from Poisson loading is not greater than the LOD from sequencing, use the sequencing value. LOD from sequencing is based on a 50% CV from replicates (Fig 2d from quant-seq paper)."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Sequencing-based lower limit as a power-law function of read depth\n",
"# NOTE(review): seq_lloq is compared directly with a percentage column - confirm the fit is expressed in percent.\n",
"seq_lloq = 7.115 * read_depth ** -0.556\n",
"\n",
"# Keep the Poisson-loading LOD where it exceeds seq_lloq; everywhere else fall back to seq_lloq\n",
"poisson_lod_pct = df_total_load['Rel. Abundance LOD (%)']\n",
"exceeds_seq_lloq = poisson_lod_pct > seq_lloq\n",
"df_total_load['Rel. Abundance LOD (%) Corrected'] = poisson_lod_pct.where(exceeds_seq_lloq, seq_lloq)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Per-sample LOD expressed in reads: corrected relative LOD (%) x read depth / 100\n",
"lod_dict = {\n",
"    sample: lod_pct * read_depth / 100\n",
"    for sample, lod_pct in zip(df_total_load['Sample'].values,\n",
"                               df_total_load['Rel. Abundance LOD (%) Corrected'].values)\n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Filter out samples without accurate total loads and store metadata in separate file"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" D_0__Archaea;D_1__Euryarchaeota;D_2__Halobacteria;D_3__Halobacteriales;D_4__Haloferacaceae;D_5__Halopenitus;D_6__Halopenitus persicus | \n",
" D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__Methanobacterium formicicum | \n",
" D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__uncultured archaeon | \n",
" D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobrevibacter;__ | \n",
" D_0__Archaea;D_1__Nanoarchaeaeota;D_2__Woesearchaeia;__;__;__;__ | \n",
" D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__Blastocatella;__ | \n",
" D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__uncultured;__ | \n",
" D_0__Bacteria;D_1__Acidobacteria;D_2__Holophagae;D_3__Subgroup 7;__;__;__ | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__Microtrichaceae;D_5__IMCC26207;D_6__uncultured bacterium | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__uncultured;D_5__uncultured Acidimicrobidae bacterium;D_6__uncultured Acidimicrobidae bacterium | \n",
" ... | \n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Chthoniobacterales;D_4__Chthoniobacteraceae;D_5__Chthoniobacter;D_6__uncultured bacterium | \n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Chthoniobacterales;D_4__Xiphinematobacteraceae;D_5__Candidatus Xiphinematobacter;D_6__metagenome | \n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;D_5__Opitutus;D_6__uncultured Verrucomicrobia bacterium | \n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;__;__ | \n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacterium | \n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;__ | \n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Verrucomicrobiaceae;D_5__uncultured;D_6__uncultured bacterium | \n",
" D_0__Bacteria;D_1__WPS-2;D_2__metagenome;D_3__metagenome;D_4__metagenome;D_5__metagenome;D_6__metagenome | \n",
" D_0__Bacteria;__;__;__;__;__;__ | \n",
" Unassigned;__;__;__;__;__;__ | \n",
"
\n",
" \n",
" index | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 141 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 142 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 17.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 144 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 25.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 145 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 146 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 446 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 70.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 447 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 448 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 449 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 451 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
250 rows × 1065 columns
\n",
"
"
],
"text/plain": [
" D_0__Archaea;D_1__Euryarchaeota;D_2__Halobacteria;D_3__Halobacteriales;D_4__Haloferacaceae;D_5__Halopenitus;D_6__Halopenitus persicus \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__Methanobacterium formicicum \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__uncultured archaeon \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobrevibacter;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Archaea;D_1__Nanoarchaeaeota;D_2__Woesearchaeia;__;__;__;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__Blastocatella;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__uncultured;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Acidobacteria;D_2__Holophagae;D_3__Subgroup 7;__;__;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__Microtrichaceae;D_5__IMCC26207;D_6__uncultured bacterium \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__uncultured;D_5__uncultured Acidimicrobidae bacterium;D_6__uncultured Acidimicrobidae bacterium \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" ... \\\n",
"index ... \n",
"141 ... \n",
"142 ... \n",
"144 ... \n",
"145 ... \n",
"146 ... \n",
"... ... \n",
"446 ... \n",
"447 ... \n",
"448 ... \n",
"449 ... \n",
"451 ... \n",
"\n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Chthoniobacterales;D_4__Chthoniobacteraceae;D_5__Chthoniobacter;D_6__uncultured bacterium \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Chthoniobacterales;D_4__Xiphinematobacteraceae;D_5__Candidatus Xiphinematobacter;D_6__metagenome \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;D_5__Opitutus;D_6__uncultured Verrucomicrobia bacterium \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;__;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacterium \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Verrucomicrobiaceae;D_5__uncultured;D_6__uncultured bacterium \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__WPS-2;D_2__metagenome;D_3__metagenome;D_4__metagenome;D_5__metagenome;D_6__metagenome \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;__;__;__;__;__;__ Unassigned;__;__;__;__;__;__ \n",
"index \n",
"141 0.0 0.0 \n",
"142 17.0 0.0 \n",
"144 25.0 0.0 \n",
"145 0.0 0.0 \n",
"146 0.0 0.0 \n",
"... ... ... \n",
"446 70.0 0.0 \n",
"447 0.0 0.0 \n",
"448 0.0 0.0 \n",
"449 0.0 0.0 \n",
"451 0.0 0.0 \n",
"\n",
"[250 rows x 1065 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_seq_samples = df_seq_orig_species[df_seq_orig_species.index.isin(total_load_dict.keys())][df_seq_orig_species.columns[:-1*num_metadata_cols]]\n",
"\n",
"# This is num_metadata_cols-1 because we don't need the description column since it is already stored as the index\n",
"seq_metadata = df_seq_orig_species[df_seq_orig_species.columns[-1*(num_metadata_cols-1):]]\n",
"df_seq_samples"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set abundance to zero for taxa below LOD defined by # molecules input into amplification rxn or sequencing 50% CV threshold\n",
"This is defined as the load at which there should be a 95% chance of one copy being loaded into the amplification reaction (3 copy average)."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Actinomycetales;D_4__Actinomycetaceae;D_5__F0332;D_6__uncultured bacterium | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Alloscardovia;D_6__Bifidobacterium longum subsp. longum | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Scardovia;D_6__unidentified | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium kroppenstedtii | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium pseudodiphtheriticum | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;__ | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;D_6__Corynebacterium durum | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;__ | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Lawsonella;D_6__uncultured bacterium | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;__;__ | \n",
" ... | \n",
" D_0__Bacteria;D_1__Synergistetes;D_2__Synergistia;D_3__Synergistales;D_4__Synergistaceae;D_5__Pyramidobacter;D_6__Pyramidobacter piscolens | \n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__Firmicutes oral clone FM046;D_5__Firmicutes oral clone FM046;D_6__Firmicutes oral clone FM046 | \n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__gut metagenome;D_5__gut metagenome;D_6__gut metagenome | \n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__uncultured bacterium;D_5__uncultured bacterium;D_6__uncultured bacterium | \n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;__;__;__ | \n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;D_6__Mycoplasma salivarium ATCC 23064 | \n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;__ | \n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacterium | \n",
" D_0__Bacteria;__;__;__;__;__;__ | \n",
" Unassigned;__;__;__;__;__;__ | \n",
"
\n",
" \n",
" index | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 141 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 142 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 328.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 144 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 25.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 145 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 146 | \n",
" 0.0 | \n",
" 11.0 | \n",
" 93.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 75.0 | \n",
" 18.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 446 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 447 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 13.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 18.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 448 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 13.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 14.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 449 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 715.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1306.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 451 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 135.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 9.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
250 rows × 546 columns
\n",
"
"
],
"text/plain": [
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Actinomycetales;D_4__Actinomycetaceae;D_5__F0332;D_6__uncultured bacterium \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Alloscardovia;D_6__Bifidobacterium longum subsp. longum \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 11.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Scardovia;D_6__unidentified \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 93.0 \n",
"... ... \n",
"446 0.0 \n",
"447 13.0 \n",
"448 13.0 \n",
"449 715.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium kroppenstedtii \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium pseudodiphtheriticum \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;D_6__Corynebacterium durum \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 75.0 \n",
"... ... \n",
"446 0.0 \n",
"447 18.0 \n",
"448 14.0 \n",
"449 1306.0 \n",
"451 135.0 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 18.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Lawsonella;D_6__uncultured bacterium \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;__;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" ... \\\n",
"index ... \n",
"141 ... \n",
"142 ... \n",
"144 ... \n",
"145 ... \n",
"146 ... \n",
"... ... \n",
"446 ... \n",
"447 ... \n",
"448 ... \n",
"449 ... \n",
"451 ... \n",
"\n",
" D_0__Bacteria;D_1__Synergistetes;D_2__Synergistia;D_3__Synergistales;D_4__Synergistaceae;D_5__Pyramidobacter;D_6__Pyramidobacter piscolens \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__Firmicutes oral clone FM046;D_5__Firmicutes oral clone FM046;D_6__Firmicutes oral clone FM046 \\\n",
"index \n",
"141 0.0 \n",
"142 328.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__gut metagenome;D_5__gut metagenome;D_6__gut metagenome \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__uncultured bacterium;D_5__uncultured bacterium;D_6__uncultured bacterium \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 9.0 \n",
"\n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;__;__;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;D_6__Mycoplasma salivarium ATCC 23064 \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;__ \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacterium \\\n",
"index \n",
"141 0.0 \n",
"142 0.0 \n",
"144 0.0 \n",
"145 0.0 \n",
"146 0.0 \n",
"... ... \n",
"446 0.0 \n",
"447 0.0 \n",
"448 0.0 \n",
"449 0.0 \n",
"451 0.0 \n",
"\n",
" D_0__Bacteria;__;__;__;__;__;__ Unassigned;__;__;__;__;__;__ \n",
"index \n",
"141 0.0 0.0 \n",
"142 0.0 0.0 \n",
"144 25.0 0.0 \n",
"145 0.0 0.0 \n",
"146 0.0 0.0 \n",
"... ... ... \n",
"446 0.0 0.0 \n",
"447 0.0 0.0 \n",
"448 0.0 0.0 \n",
"449 0.0 0.0 \n",
"451 0.0 0.0 \n",
"\n",
"[250 rows x 546 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_species_lod_filter = pd.DataFrame()\n",
"\n",
"for col in df_seq_samples.columns:\n",
" df_species_lod_filter[col] = df_seq_samples.apply(lambda x: x[col] if x[col]>lod_dict[x.name] else 0, axis=1)\n",
" \n",
"# Remove columns (taxa) that have zero counts after filtering\n",
"df_species_lod_filter = df_species_lod_filter[df_species_lod_filter.sum(axis=1)>0]\n",
"\n",
"# Remove rows (samples) that have zero counts after filtering\n",
"df_species_lod_filter = df_species_lod_filter.loc[:, (df_species_lod_filter != 0).any(axis=0)]\n",
"df_species_lod_filter"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Determine which samples (if any) were filtered out"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"orig_indexes = df_seq_samples.index.tolist()\n",
"filter_indexes = df_species_lod_filter.index.tolist()\n",
"\n",
"lost = list(set(set(orig_indexes) - set(filter_indexes)))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Well | \n",
" Concentration | \n",
" PoissonConfMax | \n",
" PoissonConfMin | \n",
" Total | \n",
" Positives | \n",
" Primer | \n",
" Sample | \n",
" Dilution | \n",
" Corrected Concentration | \n",
" ... | \n",
" Weight (mL) | \n",
" Weight (True/False) | \n",
" Corrected Weight (mL) | \n",
" Copies/mL | \n",
" Log Copies/mL | \n",
" Seq_Dilution | \n",
" Copies in Amp Rxn | \n",
" Rel. Abundance LOD (%) | \n",
" Abs. Abundance LOD | \n",
" Rel. Abundance LOD (%) Corrected | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
0 rows × 21 columns
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [Well, Concentration, PoissonConfMax, PoissonConfMin, Total, Positives, Primer, Sample, Dilution, Corrected Concentration, Study ID, Weight (mL), Weight (True/False), Corrected Weight (mL), Copies/mL, Log Copies/mL, Seq_Dilution, Copies in Amp Rxn, Rel. Abundance LOD (%), Abs. Abundance LOD, Rel. Abundance LOD (%) Corrected]\n",
"Index: []\n",
"\n",
"[0 rows x 21 columns]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_total_load[df_total_load['Sample'].isin(lost)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Generate dataframes for each taxonomy level"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"def collapse_taxonomy(_df, level):\n",
" collapsed_dict = {}\n",
" index=0\n",
" \n",
" # Evaluate the selected taxonomy level to collapse to\n",
" if level == 'Genus':\n",
" index = -1\n",
" elif level == 'Family':\n",
" index = -2\n",
" elif level == 'Order':\n",
" index = -3\n",
" elif level == 'Class':\n",
" index = -4\n",
" elif level == 'Phylum':\n",
" index = -5\n",
" else:\n",
" raise ValueError('Could not interpret taxonomy level. Please use (Phylum, Class, Order, Family, Genus)')\n",
"\n",
" # Iterate through columns adding values together for each sample if the new column name already exists\n",
" for col in _df:\n",
" new_col = \";\".join(col.split(';')[:index])\n",
"\n",
" if new_col in collapsed_dict.keys():\n",
" collapsed_dict[new_col] += np.array(_df[col])\n",
" else:\n",
" collapsed_dict[new_col] = np.array(_df[col])\n",
"\n",
" df_collapsed = pd.DataFrame.from_dict(collapsed_dict).set_index(_df.index)\n",
" return df_collapsed"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"df_lod_list = [None]*6\n",
"\n",
"df_lod_list[0] = collapse_taxonomy(df_species_lod_filter, 'Phylum')\n",
"df_lod_list[1] = collapse_taxonomy(df_species_lod_filter, 'Class')\n",
"df_lod_list[2] = collapse_taxonomy(df_species_lod_filter, 'Order')\n",
"df_lod_list[3] = collapse_taxonomy(df_species_lod_filter, 'Family')\n",
"df_lod_list[4] = collapse_taxonomy(df_species_lod_filter, 'Genus')\n",
"df_lod_list[5] = df_species_lod_filter"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Generate relative and absolute abundance tables"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Actinomycetales;D_4__Actinomycetaceae;D_5__F0332;D_6__uncultured bacterium | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Alloscardovia;D_6__Bifidobacterium longum subsp. longum | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Scardovia;D_6__unidentified | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium kroppenstedtii | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium pseudodiphtheriticum | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;__ | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;D_6__Corynebacterium durum | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;__ | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Lawsonella;D_6__uncultured bacterium | \n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;__;__ | \n",
" ... | \n",
" D_0__Bacteria;D_1__Synergistetes;D_2__Synergistia;D_3__Synergistales;D_4__Synergistaceae;D_5__Pyramidobacter;D_6__Pyramidobacter piscolens | \n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__Firmicutes oral clone FM046;D_5__Firmicutes oral clone FM046;D_6__Firmicutes oral clone FM046 | \n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__gut metagenome;D_5__gut metagenome;D_6__gut metagenome | \n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__uncultured bacterium;D_5__uncultured bacterium;D_6__uncultured bacterium | \n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;__;__;__ | \n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;D_6__Mycoplasma salivarium ATCC 23064 | \n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;__ | \n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacterium | \n",
" D_0__Bacteria;__;__;__;__;__;__ | \n",
" Unassigned;__;__;__;__;__;__ | \n",
"
\n",
" \n",
" index | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 141 | \n",
" 0.542056 | \n",
" 0.542056 | \n",
" 0.542056 | \n",
" 0.542056 | \n",
" 0.542056 | \n",
" 0.542056 | \n",
" 5.420561e-01 | \n",
" 0.542056 | \n",
" 0.542056 | \n",
" 0.542056 | \n",
" ... | \n",
" 0.542056 | \n",
" 0.542056 | \n",
" 0.542056 | \n",
" 0.542056 | \n",
" 0.542056 | \n",
" 0.542056 | \n",
" 0.542056 | \n",
" 0.542056 | \n",
" 0.542056 | \n",
" 0.542056 | \n",
"
\n",
" \n",
" 142 | \n",
" 0.256053 | \n",
" 0.256053 | \n",
" 0.256053 | \n",
" 0.256053 | \n",
" 0.256053 | \n",
" 0.256053 | \n",
" 2.560533e-01 | \n",
" 0.256053 | \n",
" 0.256053 | \n",
" 0.256053 | \n",
" ... | \n",
" 0.256053 | \n",
" 840.110790 | \n",
" 0.256053 | \n",
" 0.256053 | \n",
" 0.256053 | \n",
" 0.256053 | \n",
" 0.256053 | \n",
" 0.256053 | \n",
" 0.256053 | \n",
" 0.256053 | \n",
"
\n",
" \n",
" 144 | \n",
" 2.677622 | \n",
" 2.677622 | \n",
" 2.677622 | \n",
" 2.677622 | \n",
" 2.677622 | \n",
" 2.677622 | \n",
" 2.677622e+00 | \n",
" 2.677622 | \n",
" 2.677622 | \n",
" 2.677622 | \n",
" ... | \n",
" 2.677622 | \n",
" 2.677622 | \n",
" 2.677622 | \n",
" 2.677622 | \n",
" 2.677622 | \n",
" 2.677622 | \n",
" 2.677622 | \n",
" 2.677622 | \n",
" 672.083146 | \n",
" 2.677622 | \n",
"
\n",
" \n",
" 145 | \n",
" 23.215800 | \n",
" 23.215800 | \n",
" 23.215800 | \n",
" 23.215800 | \n",
" 23.215800 | \n",
" 23.215800 | \n",
" 2.321580e+01 | \n",
" 23.215800 | \n",
" 23.215800 | \n",
" 23.215800 | \n",
" ... | \n",
" 23.215800 | \n",
" 23.215800 | \n",
" 23.215800 | \n",
" 23.215800 | \n",
" 23.215800 | \n",
" 23.215800 | \n",
" 23.215800 | \n",
" 23.215800 | \n",
" 23.215800 | \n",
" 23.215800 | \n",
"
\n",
" \n",
" 146 | \n",
" 1.601897 | \n",
" 177.810588 | \n",
" 1491.366284 | \n",
" 1.601897 | \n",
" 1.601897 | \n",
" 1.601897 | \n",
" 1.203025e+03 | \n",
" 289.943391 | \n",
" 1.601897 | \n",
" 1.601897 | \n",
" ... | \n",
" 1.601897 | \n",
" 1.601897 | \n",
" 1.601897 | \n",
" 1.601897 | \n",
" 1.601897 | \n",
" 1.601897 | \n",
" 1.601897 | \n",
" 1.601897 | \n",
" 1.601897 | \n",
" 1.601897 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 446 | \n",
" 1.573802 | \n",
" 1.573802 | \n",
" 1.573802 | \n",
" 1.573802 | \n",
" 1.573802 | \n",
" 1.573802 | \n",
" 1.573802e+00 | \n",
" 1.573802 | \n",
" 1.573802 | \n",
" 1.573802 | \n",
" ... | \n",
" 1.573802 | \n",
" 1.573802 | \n",
" 1.573802 | \n",
" 1.573802 | \n",
" 1.573802 | \n",
" 1.573802 | \n",
" 1.573802 | \n",
" 1.573802 | \n",
" 1.573802 | \n",
" 1.573802 | \n",
"
\n",
" \n",
" 447 | \n",
" 28.031160 | \n",
" 28.031160 | \n",
" 3672.081944 | \n",
" 28.031160 | \n",
" 28.031160 | \n",
" 28.031160 | \n",
" 5.073640e+03 | \n",
" 28.031160 | \n",
" 28.031160 | \n",
" 28.031160 | \n",
" ... | \n",
" 28.031160 | \n",
" 28.031160 | \n",
" 28.031160 | \n",
" 28.031160 | \n",
" 28.031160 | \n",
" 28.031160 | \n",
" 28.031160 | \n",
" 28.031160 | \n",
" 28.031160 | \n",
" 28.031160 | \n",
"
\n",
" \n",
" 448 | \n",
" 9.311661 | \n",
" 9.311661 | \n",
" 1219.827534 | \n",
" 9.311661 | \n",
" 9.311661 | \n",
" 9.311661 | \n",
" 1.312944e+03 | \n",
" 9.311661 | \n",
" 9.311661 | \n",
" 9.311661 | \n",
" ... | \n",
" 9.311661 | \n",
" 9.311661 | \n",
" 9.311661 | \n",
" 9.311661 | \n",
" 9.311661 | \n",
" 9.311661 | \n",
" 9.311661 | \n",
" 9.311661 | \n",
" 9.311661 | \n",
" 9.311661 | \n",
"
\n",
" \n",
" 449 | \n",
" 1.855765 | \n",
" 1.855765 | \n",
" 13270.577008 | \n",
" 1.855765 | \n",
" 1.855765 | \n",
" 1.855765 | \n",
" 2.423815e+04 | \n",
" 1.855765 | \n",
" 1.855765 | \n",
" 1.855765 | \n",
" ... | \n",
" 1.855765 | \n",
" 1.855765 | \n",
" 1.855765 | \n",
" 1.855765 | \n",
" 1.855765 | \n",
" 1.855765 | \n",
" 1.855765 | \n",
" 1.855765 | \n",
" 1.855765 | \n",
" 1.855765 | \n",
"
\n",
" \n",
" 451 | \n",
" 1057.959039 | \n",
" 1057.959039 | \n",
" 1057.959039 | \n",
" 1057.959039 | \n",
" 1057.959039 | \n",
" 1057.959039 | \n",
" 1.429303e+06 | \n",
" 1057.959039 | \n",
" 1057.959039 | \n",
" 1057.959039 | \n",
" ... | \n",
" 1057.959039 | \n",
" 1057.959039 | \n",
" 1057.959039 | \n",
" 96274.272512 | \n",
" 1057.959039 | \n",
" 1057.959039 | \n",
" 1057.959039 | \n",
" 1057.959039 | \n",
" 1057.959039 | \n",
" 1057.959039 | \n",
"
\n",
" \n",
"
\n",
"
250 rows × 546 columns
\n",
"
"
],
"text/plain": [
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Actinomycetales;D_4__Actinomycetaceae;D_5__F0332;D_6__uncultured bacterium \\\n",
"index \n",
"141 0.542056 \n",
"142 0.256053 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 1.601897 \n",
"... ... \n",
"446 1.573802 \n",
"447 28.031160 \n",
"448 9.311661 \n",
"449 1.855765 \n",
"451 1057.959039 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Alloscardovia;D_6__Bifidobacterium longum subsp. longum \\\n",
"index \n",
"141 0.542056 \n",
"142 0.256053 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 177.810588 \n",
"... ... \n",
"446 1.573802 \n",
"447 28.031160 \n",
"448 9.311661 \n",
"449 1.855765 \n",
"451 1057.959039 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Scardovia;D_6__unidentified \\\n",
"index \n",
"141 0.542056 \n",
"142 0.256053 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 1491.366284 \n",
"... ... \n",
"446 1.573802 \n",
"447 3672.081944 \n",
"448 1219.827534 \n",
"449 13270.577008 \n",
"451 1057.959039 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium kroppenstedtii \\\n",
"index \n",
"141 0.542056 \n",
"142 0.256053 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 1.601897 \n",
"... ... \n",
"446 1.573802 \n",
"447 28.031160 \n",
"448 9.311661 \n",
"449 1.855765 \n",
"451 1057.959039 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium pseudodiphtheriticum \\\n",
"index \n",
"141 0.542056 \n",
"142 0.256053 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 1.601897 \n",
"... ... \n",
"446 1.573802 \n",
"447 28.031160 \n",
"448 9.311661 \n",
"449 1.855765 \n",
"451 1057.959039 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;__ \\\n",
"index \n",
"141 0.542056 \n",
"142 0.256053 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 1.601897 \n",
"... ... \n",
"446 1.573802 \n",
"447 28.031160 \n",
"448 9.311661 \n",
"449 1.855765 \n",
"451 1057.959039 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;D_6__Corynebacterium durum \\\n",
"index \n",
"141 5.420561e-01 \n",
"142 2.560533e-01 \n",
"144 2.677622e+00 \n",
"145 2.321580e+01 \n",
"146 1.203025e+03 \n",
"... ... \n",
"446 1.573802e+00 \n",
"447 5.073640e+03 \n",
"448 1.312944e+03 \n",
"449 2.423815e+04 \n",
"451 1.429303e+06 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;__ \\\n",
"index \n",
"141 0.542056 \n",
"142 0.256053 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 289.943391 \n",
"... ... \n",
"446 1.573802 \n",
"447 28.031160 \n",
"448 9.311661 \n",
"449 1.855765 \n",
"451 1057.959039 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Lawsonella;D_6__uncultured bacterium \\\n",
"index \n",
"141 0.542056 \n",
"142 0.256053 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 1.601897 \n",
"... ... \n",
"446 1.573802 \n",
"447 28.031160 \n",
"448 9.311661 \n",
"449 1.855765 \n",
"451 1057.959039 \n",
"\n",
" D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;__;__ \\\n",
"index \n",
"141 0.542056 \n",
"142 0.256053 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 1.601897 \n",
"... ... \n",
"446 1.573802 \n",
"447 28.031160 \n",
"448 9.311661 \n",
"449 1.855765 \n",
"451 1057.959039 \n",
"\n",
" ... \\\n",
"index ... \n",
"141 ... \n",
"142 ... \n",
"144 ... \n",
"145 ... \n",
"146 ... \n",
"... ... \n",
"446 ... \n",
"447 ... \n",
"448 ... \n",
"449 ... \n",
"451 ... \n",
"\n",
" D_0__Bacteria;D_1__Synergistetes;D_2__Synergistia;D_3__Synergistales;D_4__Synergistaceae;D_5__Pyramidobacter;D_6__Pyramidobacter piscolens \\\n",
"index \n",
"141 0.542056 \n",
"142 0.256053 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 1.601897 \n",
"... ... \n",
"446 1.573802 \n",
"447 28.031160 \n",
"448 9.311661 \n",
"449 1.855765 \n",
"451 1057.959039 \n",
"\n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__Firmicutes oral clone FM046;D_5__Firmicutes oral clone FM046;D_6__Firmicutes oral clone FM046 \\\n",
"index \n",
"141 0.542056 \n",
"142 840.110790 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 1.601897 \n",
"... ... \n",
"446 1.573802 \n",
"447 28.031160 \n",
"448 9.311661 \n",
"449 1.855765 \n",
"451 1057.959039 \n",
"\n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__gut metagenome;D_5__gut metagenome;D_6__gut metagenome \\\n",
"index \n",
"141 0.542056 \n",
"142 0.256053 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 1.601897 \n",
"... ... \n",
"446 1.573802 \n",
"447 28.031160 \n",
"448 9.311661 \n",
"449 1.855765 \n",
"451 1057.959039 \n",
"\n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__uncultured bacterium;D_5__uncultured bacterium;D_6__uncultured bacterium \\\n",
"index \n",
"141 0.542056 \n",
"142 0.256053 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 1.601897 \n",
"... ... \n",
"446 1.573802 \n",
"447 28.031160 \n",
"448 9.311661 \n",
"449 1.855765 \n",
"451 96274.272512 \n",
"\n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;__;__;__ \\\n",
"index \n",
"141 0.542056 \n",
"142 0.256053 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 1.601897 \n",
"... ... \n",
"446 1.573802 \n",
"447 28.031160 \n",
"448 9.311661 \n",
"449 1.855765 \n",
"451 1057.959039 \n",
"\n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;D_6__Mycoplasma salivarium ATCC 23064 \\\n",
"index \n",
"141 0.542056 \n",
"142 0.256053 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 1.601897 \n",
"... ... \n",
"446 1.573802 \n",
"447 28.031160 \n",
"448 9.311661 \n",
"449 1.855765 \n",
"451 1057.959039 \n",
"\n",
" D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;__ \\\n",
"index \n",
"141 0.542056 \n",
"142 0.256053 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 1.601897 \n",
"... ... \n",
"446 1.573802 \n",
"447 28.031160 \n",
"448 9.311661 \n",
"449 1.855765 \n",
"451 1057.959039 \n",
"\n",
" D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacterium \\\n",
"index \n",
"141 0.542056 \n",
"142 0.256053 \n",
"144 2.677622 \n",
"145 23.215800 \n",
"146 1.601897 \n",
"... ... \n",
"446 1.573802 \n",
"447 28.031160 \n",
"448 9.311661 \n",
"449 1.855765 \n",
"451 1057.959039 \n",
"\n",
" D_0__Bacteria;__;__;__;__;__;__ Unassigned;__;__;__;__;__;__ \n",
"index \n",
"141 0.542056 0.542056 \n",
"142 0.256053 0.256053 \n",
"144 672.083146 2.677622 \n",
"145 23.215800 23.215800 \n",
"146 1.601897 1.601897 \n",
"... ... ... \n",
"446 1.573802 1.573802 \n",
"447 28.031160 28.031160 \n",
"448 9.311661 9.311661 \n",
"449 1.855765 1.855765 \n",
"451 1057.959039 1057.959039 \n",
"\n",
"[250 rows x 546 columns]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_rel_lod_list = [None]*6\n",
"df_abs_lod_list = [None]*6\n",
"\n",
"df_pseudo_rel_lod_list = [None]*6\n",
"df_pseudo_abs_lod_list = [None]*6\n",
"\n",
"for index, df in enumerate(df_lod_list):\n",
" df_rel_lod_list[index] = df.div(read_depth, axis=0).multiply(100)\n",
" df_abs_lod_list[index] = df_rel_lod_list[index].apply(lambda x: x*total_load_dict[x.name], 1).div(100)\n",
" \n",
" df_pseudo_rel_lod_list[index] = df_rel_lod_list[index]+(0.1/read_depth)*100\n",
" #df_pseudo_abs_lod_list[index] = df_pseudo_rel_lod_list[index].multiply(1e4).div(100)\n",
" df_pseudo_abs_lod_list[index] = df_pseudo_rel_lod_list[index].apply(lambda x: x*total_load_dict[x.name], 1).div(100)\n",
"\n",
"df_pseudo_abs_lod_list[5]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transform column taxa names into unique IDs.\n",
"This overcomes downstream issue when multiple columns have the same name"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" taxonomy | \n",
"
\n",
" \n",
" \n",
" \n",
" ASV0 | \n",
" D_0__Bacteria;D_1__Actinobacteria | \n",
"
\n",
" \n",
" ASV1 | \n",
" D_0__Bacteria;D_1__Bacteroidetes | \n",
"
\n",
" \n",
" ASV2 | \n",
" D_0__Bacteria;D_1__Chloroflexi | \n",
"
\n",
" \n",
" ASV3 | \n",
" D_0__Bacteria;D_1__Cyanobacteria | \n",
"
\n",
" \n",
" ASV4 | \n",
" D_0__Bacteria;D_1__Epsilonbacteraeota | \n",
"
\n",
" \n",
" ASV5 | \n",
" D_0__Bacteria;D_1__Firmicutes | \n",
"
\n",
" \n",
" ASV6 | \n",
" D_0__Bacteria;D_1__Fusobacteria | \n",
"
\n",
" \n",
" ASV7 | \n",
" D_0__Bacteria;D_1__Patescibacteria | \n",
"
\n",
" \n",
" ASV8 | \n",
" D_0__Bacteria;D_1__Proteobacteria | \n",
"
\n",
" \n",
" ASV9 | \n",
" D_0__Bacteria;D_1__Spirochaetes | \n",
"
\n",
" \n",
" ASV10 | \n",
" D_0__Bacteria;D_1__Synergistetes | \n",
"
\n",
" \n",
" ASV11 | \n",
" D_0__Bacteria;D_1__Tenericutes | \n",
"
\n",
" \n",
" ASV12 | \n",
" D_0__Bacteria;D_1__Verrucomicrobia | \n",
"
\n",
" \n",
" ASV13 | \n",
" D_0__Bacteria;__ | \n",
"
\n",
" \n",
" ASV14 | \n",
" Unassigned;__ | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" taxonomy\n",
"ASV0 D_0__Bacteria;D_1__Actinobacteria\n",
"ASV1 D_0__Bacteria;D_1__Bacteroidetes\n",
"ASV2 D_0__Bacteria;D_1__Chloroflexi\n",
"ASV3 D_0__Bacteria;D_1__Cyanobacteria\n",
"ASV4 D_0__Bacteria;D_1__Epsilonbacteraeota\n",
"ASV5 D_0__Bacteria;D_1__Firmicutes\n",
"ASV6 D_0__Bacteria;D_1__Fusobacteria\n",
"ASV7 D_0__Bacteria;D_1__Patescibacteria\n",
"ASV8 D_0__Bacteria;D_1__Proteobacteria\n",
"ASV9 D_0__Bacteria;D_1__Spirochaetes\n",
"ASV10 D_0__Bacteria;D_1__Synergistetes\n",
"ASV11 D_0__Bacteria;D_1__Tenericutes\n",
"ASV12 D_0__Bacteria;D_1__Verrucomicrobia\n",
"ASV13 D_0__Bacteria;__\n",
"ASV14 Unassigned;__"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_col_names_lod_list = [None]*6\n",
"\n",
"for index, df in enumerate(df_rel_lod_list):\n",
" num_cols = len(df.columns)\n",
" col_names = ['ASV' + str(x) for x in range(num_cols)]\n",
" df_col_names_lod_list[index] = pd.DataFrame(index=col_names, data={'taxonomy':df.columns.tolist()})\n",
" \n",
" df_rel_lod_list[index].columns = col_names\n",
" df_abs_lod_list[index].columns = col_names\n",
" \n",
" df_pseudo_rel_lod_list[index].columns = col_names\n",
" df_pseudo_abs_lod_list[index].columns = col_names\n",
" \n",
"df_col_names_lod_list[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Generate shorter taxonomy names for plotting purposes"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"exclusion_list = ['', 'uncultured bacterium', 'metagenome', 'uncultured', \n",
" 'gut metagenome', 'uncultured organism', 'unidentified', \n",
" 'uncultured Bacteroidales bacterium', 'uncultured Mollicutes bacterium', 'uncultured archaeon']\n",
"\n",
"for i in range(6):\n",
" if i == 0:\n",
" df_col_names_lod_list[i][['Kingdom', 'Phylum']] = df_col_names_lod_list[i]['taxonomy'].str.split(';', expand=True)\n",
" elif i==1:\n",
" df_col_names_lod_list[i][['Kingdom', 'Phylum', 'Class']] = df_col_names_lod_list[i]['taxonomy'].str.split(';', expand=True)\n",
" elif i==2:\n",
" df_col_names_lod_list[i][['Kingdom', 'Phylum', 'Class', 'Order']] = df_col_names_lod_list[i]['taxonomy'].str.split(';', expand=True)\n",
" elif i==3:\n",
" df_col_names_lod_list[i][['Kingdom', 'Phylum', 'Class', 'Order', 'Family']] = df_col_names_lod_list[i]['taxonomy'].str.split(';', expand=True)\n",
" elif i==4:\n",
" df_col_names_lod_list[i][['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus']] = df_col_names_lod_list[i]['taxonomy'].str.split(';', expand=True)\n",
" else:\n",
" df_col_names_lod_list[i][['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']] = df_col_names_lod_list[i]['taxonomy'].str.split(';', expand=True)\n",
" \n",
" labels_list = []\n",
" for index, row in df_col_names_lod_list[i].iterrows():\n",
" # Species\n",
" if row[-1][5:] in exclusion_list:\n",
" # Genus\n",
" if row[-2][5:] in exclusion_list:\n",
" # Family\n",
" if row[-3][5:] in exclusion_list:\n",
" # Order\n",
" if row[-4][5:] in exclusion_list:\n",
" # Class\n",
" if row[-5][5:] in exclusion_list:\n",
" # Phylum\n",
" if row[-6][5:] in exclusion_list:\n",
" labels_list.append(row[-7][5:] + '(' + df_col_names_lod_list[i].columns[-7][0].lower() + ')')\n",
" else:\n",
" labels_list.append(row[-6][5:] + '(' + df_col_names_lod_list[i].columns[-6][0].lower() + ')')\n",
" else:\n",
" labels_list.append(row[-5][5:] + '(' + df_col_names_lod_list[i].columns[-5][0].lower() + ')')\n",
" else:\n",
" labels_list.append(row[-4][5:] + '(' + df_col_names_lod_list[i].columns[-4][0].lower() + ')')\n",
" else:\n",
" labels_list.append(row[-3][5:] + '(' + df_col_names_lod_list[i].columns[-3][0].lower() + ')')\n",
" else:\n",
" labels_list.append(row[-2][5:] + '(' + df_col_names_lod_list[i].columns[-2][0].lower() + ')')\n",
" else:\n",
" labels_list.append(row[-1][5:] + '(' + df_col_names_lod_list[i].columns[-1][0].lower() + ')')\n",
" \n",
" df_col_names_lod_list[i]['label'] = labels_list"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sort the columns by the max abundance of taxa across all samples"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ASV76 | \n",
" ASV259 | \n",
" ASV255 | \n",
" ASV191 | \n",
" ASV247 | \n",
" ASV29 | \n",
" ASV188 | \n",
" ASV241 | \n",
" ASV61 | \n",
" ASV81 | \n",
" ... | \n",
" ASV256 | \n",
" ASV111 | \n",
" ASV262 | \n",
" ASV89 | \n",
" ASV226 | \n",
" ASV269 | \n",
" ASV151 | \n",
" ASV229 | \n",
" ASV180 | \n",
" ASV31 | \n",
"
\n",
" \n",
" index | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 141 | \n",
" 2.577477e+04 | \n",
" 7252.710875 | \n",
" 0.000000e+00 | \n",
" 1.036953e+04 | \n",
" 0.000000e+00 | \n",
" 76511.221224 | \n",
" 3.498972e+04 | \n",
" 3908.224619 | \n",
" 6.016823e+02 | \n",
" 0.000000e+00 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 54.205612 | \n",
"
\n",
" \n",
" 142 | \n",
" 3.738122e+04 | \n",
" 0.000000 | \n",
" 0.000000e+00 | \n",
" 1.258758e+04 | \n",
" 0.000000e+00 | \n",
" 6001.888725 | \n",
" 1.603150e+04 | \n",
" 0.000000 | \n",
" 3.774225e+03 | \n",
" 0.000000e+00 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 144 | \n",
" 1.877013e+04 | \n",
" 267.762210 | \n",
" 2.583905e+04 | \n",
" 0.000000e+00 | \n",
" 1.155207e+06 | \n",
" 0.000000 | \n",
" 2.677622e+02 | \n",
" 0.000000 | \n",
" 9.103915e+02 | \n",
" 2.945384e+02 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 145 | \n",
" 1.950127e+04 | \n",
" 808142.011487 | \n",
" 5.256522e+06 | \n",
" 2.321580e+03 | \n",
" 0.000000e+00 | \n",
" 0.000000 | \n",
" 1.085107e+06 | \n",
" 3018.054050 | \n",
" 2.785896e+03 | \n",
" 1.798064e+06 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 146 | \n",
" 3.716241e+05 | \n",
" 17124.280966 | \n",
" 5.446450e+02 | \n",
" 1.728447e+04 | \n",
" 0.000000e+00 | \n",
" 8538.112025 | \n",
" 3.996733e+04 | \n",
" 20087.790768 | \n",
" 2.745652e+04 | \n",
" 0.000000e+00 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 446 | \n",
" 3.187421e+05 | \n",
" 13817.980372 | \n",
" 0.000000e+00 | \n",
" 5.738082e+04 | \n",
" 0.000000e+00 | \n",
" 38778.477945 | \n",
" 6.306224e+04 | \n",
" 19121.692655 | \n",
" 3.322296e+04 | \n",
" 0.000000e+00 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 447 | \n",
" 8.643969e+06 | \n",
" 22424.927902 | \n",
" 0.000000e+00 | \n",
" 9.110127e+04 | \n",
" 0.000000e+00 | \n",
" 58865.435744 | \n",
" 7.904787e+05 | \n",
" 15977.761130 | \n",
" 5.894953e+05 | \n",
" 0.000000e+00 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 448 | \n",
" 1.287523e+06 | \n",
" 290989.392571 | \n",
" 0.000000e+00 | \n",
" 8.101145e+04 | \n",
" 0.000000e+00 | \n",
" 772774.710062 | \n",
" 3.945351e+05 | \n",
" 112671.092803 | \n",
" 4.614859e+05 | \n",
" 0.000000e+00 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 449 | \n",
" 4.342119e+05 | \n",
" 83991.933352 | \n",
" 0.000000e+00 | \n",
" 2.551677e+04 | \n",
" 0.000000e+00 | \n",
" 0.000000 | \n",
" 3.099128e+03 | \n",
" 27780.805176 | \n",
" 3.305118e+04 | \n",
" 0.000000e+00 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 451 | \n",
" 3.000689e+08 | \n",
" 528979.519297 | \n",
" 0.000000e+00 | \n",
" 1.263203e+07 | \n",
" 0.000000e+00 | \n",
" 0.000000 | \n",
" 4.528065e+06 | \n",
" 740571.327016 | \n",
" 1.153916e+08 | \n",
" 0.000000e+00 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
"
\n",
" \n",
"
\n",
"
250 rows × 286 columns
\n",
"
"
],
"text/plain": [
" ASV76 ASV259 ASV255 ASV191 ASV247 \\\n",
"index \n",
"141 2.577477e+04 7252.710875 0.000000e+00 1.036953e+04 0.000000e+00 \n",
"142 3.738122e+04 0.000000 0.000000e+00 1.258758e+04 0.000000e+00 \n",
"144 1.877013e+04 267.762210 2.583905e+04 0.000000e+00 1.155207e+06 \n",
"145 1.950127e+04 808142.011487 5.256522e+06 2.321580e+03 0.000000e+00 \n",
"146 3.716241e+05 17124.280966 5.446450e+02 1.728447e+04 0.000000e+00 \n",
"... ... ... ... ... ... \n",
"446 3.187421e+05 13817.980372 0.000000e+00 5.738082e+04 0.000000e+00 \n",
"447 8.643969e+06 22424.927902 0.000000e+00 9.110127e+04 0.000000e+00 \n",
"448 1.287523e+06 290989.392571 0.000000e+00 8.101145e+04 0.000000e+00 \n",
"449 4.342119e+05 83991.933352 0.000000e+00 2.551677e+04 0.000000e+00 \n",
"451 3.000689e+08 528979.519297 0.000000e+00 1.263203e+07 0.000000e+00 \n",
"\n",
" ASV29 ASV188 ASV241 ASV61 ASV81 \\\n",
"index \n",
"141 76511.221224 3.498972e+04 3908.224619 6.016823e+02 0.000000e+00 \n",
"142 6001.888725 1.603150e+04 0.000000 3.774225e+03 0.000000e+00 \n",
"144 0.000000 2.677622e+02 0.000000 9.103915e+02 2.945384e+02 \n",
"145 0.000000 1.085107e+06 3018.054050 2.785896e+03 1.798064e+06 \n",
"146 8538.112025 3.996733e+04 20087.790768 2.745652e+04 0.000000e+00 \n",
"... ... ... ... ... ... \n",
"446 38778.477945 6.306224e+04 19121.692655 3.322296e+04 0.000000e+00 \n",
"447 58865.435744 7.904787e+05 15977.761130 5.894953e+05 0.000000e+00 \n",
"448 772774.710062 3.945351e+05 112671.092803 4.614859e+05 0.000000e+00 \n",
"449 0.000000 3.099128e+03 27780.805176 3.305118e+04 0.000000e+00 \n",
"451 0.000000 4.528065e+06 740571.327016 1.153916e+08 0.000000e+00 \n",
"\n",
" ... ASV256 ASV111 ASV262 ASV89 ASV226 ASV269 ASV151 ASV229 \\\n",
"index ... \n",
"141 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"142 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"144 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"145 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"146 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... ... ... ... ... ... \n",
"446 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"447 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"448 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"449 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"451 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"\n",
" ASV180 ASV31 \n",
"index \n",
"141 0.0 54.205612 \n",
"142 0.0 0.000000 \n",
"144 0.0 0.000000 \n",
"145 0.0 0.000000 \n",
"146 0.0 0.000000 \n",
"... ... ... \n",
"446 0.0 0.000000 \n",
"447 0.0 0.000000 \n",
"448 0.0 0.000000 \n",
"449 0.0 0.000000 \n",
"451 0.0 0.000000 \n",
"\n",
"[250 rows x 286 columns]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_rel_sort_lod_list = [None]*6\n",
"df_abs_sort_lod_list = [None]*6\n",
"\n",
"df_pseudo_rel_sort_lod_list = [None]*6\n",
"df_pseudo_abs_sort_lod_list = [None]*6\n",
"\n",
"for i in range(6):\n",
" taxa_sorted = df_abs_lod_list[i].mean().sort_values(ascending=False).index\n",
" \n",
" df_rel_sort_lod_list[i] = df_rel_lod_list[i].loc[:, taxa_sorted]\n",
" df_abs_sort_lod_list[i] = df_abs_lod_list[i].loc[:, taxa_sorted]\n",
" \n",
" df_pseudo_rel_sort_lod_list[i] = df_pseudo_rel_lod_list[i].loc[:, taxa_sorted]\n",
" df_pseudo_abs_sort_lod_list[i] = df_pseudo_abs_lod_list[i].loc[:, taxa_sorted]\n",
" \n",
"df_abs_sort_lod_list[4]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save the working files to allow use in individual analysis workbooks"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"pickle.dump(df_rel_sort_lod_list, open('pickle_files/rel_sort_lod_list.pkl', 'wb'))\n",
"pickle.dump(df_abs_sort_lod_list, open('pickle_files/abs_sort_lod_list.pkl', 'wb'))\n",
"\n",
"pickle.dump(df_pseudo_rel_sort_lod_list, open('pickle_files/pseudo_rel_sort_lod_list.pkl', 'wb'))\n",
"pickle.dump(df_pseudo_abs_sort_lod_list, open('pickle_files/pseudo_abs_sort_lod_list.pkl', 'wb'))\n",
"\n",
"pickle.dump(df_col_names_lod_list, open('pickle_files/col_names_lod_list.pkl', 'wb'))\n",
"pickle.dump(df_total_load, open('pickle_files/total_load_duodenum.pkl', 'wb'))\n",
"pickle.dump(seq_metadata, open('pickle_files/seq_duodenum_metadata.pkl', 'wb'))"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" taxonomy | \n",
" Kingdom | \n",
" Phylum | \n",
" Class | \n",
" Order | \n",
" Family | \n",
" Genus | \n",
" Species | \n",
" label | \n",
"
\n",
" \n",
" \n",
" \n",
" ASV193 | \n",
" D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3... | \n",
" D_0__Bacteria | \n",
" D_1__Firmicutes | \n",
" D_2__Bacilli | \n",
" D_3__Lactobacillales | \n",
" D_4__Streptococcaceae | \n",
" D_5__Streptococcus | \n",
" __ | \n",
" Streptococcus(g) | \n",
"
\n",
" \n",
" ASV501 | \n",
" D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr... | \n",
" D_0__Bacteria | \n",
" D_1__Proteobacteria | \n",
" D_2__Gammaproteobacteria | \n",
" D_3__Pasteurellales | \n",
" D_4__Pasteurellaceae | \n",
" D_5__Haemophilus | \n",
" __ | \n",
" Haemophilus(g) | \n",
"
\n",
" \n",
" ASV494 | \n",
" D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr... | \n",
" D_0__Bacteria | \n",
" D_1__Proteobacteria | \n",
" D_2__Gammaproteobacteria | \n",
" D_3__Enterobacteriales | \n",
" D_4__Enterobacteriaceae | \n",
" __ | \n",
" __ | \n",
" Enterobacteriaceae(f) | \n",
"
\n",
" \n",
" ASV404 | \n",
" D_0__Bacteria;D_1__Fusobacteria;D_2__Fusobacte... | \n",
" D_0__Bacteria | \n",
" D_1__Fusobacteria | \n",
" D_2__Fusobacteriia | \n",
" D_3__Fusobacteriales | \n",
" D_4__Fusobacteriaceae | \n",
" D_5__Fusobacterium | \n",
" __ | \n",
" Fusobacterium(g) | \n",
"
\n",
" \n",
" ASV486 | \n",
" D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr... | \n",
" D_0__Bacteria | \n",
" D_1__Proteobacteria | \n",
" D_2__Gammaproteobacteria | \n",
" D_3__Enterobacteriales | \n",
" D_4__Enterobacteriaceae | \n",
" D_5__Escherichia-Shigella | \n",
" __ | \n",
" Escherichia-Shigella(g) | \n",
"
\n",
" \n",
" ASV397 | \n",
" D_0__Bacteria;D_1__Firmicutes;D_2__Negativicut... | \n",
" D_0__Bacteria | \n",
" D_1__Firmicutes | \n",
" D_2__Negativicutes | \n",
" D_3__Selenomonadales | \n",
" D_4__Veillonellaceae | \n",
" D_5__Veillonella | \n",
" __ | \n",
" Veillonella(g) | \n",
"
\n",
" \n",
" ASV477 | \n",
" D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr... | \n",
" D_0__Bacteria | \n",
" D_1__Proteobacteria | \n",
" D_2__Gammaproteobacteria | \n",
" D_3__Betaproteobacteriales | \n",
" D_4__Neisseriaceae | \n",
" D_5__Neisseria | \n",
" D_6__uncultured bacterium | \n",
" Neisseria(g) | \n",
"
\n",
" \n",
" ASV67 | \n",
" D_0__Bacteria;D_1__Bacteroidetes;D_2__Bacteroi... | \n",
" D_0__Bacteria | \n",
" D_1__Bacteroidetes | \n",
" D_2__Bacteroidia | \n",
" D_3__Bacteroidales | \n",
" D_4__Prevotellaceae | \n",
" D_5__Prevotella 7 | \n",
" D_6__Prevotella melaninogenica | \n",
" Prevotella melaninogenica(s) | \n",
"
\n",
" \n",
" ASV155 | \n",
" D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3... | \n",
" D_0__Bacteria | \n",
" D_1__Firmicutes | \n",
" D_2__Bacilli | \n",
" D_3__Bacillales | \n",
" D_4__Family XI | \n",
" D_5__Gemella | \n",
" __ | \n",
" Gemella(g) | \n",
"
\n",
" \n",
" ASV206 | \n",
" D_0__Bacteria;D_1__Firmicutes;D_2__Clostridia;... | \n",
" D_0__Bacteria | \n",
" D_1__Firmicutes | \n",
" D_2__Clostridia | \n",
" D_3__Clostridiales | \n",
" D_4__Clostridiaceae 1 | \n",
" D_5__Clostridium sensu stricto 1 | \n",
" __ | \n",
" Clostridium sensu stricto 1(g) | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" taxonomy Kingdom \\\n",
"ASV193 D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3... D_0__Bacteria \n",
"ASV501 D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr... D_0__Bacteria \n",
"ASV494 D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr... D_0__Bacteria \n",
"ASV404 D_0__Bacteria;D_1__Fusobacteria;D_2__Fusobacte... D_0__Bacteria \n",
"ASV486 D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr... D_0__Bacteria \n",
"ASV397 D_0__Bacteria;D_1__Firmicutes;D_2__Negativicut... D_0__Bacteria \n",
"ASV477 D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr... D_0__Bacteria \n",
"ASV67 D_0__Bacteria;D_1__Bacteroidetes;D_2__Bacteroi... D_0__Bacteria \n",
"ASV155 D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3... D_0__Bacteria \n",
"ASV206 D_0__Bacteria;D_1__Firmicutes;D_2__Clostridia;... D_0__Bacteria \n",
"\n",
" Phylum Class \\\n",
"ASV193 D_1__Firmicutes D_2__Bacilli \n",
"ASV501 D_1__Proteobacteria D_2__Gammaproteobacteria \n",
"ASV494 D_1__Proteobacteria D_2__Gammaproteobacteria \n",
"ASV404 D_1__Fusobacteria D_2__Fusobacteriia \n",
"ASV486 D_1__Proteobacteria D_2__Gammaproteobacteria \n",
"ASV397 D_1__Firmicutes D_2__Negativicutes \n",
"ASV477 D_1__Proteobacteria D_2__Gammaproteobacteria \n",
"ASV67 D_1__Bacteroidetes D_2__Bacteroidia \n",
"ASV155 D_1__Firmicutes D_2__Bacilli \n",
"ASV206 D_1__Firmicutes D_2__Clostridia \n",
"\n",
" Order Family \\\n",
"ASV193 D_3__Lactobacillales D_4__Streptococcaceae \n",
"ASV501 D_3__Pasteurellales D_4__Pasteurellaceae \n",
"ASV494 D_3__Enterobacteriales D_4__Enterobacteriaceae \n",
"ASV404 D_3__Fusobacteriales D_4__Fusobacteriaceae \n",
"ASV486 D_3__Enterobacteriales D_4__Enterobacteriaceae \n",
"ASV397 D_3__Selenomonadales D_4__Veillonellaceae \n",
"ASV477 D_3__Betaproteobacteriales D_4__Neisseriaceae \n",
"ASV67 D_3__Bacteroidales D_4__Prevotellaceae \n",
"ASV155 D_3__Bacillales D_4__Family XI \n",
"ASV206 D_3__Clostridiales D_4__Clostridiaceae 1 \n",
"\n",
" Genus Species \\\n",
"ASV193 D_5__Streptococcus __ \n",
"ASV501 D_5__Haemophilus __ \n",
"ASV494 __ __ \n",
"ASV404 D_5__Fusobacterium __ \n",
"ASV486 D_5__Escherichia-Shigella __ \n",
"ASV397 D_5__Veillonella __ \n",
"ASV477 D_5__Neisseria D_6__uncultured bacterium \n",
"ASV67 D_5__Prevotella 7 D_6__Prevotella melaninogenica \n",
"ASV155 D_5__Gemella __ \n",
"ASV206 D_5__Clostridium sensu stricto 1 __ \n",
"\n",
" label \n",
"ASV193 Streptococcus(g) \n",
"ASV501 Haemophilus(g) \n",
"ASV494 Enterobacteriaceae(f) \n",
"ASV404 Fusobacterium(g) \n",
"ASV486 Escherichia-Shigella(g) \n",
"ASV397 Veillonella(g) \n",
"ASV477 Neisseria(g) \n",
"ASV67 Prevotella melaninogenica(s) \n",
"ASV155 Gemella(g) \n",
"ASV206 Clostridium sensu stricto 1(g) "
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Display the taxonomy/label rows for the first 10 columns of df_abs_sort_lod_list[5]\n",
"first_ten_asv_ids = df_abs_sort_lod_list[5].columns[:10].tolist()\n",
"df_col_names_lod_list[5].loc[first_ten_asv_ids]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}