{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import pickle" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load in read count data (Qiime2 taxa barplot csv files)\n", "\n", "Samples were subsampled to 45,386 reads. Samples with fewer than this number of reads after DADA2 processing were removed." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "df_seq_orig_species = pd.read_csv('data_files/species_counts_duodenum_45386.csv').set_index('index')\n", "\n", "# All samples were subsampled to the same depth, so the first row's total is used as the read depth.\n", "# numeric_only=True keeps the string metadata columns (Description, Body_Site) out of the row sum,\n", "# and .iloc[0] takes the first row by position because the index holds sample-ID labels.\n", "read_depth = df_seq_orig_species.sum(axis=1, numeric_only=True).iloc[0]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "45386.0" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "read_depth" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set the number of metadata columns in the sequencing data" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "num_metadata_cols = 2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Remove the duplicate sequencing samples and rename the rows with '_Duo' in the name to just the sample ID" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
D_0__Archaea;D_1__Euryarchaeota;D_2__Halobacteria;D_3__Halobacteriales;D_4__Haloferacaceae;D_5__Halopenitus;D_6__Halopenitus persicusD_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__Methanobacterium formicicumD_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__uncultured archaeonD_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobrevibacter;__D_0__Archaea;D_1__Nanoarchaeaeota;D_2__Woesearchaeia;__;__;__;__D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__Blastocatella;__D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__uncultured;__D_0__Bacteria;D_1__Acidobacteria;D_2__Holophagae;D_3__Subgroup 7;__;__;__D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__Microtrichaceae;D_5__IMCC26207;D_6__uncultured bacteriumD_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__uncultured;D_5__uncultured Acidimicrobidae bacterium;D_6__uncultured Acidimicrobidae bacterium...D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;D_5__Opitutus;D_6__uncultured Verrucomicrobia bacteriumD_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;__;__D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacteriumD_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;__D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Verrucomicrobiaceae;D_5__uncultured;D_6__uncultured 
bacteriumD_0__Bacteria;D_1__WPS-2;D_2__metagenome;D_3__metagenome;D_4__metagenome;D_5__metagenome;D_6__metagenomeD_0__Bacteria;__;__;__;__;__;__Unassigned;__;__;__;__;__;__DescriptionBody_Site
index
1410.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.0141Duodenum
1420.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.017.00.0142Duodenum
1440.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.025.00.0144Duodenum
1450.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.0145Duodenum
1460.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.0146Duodenum
..................................................................
4460.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.070.00.0446_DuoDuodenum
4470.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.0447_DuoDuodenum
4480.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.0448_DuoDuodenum
4490.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.0449_DuoDuodenum
4510.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.0451_DuoDuodenum
\n", "

254 rows × 1068 columns

\n", "
" ], "text/plain": [ " D_0__Archaea;D_1__Euryarchaeota;D_2__Halobacteria;D_3__Halobacteriales;D_4__Haloferacaceae;D_5__Halopenitus;D_6__Halopenitus persicus \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__Methanobacterium formicicum \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__uncultured archaeon \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobrevibacter;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Archaea;D_1__Nanoarchaeaeota;D_2__Woesearchaeia;__;__;__;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__Blastocatella;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... 
\n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__uncultured;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Acidobacteria;D_2__Holophagae;D_3__Subgroup 7;__;__;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__Microtrichaceae;D_5__IMCC26207;D_6__uncultured bacterium \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__uncultured;D_5__uncultured Acidimicrobidae bacterium;D_6__uncultured Acidimicrobidae bacterium \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " ... \\\n", "index ... \n", "141 ... \n", "142 ... \n", "144 ... \n", "145 ... \n", "146 ... \n", "... ... \n", "446 ... \n", "447 ... \n", "448 ... \n", "449 ... \n", "451 ... \n", "\n", " D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;D_5__Opitutus;D_6__uncultured Verrucomicrobia bacterium \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... 
\n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;__;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacterium \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Verrucomicrobiaceae;D_5__uncultured;D_6__uncultured bacterium \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__WPS-2;D_2__metagenome;D_3__metagenome;D_4__metagenome;D_5__metagenome;D_6__metagenome \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;__;__;__;__;__;__ Unassigned;__;__;__;__;__;__ \\\n", "index \n", "141 0.0 0.0 \n", "142 17.0 0.0 \n", "144 25.0 0.0 \n", "145 0.0 0.0 \n", "146 0.0 0.0 \n", "... ... ... 
\n", "446 70.0 0.0 \n", "447 0.0 0.0 \n", "448 0.0 0.0 \n", "449 0.0 0.0 \n", "451 0.0 0.0 \n", "\n", " Description Body_Site \n", "index \n", "141 141 Duodenum \n", "142 142 Duodenum \n", "144 144 Duodenum \n", "145 145 Duodenum \n", "146 146 Duodenum \n", "... ... ... \n", "446 446_Duo Duodenum \n", "447 447_Duo Duodenum \n", "448 448_Duo Duodenum \n", "449 449_Duo Duodenum \n", "451 451_Duo Duodenum \n", "\n", "[254 rows x 1068 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Duplicate sequencing samples to discard\n", "duplicate_runs = ['387_Duo', '388_Duo', '390_Duo', '391_Duo', '392_Duo', '394_Duo', '409_Duo', '410_Duo', '418_Duo', '423_Duo', '425_Duo', '433_Duo']\n", "\n", "# Remaining '_Duo' row labels are renamed to the bare sample ID\n", "duo_to_sample_id = {'417_Duo':'417', '434_Duo':'434', '437_Duo':'437', '438_Duo':'438', '441_Duo':'441', '446_Duo':'446', '447_Duo':'447', '448_Duo':'448', '449_Duo':'449', '451_Duo':'451'}\n", "\n", "df_seq_orig_species = df_seq_orig_species.drop(index=duplicate_runs).rename(index=duo_to_sample_id)\n", "\n", "# Convert the sample IDs to integers BEFORE sorting so the rows are ordered numerically\n", "# rather than lexicographically as strings.\n", "df_seq_orig_species.index = df_seq_orig_species.index.astype(int)\n", "df_seq_orig_species = df_seq_orig_species.sort_index()\n", "df_seq_orig_species" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# This taxa was only in second batch of sequenced duodenum samples likely indicating it is a contaminant. 
It is removed because\n", "# it interferes with a plot comparing saliva to duodenum samples.\n", "df_seq_orig_species.drop(['D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3__Bacillales;D_4__Paenibacillaceae;D_5__Paenibacillus;D_6__Paenibacillus darwinianus'], axis=1, inplace=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load in absolute abundance data (dPCR)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "df_total_load = pd.read_excel('dPCR data/dPCR_total_loads_duodenum.xlsx', index_col=0)\n", "df_weights = pd.read_csv('data_files/sample weights.csv')\n", "\n", "# Merge the two dataframes together based on the sample ID\n", "df_total_load = df_total_load.merge(df_weights, left_on='Sample', right_on='Study ID')\n", "\n", "# Parse the recorded weights. Anything that is not a number (e.g. 'no data', 'no weight', blank cells)\n", "# becomes NaN instead of relying on the first character of the cell being a digit.\n", "numeric_weight = pd.to_numeric(df_total_load['Weight (mL)'], errors='coerce')\n", "\n", "# Add a column saying whether the sample weight is present (True) or missing (False)\n", "df_total_load['Weight (True/False)'] = numeric_weight.notna()\n", "\n", "# Determine the average sample weight over the samples that have a recorded weight (NaN is skipped)\n", "mean_weight = numeric_weight.mean()\n", "\n", "# Create new column where any sample with a missing weight is set to the average weight of all samples\n", "df_total_load['Corrected Weight (mL)'] = numeric_weight.fillna(mean_weight)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
WellConcentrationPoissonConfMaxPoissonConfMinTotalPositivesPrimerSampleDilutionCorrected ConcentrationStudy IDWeight (mL)Weight (True/False)Corrected Weight (mL)
74B081430.01456.01417.01870613158mod_Caporaso215101430000.0215no dataFalse0.884958
88E03671.0686.0663.0178837771mod_Caporaso23350033550000.0233no weightFalse0.884958
119A1023.325.722.017629345mod_Caporaso2801023300.0280no dataFalse0.884958
142B06268.0277.0264.0199254064mod_Caporaso31810268000.0318no dataFalse0.884958
145F06782.0799.0774.0180208753mod_Caporaso32410782000.0324no dataFalse0.884958
178H02286.0294.0282.0202434368mod_Caporaso36010286000.0360no dataFalse0.884958
189C103079.03139.03050.01968418247mod_Caporaso372103079000.0372no sampleFalse0.884958
201G113910.04010.03860.01682616220mod_Caporaso384103910000.0384no dataFalse0.884958
214F0416.718.715.719006268mod_Caporaso4001016700.0400no dataFalse0.884958
242B0990.194.987.7183341352mod_Caporaso4301090100.0430no dataFalse0.884958
251D073740.03830.03700.01546614823mod_Caporaso448103740000.0448no dataFalse0.884958
\n", "
" ], "text/plain": [ " Well Concentration PoissonConfMax PoissonConfMin Total Positives \\\n", "74 B08 1430.0 1456.0 1417.0 18706 13158 \n", "88 E03 671.0 686.0 663.0 17883 7771 \n", "119 A10 23.3 25.7 22.0 17629 345 \n", "142 B06 268.0 277.0 264.0 19925 4064 \n", "145 F06 782.0 799.0 774.0 18020 8753 \n", "178 H02 286.0 294.0 282.0 20243 4368 \n", "189 C10 3079.0 3139.0 3050.0 19684 18247 \n", "201 G11 3910.0 4010.0 3860.0 16826 16220 \n", "214 F04 16.7 18.7 15.7 19006 268 \n", "242 B09 90.1 94.9 87.7 18334 1352 \n", "251 D07 3740.0 3830.0 3700.0 15466 14823 \n", "\n", " Primer Sample Dilution Corrected Concentration Study ID \\\n", "74 mod_Caporaso 215 10 1430000.0 215 \n", "88 mod_Caporaso 233 500 33550000.0 233 \n", "119 mod_Caporaso 280 10 23300.0 280 \n", "142 mod_Caporaso 318 10 268000.0 318 \n", "145 mod_Caporaso 324 10 782000.0 324 \n", "178 mod_Caporaso 360 10 286000.0 360 \n", "189 mod_Caporaso 372 10 3079000.0 372 \n", "201 mod_Caporaso 384 10 3910000.0 384 \n", "214 mod_Caporaso 400 10 16700.0 400 \n", "242 mod_Caporaso 430 10 90100.0 430 \n", "251 mod_Caporaso 448 10 3740000.0 448 \n", "\n", " Weight (mL) Weight (True/False) Corrected Weight (mL) \n", "74 no data False 0.884958 \n", "88 no weight False 0.884958 \n", "119 no data False 0.884958 \n", "142 no data False 0.884958 \n", "145 no data False 0.884958 \n", "178 no data False 0.884958 \n", "189 no sample False 0.884958 \n", "201 no data False 0.884958 \n", "214 no data False 0.884958 \n", "242 no data False 0.884958 \n", "251 no data False 0.884958 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Print out the samples without weights for reference (N=11)\n", "df_total_load[~df_total_load['Weight (True/False)']]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Normalize concentration to the input volume" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "## Set the lower dPCR threshold. 
95% CI is +-1X and the dPCR blanks are <1cp/uL with +3std dev of ~1 cp/uL. \n", "## This means we would have ~2X resolution at 2 cp/uL.\n", "## .copy() makes the filtered frame independent, so the column assignments below do not write into a slice.\n", "df_total_load = df_total_load[(df_total_load['Concentration']>2)].copy()\n", "\n", "## Calculate Copies/mL\n", "df_total_load['Copies/mL'] = df_total_load['Corrected Concentration']/df_total_load['Corrected Weight (mL)']\n", "df_total_load['Log Copies/mL'] = np.log10(df_total_load['Copies/mL'])" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "df_total_load[['Sample', 'Copies/mL']].to_excel('duodenum_total_loads.xlsx')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Calculate LOD in terms of absolute abundance and relative abundance, 95% confidence of the template being added to the sample (3 copy input)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# These samples were diluted before placing sample in library reaction due to inhibitors preventing amplification in undiluted sample\n", "# NOTE(review): sample 423 was listed twice in the original literal (423:100 and 423:50). Python keeps the\n", "# last value for a repeated key, so 50 is the factor that was actually in effect and is the one kept here.\n", "# Confirm the correct dilution for sample 423 against the lab records.\n", "diluted_samples = {437:10, 438:10, 441:10, 446:10, 447:10, 448:10, 449:10, 451:10,\n", "                   395:100, 198:50, 423:50, 427:50, 373:10, 321:10, 169:10, 375:10, 353:10,\n", "                   242:10, 411:10, 312:10, 433:2, 366:2}\n", "\n", "# Create column to account for the fact that some samples were diluted before input into library prep reaction\n", "# (samples that are not listed in diluted_samples get a dilution factor of 1)\n", "df_total_load['Seq_Dilution'] = df_total_load['Sample'].map(diluted_samples).fillna(1).astype(int)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
WellConcentrationPoissonConfMaxPoissonConfMinTotalPositivesPrimerSampleDilutionCorrected ConcentrationStudy IDWeight (mL)Weight (True/False)Corrected Weight (mL)Copies/mLLog Copies/mLSeq_DilutionCopies in Amp RxnRel. Abundance LOD (%)Abs. Abundance LOD
0A02400.0411.0394.0171794950mod_Caporaso14110400000.01411.6259True1.62592.460176e+055.390966114000.00.02142952.718055
1B0224.326.923.117527359mod_Caporaso1421024300.01420.2091True0.20911.162123e+055.0652521850.50.352734409.920066
2A05373.0383.0368.0194275276mod_Caporaso14550018650000.01451.77True1.77001.053672e+077.0227061652750.00.00046048.426150
3D021234.01258.01222.01700011044mod_Caporaso146101234000.01461.6973True1.69737.270371e+055.861557143190.00.00694650.500375
4E02642.0657.0634.0171807226mod_Caporaso14710642000.01471.7476True1.74763.673610e+055.565093122470.00.01335149.046856
...............................................................
255D015810.06060.05590.01357713480mod_Caporaso451500290500000.04510.605True0.60504.801653e+088.681391101016750.00.0002951416.765053
256A07154.0161.0150.0140561720mod_Caporaso20725000385000000.02071.8095True1.80952.127660e+088.327902113475000.00.00002247.369044
257B07170.0177.0167.0161052169mod_Caporaso27425000425000000.02741.083True1.08303.924284e+088.593760114875000.00.00002079.145231
258C07405.0417.0399.0155744539mod_Caporaso3225000202500000.03220.219True0.21909.246575e+088.96598117087500.00.000042391.389432
259D07428.0440.0422.0156244766mod_Caporaso3955000214000000.03950.491True0.49104.358452e+088.63933210074900.00.00400517457.084667
\n", "

256 rows × 20 columns

\n", "
" ], "text/plain": [ " Well Concentration PoissonConfMax PoissonConfMin Total Positives \\\n", "0 A02 400.0 411.0 394.0 17179 4950 \n", "1 B02 24.3 26.9 23.1 17527 359 \n", "2 A05 373.0 383.0 368.0 19427 5276 \n", "3 D02 1234.0 1258.0 1222.0 17000 11044 \n", "4 E02 642.0 657.0 634.0 17180 7226 \n", ".. ... ... ... ... ... ... \n", "255 D01 5810.0 6060.0 5590.0 13577 13480 \n", "256 A07 154.0 161.0 150.0 14056 1720 \n", "257 B07 170.0 177.0 167.0 16105 2169 \n", "258 C07 405.0 417.0 399.0 15574 4539 \n", "259 D07 428.0 440.0 422.0 15624 4766 \n", "\n", " Primer Sample Dilution Corrected Concentration Study ID \\\n", "0 mod_Caporaso 141 10 400000.0 141 \n", "1 mod_Caporaso 142 10 24300.0 142 \n", "2 mod_Caporaso 145 500 18650000.0 145 \n", "3 mod_Caporaso 146 10 1234000.0 146 \n", "4 mod_Caporaso 147 10 642000.0 147 \n", ".. ... ... ... ... ... \n", "255 mod_Caporaso 451 500 290500000.0 451 \n", "256 mod_Caporaso 207 25000 385000000.0 207 \n", "257 mod_Caporaso 274 25000 425000000.0 274 \n", "258 mod_Caporaso 322 5000 202500000.0 322 \n", "259 mod_Caporaso 395 5000 214000000.0 395 \n", "\n", " Weight (mL) Weight (True/False) Corrected Weight (mL) Copies/mL \\\n", "0 1.6259 True 1.6259 2.460176e+05 \n", "1 0.2091 True 0.2091 1.162123e+05 \n", "2 1.77 True 1.7700 1.053672e+07 \n", "3 1.6973 True 1.6973 7.270371e+05 \n", "4 1.7476 True 1.7476 3.673610e+05 \n", ".. ... ... ... ... \n", "255 0.605 True 0.6050 4.801653e+08 \n", "256 1.8095 True 1.8095 2.127660e+08 \n", "257 1.083 True 1.0830 3.924284e+08 \n", "258 0.219 True 0.2190 9.246575e+08 \n", "259 0.491 True 0.4910 4.358452e+08 \n", "\n", " Log Copies/mL Seq_Dilution Copies in Amp Rxn Rel. Abundance LOD (%) \\\n", "0 5.390966 1 14000.0 0.021429 \n", "1 5.065252 1 850.5 0.352734 \n", "2 7.022706 1 652750.0 0.000460 \n", "3 5.861557 1 43190.0 0.006946 \n", "4 5.565093 1 22470.0 0.013351 \n", ".. ... ... ... ... 
\n", "255 8.681391 10 1016750.0 0.000295 \n", "256 8.327902 1 13475000.0 0.000022 \n", "257 8.593760 1 14875000.0 0.000020 \n", "258 8.965981 1 7087500.0 0.000042 \n", "259 8.639332 100 74900.0 0.004005 \n", "\n", " Abs. Abundance LOD \n", "0 52.718055 \n", "1 409.920066 \n", "2 48.426150 \n", "3 50.500375 \n", "4 49.046856 \n", ".. ... \n", "255 1416.765053 \n", "256 47.369044 \n", "257 79.145231 \n", "258 391.389432 \n", "259 17457.084667 \n", "\n", "[256 rows x 20 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# uL added to the amplification rxn\n", "seq_volume = 3.5\n", "copy_input_threshold = 3\n", "\n", "df_total_load['Copies in Amp Rxn'] = df_total_load['Concentration']*df_total_load['Dilution']/df_total_load['Seq_Dilution']*seq_volume\n", "df_total_load['Rel. Abundance LOD (%)'] = copy_input_threshold/df_total_load['Copies in Amp Rxn']*100\n", "df_total_load['Abs. Abundance LOD'] = df_total_load['Rel. Abundance LOD (%)']*df_total_load['Copies/mL']/100\n", "df_total_load" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Generate dictionary for easier downstream conversion of relative to absolute abundances" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "256" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_load_dict = {df_total_load['Sample'].iloc[i] : df_total_load['Copies/mL'].iloc[i] for i in range(len(df_total_load))}\n", "len(total_load_dict)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Determine LOD thresholds. If LOD from poisson loading > LOD from sequencing use the sequencing value. LOD from sequencing is based on a 50% CV from replicates (Fig 2d from quant-seq paper)." ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "seq_lloq = 7.115*(read_depth**(-0.556))\n", "\n", "df_total_load['Rel. 
Abundance LOD (%) Corrected'] = df_total_load['Rel. Abundance LOD (%)'].where(df_total_load['Rel. Abundance LOD (%)']>seq_lloq, seq_lloq)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# Map each sample ID to its corrected relative LOD (%) converted to a number of reads at read_depth\n", "lod_dict = {\n", "    sample_id: rel_lod_pct*read_depth/100\n", "    for sample_id, rel_lod_pct in zip(df_total_load['Sample'], df_total_load['Rel. Abundance LOD (%) Corrected'])\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Filter out samples without accurate total loads and store metadata in separate file" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
D_0__Archaea;D_1__Euryarchaeota;D_2__Halobacteria;D_3__Halobacteriales;D_4__Haloferacaceae;D_5__Halopenitus;D_6__Halopenitus persicusD_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__Methanobacterium formicicumD_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__uncultured archaeonD_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobrevibacter;__D_0__Archaea;D_1__Nanoarchaeaeota;D_2__Woesearchaeia;__;__;__;__D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__Blastocatella;__D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__uncultured;__D_0__Bacteria;D_1__Acidobacteria;D_2__Holophagae;D_3__Subgroup 7;__;__;__D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__Microtrichaceae;D_5__IMCC26207;D_6__uncultured bacteriumD_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__uncultured;D_5__uncultured Acidimicrobidae bacterium;D_6__uncultured Acidimicrobidae bacterium...D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Chthoniobacterales;D_4__Chthoniobacteraceae;D_5__Chthoniobacter;D_6__uncultured bacteriumD_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Chthoniobacterales;D_4__Xiphinematobacteraceae;D_5__Candidatus Xiphinematobacter;D_6__metagenomeD_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;D_5__Opitutus;D_6__uncultured Verrucomicrobia bacteriumD_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;__;__D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured 
bacteriumD_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;__D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Verrucomicrobiaceae;D_5__uncultured;D_6__uncultured bacteriumD_0__Bacteria;D_1__WPS-2;D_2__metagenome;D_3__metagenome;D_4__metagenome;D_5__metagenome;D_6__metagenomeD_0__Bacteria;__;__;__;__;__;__Unassigned;__;__;__;__;__;__
index
1410.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
1420.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.017.00.0
1440.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.025.00.0
1450.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
1460.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
..................................................................
4460.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.070.00.0
4470.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
4480.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
4490.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
4510.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", "

250 rows × 1065 columns

\n", "
" ], "text/plain": [ " D_0__Archaea;D_1__Euryarchaeota;D_2__Halobacteria;D_3__Halobacteriales;D_4__Haloferacaceae;D_5__Halopenitus;D_6__Halopenitus persicus \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__Methanobacterium formicicum \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobacterium;D_6__uncultured archaeon \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Archaea;D_1__Euryarchaeota;D_2__Methanobacteria;D_3__Methanobacteriales;D_4__Methanobacteriaceae;D_5__Methanobrevibacter;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Archaea;D_1__Nanoarchaeaeota;D_2__Woesearchaeia;__;__;__;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__Blastocatella;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... 
\n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Acidobacteria;D_2__Blastocatellia (Subgroup 4);D_3__Blastocatellales;D_4__Blastocatellaceae;D_5__uncultured;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Acidobacteria;D_2__Holophagae;D_3__Subgroup 7;__;__;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__Microtrichaceae;D_5__IMCC26207;D_6__uncultured bacterium \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Acidimicrobiia;D_3__Microtrichales;D_4__uncultured;D_5__uncultured Acidimicrobidae bacterium;D_6__uncultured Acidimicrobidae bacterium \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " ... \\\n", "index ... \n", "141 ... \n", "142 ... \n", "144 ... \n", "145 ... \n", "146 ... \n", "... ... \n", "446 ... \n", "447 ... \n", "448 ... \n", "449 ... \n", "451 ... \n", "\n", " D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Chthoniobacterales;D_4__Chthoniobacteraceae;D_5__Chthoniobacter;D_6__uncultured bacterium \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... 
\n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Chthoniobacterales;D_4__Xiphinematobacteraceae;D_5__Candidatus Xiphinematobacter;D_6__metagenome \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;D_5__Opitutus;D_6__uncultured Verrucomicrobia bacterium \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Opitutales;D_4__Opitutaceae;__;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacterium \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Verrucomicrobiaceae;D_5__uncultured;D_6__uncultured bacterium \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... 
\n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__WPS-2;D_2__metagenome;D_3__metagenome;D_4__metagenome;D_5__metagenome;D_6__metagenome \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;__;__;__;__;__;__ Unassigned;__;__;__;__;__;__ \n", "index \n", "141 0.0 0.0 \n", "142 17.0 0.0 \n", "144 25.0 0.0 \n", "145 0.0 0.0 \n", "146 0.0 0.0 \n", "... ... ... \n", "446 70.0 0.0 \n", "447 0.0 0.0 \n", "448 0.0 0.0 \n", "449 0.0 0.0 \n", "451 0.0 0.0 \n", "\n", "[250 rows x 1065 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_seq_samples = df_seq_orig_species[df_seq_orig_species.index.isin(total_load_dict.keys())][df_seq_orig_species.columns[:-1*num_metadata_cols]]\n", "\n", "# This is num_metadata_cols-1 because we don't need the description column since it is already stored as the index\n", "seq_metadata = df_seq_orig_species[df_seq_orig_species.columns[-1*(num_metadata_cols-1):]]\n", "df_seq_samples" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set abundance to zero for taxa below LOD defined by # molecules input into amplification rxn or sequencing 50% CV threshold\n", "This is defined as the load at which there should be a 95% chance of one copy being loaded into the amplification reaction (3 copy average)." ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Actinomycetales;D_4__Actinomycetaceae;D_5__F0332;D_6__uncultured bacteriumD_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Alloscardovia;D_6__Bifidobacterium longum subsp. longumD_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Scardovia;D_6__unidentifiedD_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium kroppenstedtiiD_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium pseudodiphtheriticumD_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;__D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;D_6__Corynebacterium durumD_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;__D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Lawsonella;D_6__uncultured bacteriumD_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;__;__...D_0__Bacteria;D_1__Synergistetes;D_2__Synergistia;D_3__Synergistales;D_4__Synergistaceae;D_5__Pyramidobacter;D_6__Pyramidobacter piscolensD_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__Firmicutes oral clone FM046;D_5__Firmicutes oral clone FM046;D_6__Firmicutes oral clone FM046D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__gut metagenome;D_5__gut metagenome;D_6__gut metagenomeD_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__uncultured bacterium;D_5__uncultured bacterium;D_6__uncultured 
bacteriumD_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;__;__;__D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;D_6__Mycoplasma salivarium ATCC 23064D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;__D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacteriumD_0__Bacteria;__;__;__;__;__;__Unassigned;__;__;__;__;__;__
index
1410.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
1420.00.00.00.00.00.00.00.00.00.0...0.0328.00.00.00.00.00.00.00.00.0
1440.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.025.00.0
1450.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
1460.011.093.00.00.00.075.018.00.00.0...0.00.00.00.00.00.00.00.00.00.0
..................................................................
4460.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
4470.00.013.00.00.00.018.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
4480.00.013.00.00.00.014.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
4490.00.0715.00.00.00.01306.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
4510.00.00.00.00.00.0135.00.00.00.0...0.00.00.09.00.00.00.00.00.00.0
\n", "

250 rows × 546 columns

\n", "
" ], "text/plain": [ " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Actinomycetales;D_4__Actinomycetaceae;D_5__F0332;D_6__uncultured bacterium \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Alloscardovia;D_6__Bifidobacterium longum subsp. longum \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 11.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Scardovia;D_6__unidentified \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 93.0 \n", "... ... \n", "446 0.0 \n", "447 13.0 \n", "448 13.0 \n", "449 715.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium kroppenstedtii \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium pseudodiphtheriticum \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... 
\n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;D_6__Corynebacterium durum \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 75.0 \n", "... ... \n", "446 0.0 \n", "447 18.0 \n", "448 14.0 \n", "449 1306.0 \n", "451 135.0 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 18.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Lawsonella;D_6__uncultured bacterium \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;__;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " ... \\\n", "index ... \n", "141 ... \n", "142 ... \n", "144 ... \n", "145 ... \n", "146 ... \n", "... ... \n", "446 ... \n", "447 ... \n", "448 ... \n", "449 ... \n", "451 ... \n", "\n", " D_0__Bacteria;D_1__Synergistetes;D_2__Synergistia;D_3__Synergistales;D_4__Synergistaceae;D_5__Pyramidobacter;D_6__Pyramidobacter piscolens \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... 
\n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__Firmicutes oral clone FM046;D_5__Firmicutes oral clone FM046;D_6__Firmicutes oral clone FM046 \\\n", "index \n", "141 0.0 \n", "142 328.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__gut metagenome;D_5__gut metagenome;D_6__gut metagenome \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__uncultured bacterium;D_5__uncultured bacterium;D_6__uncultured bacterium \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 9.0 \n", "\n", " D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;__;__;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;D_6__Mycoplasma salivarium ATCC 23064 \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;__ \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... 
\n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacterium \\\n", "index \n", "141 0.0 \n", "142 0.0 \n", "144 0.0 \n", "145 0.0 \n", "146 0.0 \n", "... ... \n", "446 0.0 \n", "447 0.0 \n", "448 0.0 \n", "449 0.0 \n", "451 0.0 \n", "\n", " D_0__Bacteria;__;__;__;__;__;__ Unassigned;__;__;__;__;__;__ \n", "index \n", "141 0.0 0.0 \n", "142 0.0 0.0 \n", "144 25.0 0.0 \n", "145 0.0 0.0 \n", "146 0.0 0.0 \n", "... ... ... \n", "446 0.0 0.0 \n", "447 0.0 0.0 \n", "448 0.0 0.0 \n", "449 0.0 0.0 \n", "451 0.0 0.0 \n", "\n", "[250 rows x 546 columns]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_species_lod_filter = pd.DataFrame()\n", "\n", "for col in df_seq_samples.columns:\n", " df_species_lod_filter[col] = df_seq_samples.apply(lambda x: x[col] if x[col]>lod_dict[x.name] else 0, axis=1)\n", " \n", "# Remove rows (samples) that have zero counts after filtering\n", "df_species_lod_filter = df_species_lod_filter[df_species_lod_filter.sum(axis=1)>0]\n", "\n", "# Remove columns (taxa) that have zero counts after filtering\n", "df_species_lod_filter = df_species_lod_filter.loc[:, (df_species_lod_filter != 0).any(axis=0)]\n", "df_species_lod_filter" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Determine which samples (if any) were filtered out" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "orig_indexes = df_seq_samples.index.tolist()\n", "filter_indexes = df_species_lod_filter.index.tolist()\n", "\n", "lost = list(set(set(orig_indexes) - set(filter_indexes)))" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
WellConcentrationPoissonConfMaxPoissonConfMinTotalPositivesPrimerSampleDilutionCorrected Concentration...Weight (mL)Weight (True/False)Corrected Weight (mL)Copies/mLLog Copies/mLSeq_DilutionCopies in Amp RxnRel. Abundance LOD (%)Abs. Abundance LODRel. Abundance LOD (%) Corrected
\n", "

0 rows × 21 columns

\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [Well, Concentration, PoissonConfMax, PoissonConfMin, Total, Positives, Primer, Sample, Dilution, Corrected Concentration, Study ID, Weight (mL), Weight (True/False), Corrected Weight (mL), Copies/mL, Log Copies/mL, Seq_Dilution, Copies in Amp Rxn, Rel. Abundance LOD (%), Abs. Abundance LOD, Rel. Abundance LOD (%) Corrected]\n", "Index: []\n", "\n", "[0 rows x 21 columns]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_total_load[df_total_load['Sample'].isin(lost)]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Generate dataframes for each taxonomy level" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "def collapse_taxonomy(_df, level):\n", " collapsed_dict = {}\n", " index=0\n", " \n", " # Evaluate the selected taxonomy level to collapse to\n", " if level == 'Genus':\n", " index = -1\n", " elif level == 'Family':\n", " index = -2\n", " elif level == 'Order':\n", " index = -3\n", " elif level == 'Class':\n", " index = -4\n", " elif level == 'Phylum':\n", " index = -5\n", " else:\n", " raise ValueError('Could not interpret taxonomy level. 
Please use (Phylum, Class, Order, Family, Genus)')\n", "\n", " # Iterate through columns adding values together for each sample if the new column name already exists\n", " for col in _df:\n", " new_col = \";\".join(col.split(';')[:index])\n", "\n", " if new_col in collapsed_dict.keys():\n", " collapsed_dict[new_col] += np.array(_df[col])\n", " else:\n", " collapsed_dict[new_col] = np.array(_df[col])\n", "\n", " df_collapsed = pd.DataFrame.from_dict(collapsed_dict).set_index(_df.index)\n", " return df_collapsed" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "df_lod_list = [None]*6\n", "\n", "df_lod_list[0] = collapse_taxonomy(df_species_lod_filter, 'Phylum')\n", "df_lod_list[1] = collapse_taxonomy(df_species_lod_filter, 'Class')\n", "df_lod_list[2] = collapse_taxonomy(df_species_lod_filter, 'Order')\n", "df_lod_list[3] = collapse_taxonomy(df_species_lod_filter, 'Family')\n", "df_lod_list[4] = collapse_taxonomy(df_species_lod_filter, 'Genus')\n", "df_lod_list[5] = df_species_lod_filter" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Generate relative and absolute abundance tables" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Actinomycetales;D_4__Actinomycetaceae;D_5__F0332;D_6__uncultured bacteriumD_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Alloscardovia;D_6__Bifidobacterium longum subsp. longumD_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Scardovia;D_6__unidentifiedD_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium kroppenstedtiiD_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium pseudodiphtheriticumD_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;__D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;D_6__Corynebacterium durumD_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;__D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Lawsonella;D_6__uncultured bacteriumD_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;__;__...D_0__Bacteria;D_1__Synergistetes;D_2__Synergistia;D_3__Synergistales;D_4__Synergistaceae;D_5__Pyramidobacter;D_6__Pyramidobacter piscolensD_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__Firmicutes oral clone FM046;D_5__Firmicutes oral clone FM046;D_6__Firmicutes oral clone FM046D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__gut metagenome;D_5__gut metagenome;D_6__gut metagenomeD_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__uncultured bacterium;D_5__uncultured bacterium;D_6__uncultured 
bacteriumD_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;__;__;__D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;D_6__Mycoplasma salivarium ATCC 23064D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;__D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacteriumD_0__Bacteria;__;__;__;__;__;__Unassigned;__;__;__;__;__;__
index
1410.5420560.5420560.5420560.5420560.5420560.5420565.420561e-010.5420560.5420560.542056...0.5420560.5420560.5420560.5420560.5420560.5420560.5420560.5420560.5420560.542056
1420.2560530.2560530.2560530.2560530.2560530.2560532.560533e-010.2560530.2560530.256053...0.256053840.1107900.2560530.2560530.2560530.2560530.2560530.2560530.2560530.256053
1442.6776222.6776222.6776222.6776222.6776222.6776222.677622e+002.6776222.6776222.677622...2.6776222.6776222.6776222.6776222.6776222.6776222.6776222.677622672.0831462.677622
14523.21580023.21580023.21580023.21580023.21580023.2158002.321580e+0123.21580023.21580023.215800...23.21580023.21580023.21580023.21580023.21580023.21580023.21580023.21580023.21580023.215800
1461.601897177.8105881491.3662841.6018971.6018971.6018971.203025e+03289.9433911.6018971.601897...1.6018971.6018971.6018971.6018971.6018971.6018971.6018971.6018971.6018971.601897
..................................................................
4461.5738021.5738021.5738021.5738021.5738021.5738021.573802e+001.5738021.5738021.573802...1.5738021.5738021.5738021.5738021.5738021.5738021.5738021.5738021.5738021.573802
44728.03116028.0311603672.08194428.03116028.03116028.0311605.073640e+0328.03116028.03116028.031160...28.03116028.03116028.03116028.03116028.03116028.03116028.03116028.03116028.03116028.031160
4489.3116619.3116611219.8275349.3116619.3116619.3116611.312944e+039.3116619.3116619.311661...9.3116619.3116619.3116619.3116619.3116619.3116619.3116619.3116619.3116619.311661
4491.8557651.85576513270.5770081.8557651.8557651.8557652.423815e+041.8557651.8557651.855765...1.8557651.8557651.8557651.8557651.8557651.8557651.8557651.8557651.8557651.855765
4511057.9590391057.9590391057.9590391057.9590391057.9590391057.9590391.429303e+061057.9590391057.9590391057.959039...1057.9590391057.9590391057.95903996274.2725121057.9590391057.9590391057.9590391057.9590391057.9590391057.959039
\n", "

250 rows × 546 columns

\n", "
" ], "text/plain": [ " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Actinomycetales;D_4__Actinomycetaceae;D_5__F0332;D_6__uncultured bacterium \\\n", "index \n", "141 0.542056 \n", "142 0.256053 \n", "144 2.677622 \n", "145 23.215800 \n", "146 1.601897 \n", "... ... \n", "446 1.573802 \n", "447 28.031160 \n", "448 9.311661 \n", "449 1.855765 \n", "451 1057.959039 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Alloscardovia;D_6__Bifidobacterium longum subsp. longum \\\n", "index \n", "141 0.542056 \n", "142 0.256053 \n", "144 2.677622 \n", "145 23.215800 \n", "146 177.810588 \n", "... ... \n", "446 1.573802 \n", "447 28.031160 \n", "448 9.311661 \n", "449 1.855765 \n", "451 1057.959039 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Bifidobacteriales;D_4__Bifidobacteriaceae;D_5__Scardovia;D_6__unidentified \\\n", "index \n", "141 0.542056 \n", "142 0.256053 \n", "144 2.677622 \n", "145 23.215800 \n", "146 1491.366284 \n", "... ... \n", "446 1.573802 \n", "447 3672.081944 \n", "448 1219.827534 \n", "449 13270.577008 \n", "451 1057.959039 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium kroppenstedtii \\\n", "index \n", "141 0.542056 \n", "142 0.256053 \n", "144 2.677622 \n", "145 23.215800 \n", "146 1.601897 \n", "... ... \n", "446 1.573802 \n", "447 28.031160 \n", "448 9.311661 \n", "449 1.855765 \n", "451 1057.959039 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;D_6__Corynebacterium pseudodiphtheriticum \\\n", "index \n", "141 0.542056 \n", "142 0.256053 \n", "144 2.677622 \n", "145 23.215800 \n", "146 1.601897 \n", "... ... 
\n", "446 1.573802 \n", "447 28.031160 \n", "448 9.311661 \n", "449 1.855765 \n", "451 1057.959039 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium 1;__ \\\n", "index \n", "141 0.542056 \n", "142 0.256053 \n", "144 2.677622 \n", "145 23.215800 \n", "146 1.601897 \n", "... ... \n", "446 1.573802 \n", "447 28.031160 \n", "448 9.311661 \n", "449 1.855765 \n", "451 1057.959039 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;D_6__Corynebacterium durum \\\n", "index \n", "141 5.420561e-01 \n", "142 2.560533e-01 \n", "144 2.677622e+00 \n", "145 2.321580e+01 \n", "146 1.203025e+03 \n", "... ... \n", "446 1.573802e+00 \n", "447 5.073640e+03 \n", "448 1.312944e+03 \n", "449 2.423815e+04 \n", "451 1.429303e+06 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Corynebacterium;__ \\\n", "index \n", "141 0.542056 \n", "142 0.256053 \n", "144 2.677622 \n", "145 23.215800 \n", "146 289.943391 \n", "... ... \n", "446 1.573802 \n", "447 28.031160 \n", "448 9.311661 \n", "449 1.855765 \n", "451 1057.959039 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;D_5__Lawsonella;D_6__uncultured bacterium \\\n", "index \n", "141 0.542056 \n", "142 0.256053 \n", "144 2.677622 \n", "145 23.215800 \n", "146 1.601897 \n", "... ... \n", "446 1.573802 \n", "447 28.031160 \n", "448 9.311661 \n", "449 1.855765 \n", "451 1057.959039 \n", "\n", " D_0__Bacteria;D_1__Actinobacteria;D_2__Actinobacteria;D_3__Corynebacteriales;D_4__Corynebacteriaceae;__;__ \\\n", "index \n", "141 0.542056 \n", "142 0.256053 \n", "144 2.677622 \n", "145 23.215800 \n", "146 1.601897 \n", "... ... \n", "446 1.573802 \n", "447 28.031160 \n", "448 9.311661 \n", "449 1.855765 \n", "451 1057.959039 \n", "\n", " ... 
\\\n", "index ... \n", "141 ... \n", "142 ... \n", "144 ... \n", "145 ... \n", "146 ... \n", "... ... \n", "446 ... \n", "447 ... \n", "448 ... \n", "449 ... \n", "451 ... \n", "\n", " D_0__Bacteria;D_1__Synergistetes;D_2__Synergistia;D_3__Synergistales;D_4__Synergistaceae;D_5__Pyramidobacter;D_6__Pyramidobacter piscolens \\\n", "index \n", "141 0.542056 \n", "142 0.256053 \n", "144 2.677622 \n", "145 23.215800 \n", "146 1.601897 \n", "... ... \n", "446 1.573802 \n", "447 28.031160 \n", "448 9.311661 \n", "449 1.855765 \n", "451 1057.959039 \n", "\n", " D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__Firmicutes oral clone FM046;D_5__Firmicutes oral clone FM046;D_6__Firmicutes oral clone FM046 \\\n", "index \n", "141 0.542056 \n", "142 840.110790 \n", "144 2.677622 \n", "145 23.215800 \n", "146 1.601897 \n", "... ... \n", "446 1.573802 \n", "447 28.031160 \n", "448 9.311661 \n", "449 1.855765 \n", "451 1057.959039 \n", "\n", " D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__gut metagenome;D_5__gut metagenome;D_6__gut metagenome \\\n", "index \n", "141 0.542056 \n", "142 0.256053 \n", "144 2.677622 \n", "145 23.215800 \n", "146 1.601897 \n", "... ... \n", "446 1.573802 \n", "447 28.031160 \n", "448 9.311661 \n", "449 1.855765 \n", "451 1057.959039 \n", "\n", " D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;D_4__uncultured bacterium;D_5__uncultured bacterium;D_6__uncultured bacterium \\\n", "index \n", "141 0.542056 \n", "142 0.256053 \n", "144 2.677622 \n", "145 23.215800 \n", "146 1.601897 \n", "... ... \n", "446 1.573802 \n", "447 28.031160 \n", "448 9.311661 \n", "449 1.855765 \n", "451 96274.272512 \n", "\n", " D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mollicutes RF39;__;__;__ \\\n", "index \n", "141 0.542056 \n", "142 0.256053 \n", "144 2.677622 \n", "145 23.215800 \n", "146 1.601897 \n", "... ... 
\n", "446 1.573802 \n", "447 28.031160 \n", "448 9.311661 \n", "449 1.855765 \n", "451 1057.959039 \n", "\n", " D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;D_6__Mycoplasma salivarium ATCC 23064 \\\n", "index \n", "141 0.542056 \n", "142 0.256053 \n", "144 2.677622 \n", "145 23.215800 \n", "146 1.601897 \n", "... ... \n", "446 1.573802 \n", "447 28.031160 \n", "448 9.311661 \n", "449 1.855765 \n", "451 1057.959039 \n", "\n", " D_0__Bacteria;D_1__Tenericutes;D_2__Mollicutes;D_3__Mycoplasmatales;D_4__Mycoplasmataceae;D_5__Mycoplasma;__ \\\n", "index \n", "141 0.542056 \n", "142 0.256053 \n", "144 2.677622 \n", "145 23.215800 \n", "146 1.601897 \n", "... ... \n", "446 1.573802 \n", "447 28.031160 \n", "448 9.311661 \n", "449 1.855765 \n", "451 1057.959039 \n", "\n", " D_0__Bacteria;D_1__Verrucomicrobia;D_2__Verrucomicrobiae;D_3__Verrucomicrobiales;D_4__Akkermansiaceae;D_5__Akkermansia;D_6__uncultured bacterium \\\n", "index \n", "141 0.542056 \n", "142 0.256053 \n", "144 2.677622 \n", "145 23.215800 \n", "146 1.601897 \n", "... ... \n", "446 1.573802 \n", "447 28.031160 \n", "448 9.311661 \n", "449 1.855765 \n", "451 1057.959039 \n", "\n", " D_0__Bacteria;__;__;__;__;__;__ Unassigned;__;__;__;__;__;__ \n", "index \n", "141 0.542056 0.542056 \n", "142 0.256053 0.256053 \n", "144 672.083146 2.677622 \n", "145 23.215800 23.215800 \n", "146 1.601897 1.601897 \n", "... ... ... 
\n", "446 1.573802 1.573802 \n", "447 28.031160 28.031160 \n", "448 9.311661 9.311661 \n", "449 1.855765 1.855765 \n", "451 1057.959039 1057.959039 \n", "\n", "[250 rows x 546 columns]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Pseudocount, in reads, added to every taxon; converted to a percentage of read_depth below\n",
 "PSEUDOCOUNT_READS = 0.1\n",
 "\n",
 "# One output table per entry of df_lod_list\n",
 "n_levels = len(df_lod_list)\n",
 "df_rel_lod_list = [None]*n_levels\n",
 "df_abs_lod_list = [None]*n_levels\n",
 "\n",
 "df_pseudo_rel_lod_list = [None]*n_levels\n",
 "df_pseudo_abs_lod_list = [None]*n_levels\n",
 "\n",
 "for index, df in enumerate(df_lod_list):\n",
 "    # Per-sample total load, in the row order of df. Built by explicit lookup so that a\n",
 "    # sample missing from total_load_dict raises KeyError instead of producing NaN.\n",
 "    sample_load = pd.Series([total_load_dict[sample] for sample in df.index], index=df.index)\n",
 "\n",
 "    # Counts -> percent of read_depth, then percent * total load / 100\n",
 "    df_rel_lod_list[index] = df.div(read_depth, axis=0).multiply(100)\n",
 "    df_abs_lod_list[index] = df_rel_lod_list[index].mul(sample_load, axis=0).div(100)\n",
 "\n",
 "    # Same two tables with the pseudocount added to every relative abundance\n",
 "    df_pseudo_rel_lod_list[index] = df_rel_lod_list[index] + (PSEUDOCOUNT_READS/read_depth)*100\n",
 "    df_pseudo_abs_lod_list[index] = df_pseudo_rel_lod_list[index].mul(sample_load, axis=0).div(100)\n",
 "\n",
 "df_pseudo_abs_lod_list[5]"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Transform column taxa names into unique IDs.\n", "This overcomes downstream issue when multiple columns have the same name" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
taxonomy
ASV0D_0__Bacteria;D_1__Actinobacteria
ASV1D_0__Bacteria;D_1__Bacteroidetes
ASV2D_0__Bacteria;D_1__Chloroflexi
ASV3D_0__Bacteria;D_1__Cyanobacteria
ASV4D_0__Bacteria;D_1__Epsilonbacteraeota
ASV5D_0__Bacteria;D_1__Firmicutes
ASV6D_0__Bacteria;D_1__Fusobacteria
ASV7D_0__Bacteria;D_1__Patescibacteria
ASV8D_0__Bacteria;D_1__Proteobacteria
ASV9D_0__Bacteria;D_1__Spirochaetes
ASV10D_0__Bacteria;D_1__Synergistetes
ASV11D_0__Bacteria;D_1__Tenericutes
ASV12D_0__Bacteria;D_1__Verrucomicrobia
ASV13D_0__Bacteria;__
ASV14Unassigned;__
\n", "
" ], "text/plain": [ " taxonomy\n", "ASV0 D_0__Bacteria;D_1__Actinobacteria\n", "ASV1 D_0__Bacteria;D_1__Bacteroidetes\n", "ASV2 D_0__Bacteria;D_1__Chloroflexi\n", "ASV3 D_0__Bacteria;D_1__Cyanobacteria\n", "ASV4 D_0__Bacteria;D_1__Epsilonbacteraeota\n", "ASV5 D_0__Bacteria;D_1__Firmicutes\n", "ASV6 D_0__Bacteria;D_1__Fusobacteria\n", "ASV7 D_0__Bacteria;D_1__Patescibacteria\n", "ASV8 D_0__Bacteria;D_1__Proteobacteria\n", "ASV9 D_0__Bacteria;D_1__Spirochaetes\n", "ASV10 D_0__Bacteria;D_1__Synergistetes\n", "ASV11 D_0__Bacteria;D_1__Tenericutes\n", "ASV12 D_0__Bacteria;D_1__Verrucomicrobia\n", "ASV13 D_0__Bacteria;__\n", "ASV14 Unassigned;__" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "df_col_names_lod_list = [None]*len(df_lod_list)\n",
 "\n",
 "# Taxonomy strings are read from the count tables in df_lod_list rather than from the\n",
 "# tables renamed below, so re-running this cell cannot record 'ASV0', 'ASV1', ... as taxonomy.\n",
 "for index, df in enumerate(df_lod_list):\n",
 "    num_cols = len(df.columns)\n",
 "    col_names = ['ASV' + str(x) for x in range(num_cols)]\n",
 "    df_col_names_lod_list[index] = pd.DataFrame(index=col_names, data={'taxonomy':df.columns.tolist()})\n",
 "\n",
 "    # Apply the same IDs, in the same column order, to all four abundance tables of this level\n",
 "    for abundance_list in (df_rel_lod_list, df_abs_lod_list, df_pseudo_rel_lod_list, df_pseudo_abs_lod_list):\n",
 "        abundance_list[index].columns = col_names\n",
 "\n",
 "df_col_names_lod_list[0]"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Generate shorter taxonomy names for plotting purposes" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [
 "# Names that carry no taxonomic information; a taxon with one of these at a rank is\n",
 "# labelled with the next rank up instead\n",
 "exclusion_list = ['', 'uncultured bacterium', 'metagenome', 'uncultured', \n",
 "                  'gut metagenome', 'uncultured organism', 'unidentified', \n",
 "                  'uncultured Bacteroidales bacterium', 'uncultured Mollicutes bacterium', 'uncultured archaeon']\n",
 "\n",
 "# Table i holds the first i+2 of these ranks (table 0: Kingdom;Phylum ... table 5: down to Species)\n",
 "rank_names = ['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species']\n",
 "\n",
 "for i in range(6):\n",
 "    ranks = rank_names[:i+2]\n",
 "    df_col_names_lod_list[i][ranks] = df_col_names_lod_list[i]['taxonomy'].str.split(';', expand=True)\n",
 "\n",
 "    # Remove a leading 'D_0__'-style prefix (or the bare '__' of an empty rank). A regex is\n",
 "    # used instead of slicing off five characters so that un-prefixed names such as\n",
 "    # 'Unassigned' are kept whole.\n",
 "    short_names = df_col_names_lod_list[i][ranks].apply(\n",
 "        lambda col: col.str.replace(r'^(?:D_\\d+)?__', '', regex=True))\n",
 "\n",
 "    labels_list = []\n",
 "    for asv_id, names in short_names.iterrows():\n",
 "        # Walk from the most specific rank upwards and stop at the first informative name;\n",
 "        # Kingdom is used unconditionally when every lower rank is excluded.\n",
 "        label_rank = ranks[0]\n",
 "        for rank in reversed(ranks[1:]):\n",
 "            if names[rank] not in exclusion_list:\n",
 "                label_rank = rank\n",
 "                break\n",
 "        # Label format: short name + first letter of the rank, e.g. 'Neisseria(g)'\n",
 "        labels_list.append(names[label_rank] + '(' + label_rank[0].lower() + ')')\n",
 "\n",
 "    df_col_names_lod_list[i]['label'] = labels_list" ]
}, { "cell_type": "markdown", "metadata": {}, "source": [ "### Sort the columns by the mean absolute abundance of taxa across all samples" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ASV76ASV259ASV255ASV191ASV247ASV29ASV188ASV241ASV61ASV81...ASV256ASV111ASV262ASV89ASV226ASV269ASV151ASV229ASV180ASV31
index
1412.577477e+047252.7108750.000000e+001.036953e+040.000000e+0076511.2212243.498972e+043908.2246196.016823e+020.000000e+00...0.00.00.00.00.00.00.00.00.054.205612
1423.738122e+040.0000000.000000e+001.258758e+040.000000e+006001.8887251.603150e+040.0000003.774225e+030.000000e+00...0.00.00.00.00.00.00.00.00.00.000000
1441.877013e+04267.7622102.583905e+040.000000e+001.155207e+060.0000002.677622e+020.0000009.103915e+022.945384e+02...0.00.00.00.00.00.00.00.00.00.000000
1451.950127e+04808142.0114875.256522e+062.321580e+030.000000e+000.0000001.085107e+063018.0540502.785896e+031.798064e+06...0.00.00.00.00.00.00.00.00.00.000000
1463.716241e+0517124.2809665.446450e+021.728447e+040.000000e+008538.1120253.996733e+0420087.7907682.745652e+040.000000e+00...0.00.00.00.00.00.00.00.00.00.000000
..................................................................
4463.187421e+0513817.9803720.000000e+005.738082e+040.000000e+0038778.4779456.306224e+0419121.6926553.322296e+040.000000e+00...0.00.00.00.00.00.00.00.00.00.000000
4478.643969e+0622424.9279020.000000e+009.110127e+040.000000e+0058865.4357447.904787e+0515977.7611305.894953e+050.000000e+00...0.00.00.00.00.00.00.00.00.00.000000
4481.287523e+06290989.3925710.000000e+008.101145e+040.000000e+00772774.7100623.945351e+05112671.0928034.614859e+050.000000e+00...0.00.00.00.00.00.00.00.00.00.000000
4494.342119e+0583991.9333520.000000e+002.551677e+040.000000e+000.0000003.099128e+0327780.8051763.305118e+040.000000e+00...0.00.00.00.00.00.00.00.00.00.000000
4513.000689e+08528979.5192970.000000e+001.263203e+070.000000e+000.0000004.528065e+06740571.3270161.153916e+080.000000e+00...0.00.00.00.00.00.00.00.00.00.000000
\n", "

250 rows × 286 columns

\n", "
" ], "text/plain": [ " ASV76 ASV259 ASV255 ASV191 ASV247 \\\n", "index \n", "141 2.577477e+04 7252.710875 0.000000e+00 1.036953e+04 0.000000e+00 \n", "142 3.738122e+04 0.000000 0.000000e+00 1.258758e+04 0.000000e+00 \n", "144 1.877013e+04 267.762210 2.583905e+04 0.000000e+00 1.155207e+06 \n", "145 1.950127e+04 808142.011487 5.256522e+06 2.321580e+03 0.000000e+00 \n", "146 3.716241e+05 17124.280966 5.446450e+02 1.728447e+04 0.000000e+00 \n", "... ... ... ... ... ... \n", "446 3.187421e+05 13817.980372 0.000000e+00 5.738082e+04 0.000000e+00 \n", "447 8.643969e+06 22424.927902 0.000000e+00 9.110127e+04 0.000000e+00 \n", "448 1.287523e+06 290989.392571 0.000000e+00 8.101145e+04 0.000000e+00 \n", "449 4.342119e+05 83991.933352 0.000000e+00 2.551677e+04 0.000000e+00 \n", "451 3.000689e+08 528979.519297 0.000000e+00 1.263203e+07 0.000000e+00 \n", "\n", " ASV29 ASV188 ASV241 ASV61 ASV81 \\\n", "index \n", "141 76511.221224 3.498972e+04 3908.224619 6.016823e+02 0.000000e+00 \n", "142 6001.888725 1.603150e+04 0.000000 3.774225e+03 0.000000e+00 \n", "144 0.000000 2.677622e+02 0.000000 9.103915e+02 2.945384e+02 \n", "145 0.000000 1.085107e+06 3018.054050 2.785896e+03 1.798064e+06 \n", "146 8538.112025 3.996733e+04 20087.790768 2.745652e+04 0.000000e+00 \n", "... ... ... ... ... ... \n", "446 38778.477945 6.306224e+04 19121.692655 3.322296e+04 0.000000e+00 \n", "447 58865.435744 7.904787e+05 15977.761130 5.894953e+05 0.000000e+00 \n", "448 772774.710062 3.945351e+05 112671.092803 4.614859e+05 0.000000e+00 \n", "449 0.000000 3.099128e+03 27780.805176 3.305118e+04 0.000000e+00 \n", "451 0.000000 4.528065e+06 740571.327016 1.153916e+08 0.000000e+00 \n", "\n", " ... ASV256 ASV111 ASV262 ASV89 ASV226 ASV269 ASV151 ASV229 \\\n", "index ... \n", "141 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "142 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "144 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "145 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "146 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "... ... ... 
... ... ... ... ... ... ... \n", "446 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "447 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "448 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "449 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "451 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "\n", " ASV180 ASV31 \n", "index \n", "141 0.0 54.205612 \n", "142 0.0 0.000000 \n", "144 0.0 0.000000 \n", "145 0.0 0.000000 \n", "146 0.0 0.000000 \n", "... ... ... \n", "446 0.0 0.000000 \n", "447 0.0 0.000000 \n", "448 0.0 0.000000 \n", "449 0.0 0.000000 \n", "451 0.0 0.000000 \n", "\n", "[250 rows x 286 columns]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "n_levels = len(df_abs_lod_list)\n",
 "df_rel_sort_lod_list = [None]*n_levels\n",
 "df_abs_sort_lod_list = [None]*n_levels\n",
 "\n",
 "df_pseudo_rel_sort_lod_list = [None]*n_levels\n",
 "df_pseudo_abs_sort_lod_list = [None]*n_levels\n",
 "\n",
 "for i in range(n_levels):\n",
 "    # Column IDs ordered by mean absolute abundance across samples, highest first;\n",
 "    # the same order is applied to all four tables of this level\n",
 "    taxa_sorted = df_abs_lod_list[i].mean().sort_values(ascending=False).index\n",
 "\n",
 "    df_rel_sort_lod_list[i] = df_rel_lod_list[i].loc[:, taxa_sorted]\n",
 "    df_abs_sort_lod_list[i] = df_abs_lod_list[i].loc[:, taxa_sorted]\n",
 "\n",
 "    df_pseudo_rel_sort_lod_list[i] = df_pseudo_rel_lod_list[i].loc[:, taxa_sorted]\n",
 "    df_pseudo_abs_sort_lod_list[i] = df_pseudo_abs_lod_list[i].loc[:, taxa_sorted]\n",
 "\n",
 "df_abs_sort_lod_list[4]"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Save the working files to allow use in individual analysis workbooks" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [
 "# (output path, object) pairs, written in this order\n",
 "pickle_targets = [\n",
 "    ('pickle_files/rel_sort_lod_list.pkl', df_rel_sort_lod_list),\n",
 "    ('pickle_files/abs_sort_lod_list.pkl', df_abs_sort_lod_list),\n",
 "    ('pickle_files/pseudo_rel_sort_lod_list.pkl', df_pseudo_rel_sort_lod_list),\n",
 "    ('pickle_files/pseudo_abs_sort_lod_list.pkl', df_pseudo_abs_sort_lod_list),\n",
 "    ('pickle_files/col_names_lod_list.pkl', df_col_names_lod_list),\n",
 "    ('pickle_files/total_load_duodenum.pkl', df_total_load),\n",
 "    ('pickle_files/seq_duodenum_metadata.pkl', seq_metadata),\n",
 "]\n",
 "\n",
 "for pickle_path, obj in pickle_targets:\n",
 "    # The with-block closes (and flushes) each file as soon as it has been written\n",
 "    with open(pickle_path, 'wb') as pickle_file:\n",
 "        pickle.dump(obj, pickle_file)"
 ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
taxonomyKingdomPhylumClassOrderFamilyGenusSpecieslabel
ASV193D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...D_0__BacteriaD_1__FirmicutesD_2__BacilliD_3__LactobacillalesD_4__StreptococcaceaeD_5__Streptococcus__Streptococcus(g)
ASV501D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...D_0__BacteriaD_1__ProteobacteriaD_2__GammaproteobacteriaD_3__PasteurellalesD_4__PasteurellaceaeD_5__Haemophilus__Haemophilus(g)
ASV494D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...D_0__BacteriaD_1__ProteobacteriaD_2__GammaproteobacteriaD_3__EnterobacterialesD_4__Enterobacteriaceae____Enterobacteriaceae(f)
ASV404D_0__Bacteria;D_1__Fusobacteria;D_2__Fusobacte...D_0__BacteriaD_1__FusobacteriaD_2__FusobacteriiaD_3__FusobacterialesD_4__FusobacteriaceaeD_5__Fusobacterium__Fusobacterium(g)
ASV486D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...D_0__BacteriaD_1__ProteobacteriaD_2__GammaproteobacteriaD_3__EnterobacterialesD_4__EnterobacteriaceaeD_5__Escherichia-Shigella__Escherichia-Shigella(g)
ASV397D_0__Bacteria;D_1__Firmicutes;D_2__Negativicut...D_0__BacteriaD_1__FirmicutesD_2__NegativicutesD_3__SelenomonadalesD_4__VeillonellaceaeD_5__Veillonella__Veillonella(g)
ASV477D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr...D_0__BacteriaD_1__ProteobacteriaD_2__GammaproteobacteriaD_3__BetaproteobacterialesD_4__NeisseriaceaeD_5__NeisseriaD_6__uncultured bacteriumNeisseria(g)
ASV67D_0__Bacteria;D_1__Bacteroidetes;D_2__Bacteroi...D_0__BacteriaD_1__BacteroidetesD_2__BacteroidiaD_3__BacteroidalesD_4__PrevotellaceaeD_5__Prevotella 7D_6__Prevotella melaninogenicaPrevotella melaninogenica(s)
ASV155D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3...D_0__BacteriaD_1__FirmicutesD_2__BacilliD_3__BacillalesD_4__Family XID_5__Gemella__Gemella(g)
ASV206D_0__Bacteria;D_1__Firmicutes;D_2__Clostridia;...D_0__BacteriaD_1__FirmicutesD_2__ClostridiaD_3__ClostridialesD_4__Clostridiaceae 1D_5__Clostridium sensu stricto 1__Clostridium sensu stricto 1(g)
\n", "
" ], "text/plain": [ " taxonomy Kingdom \\\n", "ASV193 D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3... D_0__Bacteria \n", "ASV501 D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr... D_0__Bacteria \n", "ASV494 D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr... D_0__Bacteria \n", "ASV404 D_0__Bacteria;D_1__Fusobacteria;D_2__Fusobacte... D_0__Bacteria \n", "ASV486 D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr... D_0__Bacteria \n", "ASV397 D_0__Bacteria;D_1__Firmicutes;D_2__Negativicut... D_0__Bacteria \n", "ASV477 D_0__Bacteria;D_1__Proteobacteria;D_2__Gammapr... D_0__Bacteria \n", "ASV67 D_0__Bacteria;D_1__Bacteroidetes;D_2__Bacteroi... D_0__Bacteria \n", "ASV155 D_0__Bacteria;D_1__Firmicutes;D_2__Bacilli;D_3... D_0__Bacteria \n", "ASV206 D_0__Bacteria;D_1__Firmicutes;D_2__Clostridia;... D_0__Bacteria \n", "\n", " Phylum Class \\\n", "ASV193 D_1__Firmicutes D_2__Bacilli \n", "ASV501 D_1__Proteobacteria D_2__Gammaproteobacteria \n", "ASV494 D_1__Proteobacteria D_2__Gammaproteobacteria \n", "ASV404 D_1__Fusobacteria D_2__Fusobacteriia \n", "ASV486 D_1__Proteobacteria D_2__Gammaproteobacteria \n", "ASV397 D_1__Firmicutes D_2__Negativicutes \n", "ASV477 D_1__Proteobacteria D_2__Gammaproteobacteria \n", "ASV67 D_1__Bacteroidetes D_2__Bacteroidia \n", "ASV155 D_1__Firmicutes D_2__Bacilli \n", "ASV206 D_1__Firmicutes D_2__Clostridia \n", "\n", " Order Family \\\n", "ASV193 D_3__Lactobacillales D_4__Streptococcaceae \n", "ASV501 D_3__Pasteurellales D_4__Pasteurellaceae \n", "ASV494 D_3__Enterobacteriales D_4__Enterobacteriaceae \n", "ASV404 D_3__Fusobacteriales D_4__Fusobacteriaceae \n", "ASV486 D_3__Enterobacteriales D_4__Enterobacteriaceae \n", "ASV397 D_3__Selenomonadales D_4__Veillonellaceae \n", "ASV477 D_3__Betaproteobacteriales D_4__Neisseriaceae \n", "ASV67 D_3__Bacteroidales D_4__Prevotellaceae \n", "ASV155 D_3__Bacillales D_4__Family XI \n", "ASV206 D_3__Clostridiales D_4__Clostridiaceae 1 \n", "\n", " Genus Species \\\n", "ASV193 D_5__Streptococcus __ \n", "ASV501 
D_5__Haemophilus __ \n", "ASV494 __ __ \n", "ASV404 D_5__Fusobacterium __ \n", "ASV486 D_5__Escherichia-Shigella __ \n", "ASV397 D_5__Veillonella __ \n", "ASV477 D_5__Neisseria D_6__uncultured bacterium \n", "ASV67 D_5__Prevotella 7 D_6__Prevotella melaninogenica \n", "ASV155 D_5__Gemella __ \n", "ASV206 D_5__Clostridium sensu stricto 1 __ \n", "\n", " label \n", "ASV193 Streptococcus(g) \n", "ASV501 Haemophilus(g) \n", "ASV494 Enterobacteriaceae(f) \n", "ASV404 Fusobacterium(g) \n", "ASV486 Escherichia-Shigella(g) \n", "ASV397 Veillonella(g) \n", "ASV477 Neisseria(g) \n", "ASV67 Prevotella melaninogenica(s) \n", "ASV155 Gemella(g) \n", "ASV206 Clostridium sensu stricto 1(g) " ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Taxonomy and label rows for the first ten columns of the sorted table at index 5\n",
 "top_asv_ids = list(df_abs_sort_lod_list[5].columns[:10])\n",
 "df_col_names_lod_list[5].loc[top_asv_ids]"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.10" } }, "nbformat": 4, "nbformat_minor": 4 }