{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import pickle" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load in the raw metadata file and processed total microbial loads from dPCR" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Raw metadata and colony counts: the first column of each sheet becomes the index\n", "df_raw_metadata = pd.read_excel('data_files/metadata_raw.xlsx', index_col=0)\n", "df_colony_counts = pd.read_excel('data_files/Colony counts.xlsx', index_col=0)\n", "\n", "# Processed dPCR total loads, re-indexed by the 'Sample' column\n", "total_load_raw = pd.read_pickle('pickle_files/total_load_duodenum.pkl')\n", "df_total_load = total_load_raw.set_index('Sample')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Merge relevant total load columns with raw metadata" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Total-load columns carried over into the merged metadata table\n", "total_load_columns = ['Weight (mL)', 'Copies/mL', 'Log Copies/mL', 'Rel. Abundance LOD (%) Corrected']\n", "\n", "# Both merges align on the index and use pandas' default how='inner',\n", "# so only index values present in all three tables are kept\n", "df_metadata = (\n", "    df_total_load[total_load_columns]\n", "    .merge(df_raw_metadata, left_index=True, right_index=True)\n", "    .merge(df_colony_counts, left_index=True, right_index=True)\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Generate binary variables used in downstream analysis" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Weight (mL) | \n", "Copies/mL | \n", "Log Copies/mL | \n", "Rel. Abundance LOD (%) Corrected | \n", "Age (years) | \n", "Gender | \n", "weight (lbs) | \n", "GM-CSF | \n", "IFNY | \n", "IL10 | \n", "... | \n", "bloating>50th | \n", "constipation>50th | \n", "excess_gas>50th | \n", "incomplete_evac>50th | \n", "diarrhea>50th | \n", "urgency>50th | \n", "current_smoker | \n", "any_probiotics | \n", "Gender_binary | \n", "any_PPI | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
141 | \n", "1.6259 | \n", "2.460176e+05 | \n", "5.390966 | \n", "0.021429 | \n", "42 | \n", "Female | \n", "437 | \n", "1.089782 | \n", "0.320000 | \n", "9.613930 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
142 | \n", "0.2091 | \n", "1.162123e+05 | \n", "5.065252 | \n", "0.352734 | \n", "81 | \n", "Male | \n", "137 | \n", "3.464436 | \n", "5.116849 | \n", "11.172813 | \n", "... | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0 | \n", "0 | \n", "1 | \n", "1 | \n", "
145 | \n", "1.77 | \n", "1.053672e+07 | \n", "7.022706 | \n", "0.018320 | \n", "68 | \n", "Female | \n", "213 | \n", "2.194994 | \n", "0.881870 | \n", "12.464125 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
146 | \n", "1.6973 | \n", "7.270371e+05 | \n", "5.861557 | \n", "0.018320 | \n", "72 | \n", "Female | \n", "201 | \n", "0.320000 | \n", "1.413385 | \n", "5.787293 | \n", "... | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
147 | \n", "1.7476 | \n", "3.673610e+05 | \n", "5.565093 | \n", "0.018320 | \n", "56 | \n", "Female | \n", "195 | \n", "2.847633 | \n", "0.080797 | \n", "1.501182 | \n", "... | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
451 | \n", "0.605 | \n", "4.801653e+08 | \n", "8.681391 | \n", "0.018320 | \n", "34 | \n", "Male | \n", "160 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
207 | \n", "1.8095 | \n", "2.127660e+08 | \n", "8.327902 | \n", "0.018320 | \n", "52 | \n", "Female | \n", "200 | \n", "0.320000 | \n", "0.320000 | \n", "1.438906 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
274 | \n", "1.083 | \n", "3.924284e+08 | \n", "8.593760 | \n", "0.018320 | \n", "55 | \n", "Male | \n", "195 | \n", "34.464303 | \n", "90.534830 | \n", "24.358041 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
322 | \n", "0.219 | \n", "9.246575e+08 | \n", "8.965981 | \n", "0.018320 | \n", "73 | \n", "Male | \n", "176 | \n", "0.320000 | \n", "0.996686 | \n", "3.348208 | \n", "... | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0 | \n", "0 | \n", "1 | \n", "1 | \n", "
395 | \n", "0.491 | \n", "4.358452e+08 | \n", "8.639332 | \n", "0.018320 | \n", "64 | \n", "Female | \n", "123 | \n", "10.564126 | \n", "4.289218 | \n", "6.107177 | \n", "... | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
250 rows × 69 columns
\n", "