diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 88fbad1..18fd2a3 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -23,7 +23,6 @@ jobs:
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- cache: 'pip'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
diff --git a/tableone.ipynb b/tableone.ipynb
index b13eee6..3cc44f0 100644
--- a/tableone.ipynb
+++ b/tableone.ipynb
@@ -335,7 +335,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 5,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -347,12 +347,12 @@
"# Test for normality, multimodality (Hartigan's Dip Test), and far outliers (Tukey's test)\n",
"\n",
"# for versions >= 0.7.9\n",
- "table1 = TableOne(data, dip_test=True, normal_test=True, tukey_test=True)"
+ "table1 = TableOne(data, dip_test=True, normal_test=True, tukey_test=True, show_histograms=True)"
]
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -388,6 +388,7 @@
"
| \n",
" Missing | \n",
" Overall | \n",
+ " Histogram | \n",
" \n",
" \n",
" \n",
@@ -396,79 +397,93 @@
" | \n",
" | \n",
" 1000 | \n",
+ " | \n",
" \n",
" \n",
" | Age, mean (SD) | \n",
" | \n",
" 0 | \n",
" 65.0 (17.2) | \n",
+ " ▂▂▃▄▆▇█▇ | \n",
"
\n",
" \n",
" | SysABP, mean (SD) | \n",
" | \n",
" 291 | \n",
" 114.3 (40.2) | \n",
+ " ▂▁▁▃█▆▃▁ | \n",
"
\n",
" \n",
" | Height, mean (SD) | \n",
" | \n",
" 475 | \n",
" 170.1 (22.1) | \n",
+ " ▁▃▅▇▇█▂▁ | \n",
"
\n",
" \n",
" | Weight, mean (SD) | \n",
" | \n",
" 302 | \n",
" 82.9 (23.8) | \n",
+ " ▃▆█▆▄▃▁▁ | \n",
"
\n",
" \n",
" | ICU, n (%) | \n",
" CCU | \n",
- " 0 | \n",
+ " | \n",
" 162 (16.2) | \n",
+ " | \n",
"
\n",
" \n",
" | CSRU | \n",
" | \n",
" 202 (20.2) | \n",
+ " | \n",
"
\n",
" \n",
" | MICU | \n",
" | \n",
" 380 (38.0) | \n",
+ " | \n",
"
\n",
" \n",
" | SICU | \n",
" | \n",
" 256 (25.6) | \n",
+ " | \n",
"
\n",
" \n",
" | MechVent, n (%) | \n",
" 0 | \n",
- " 0 | \n",
+ " | \n",
" 540 (54.0) | \n",
+ " | \n",
"
\n",
" \n",
" | 1 | \n",
" | \n",
" 460 (46.0) | \n",
+ " | \n",
"
\n",
" \n",
" | LOS, mean (SD) | \n",
" | \n",
" 0 | \n",
" 14.2 (14.2) | \n",
+ " █▆▃▁▁▁▁▁ | \n",
"
\n",
" \n",
" | death, n (%) | \n",
" 0 | \n",
- " 0 | \n",
+ " | \n",
" 864 (86.4) | \n",
+ " | \n",
"
\n",
" \n",
" | 1 | \n",
" | \n",
" 136 (13.6) | \n",
+ " | \n",
"
\n",
" \n",
"\n",
@@ -478,21 +493,21 @@
" in: Height, LOS.
"
],
"text/plain": [
- " Missing Overall\n",
- "n 1000\n",
- "Age, mean (SD) 0 65.0 (17.2)\n",
- "SysABP, mean (SD) 291 114.3 (40.2)\n",
- "Height, mean (SD) 475 170.1 (22.1)\n",
- "Weight, mean (SD) 302 82.9 (23.8)\n",
- "ICU, n (%) CCU 0 162 (16.2)\n",
- " CSRU 202 (20.2)\n",
- " MICU 380 (38.0)\n",
- " SICU 256 (25.6)\n",
- "MechVent, n (%) 0 0 540 (54.0)\n",
- " 1 460 (46.0)\n",
- "LOS, mean (SD) 0 14.2 (14.2)\n",
- "death, n (%) 0 0 864 (86.4)\n",
- " 1 136 (13.6)\n",
+ " Missing Overall Histogram\n",
+ "n 1000 \n",
+ "Age, mean (SD) 0 65.0 (17.2) ▂▂▃▄▆▇█▇\n",
+ "SysABP, mean (SD) 291 114.3 (40.2) ▂▁▁▃█▆▃▁\n",
+ "Height, mean (SD) 475 170.1 (22.1) ▁▃▅▇▇█▂▁\n",
+ "Weight, mean (SD) 302 82.9 (23.8) ▃▆█▆▄▃▁▁\n",
+ "ICU, n (%) CCU 162 (16.2) \n",
+ " CSRU 202 (20.2) \n",
+ " MICU 380 (38.0) \n",
+ " SICU 256 (25.6) \n",
+ "MechVent, n (%) 0 540 (54.0) \n",
+ " 1 460 (46.0) \n",
+ "LOS, mean (SD) 0 14.2 (14.2) █▆▃▁▁▁▁▁\n",
+ "death, n (%) 0 864 (86.4) \n",
+ " 1 136 (13.6) \n",
"[1] Hartigan's Dip Test reports possible\n",
" multimodal distributions for: Age, SysABP, Height, LOS.\n",
"[2] Normality test reports non-normal\n",
@@ -501,7 +516,7 @@
" in: Height, LOS."
]
},
- "execution_count": 7,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -513,7 +528,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 7,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -530,7 +545,7 @@
"pandas.core.frame.DataFrame"
]
},
- "execution_count": 8,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -570,7 +585,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 8,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -587,7 +602,7 @@
"(-30.0, 250.0)"
]
},
- "execution_count": 9,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
},
@@ -622,7 +637,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 9,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -673,7 +688,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 10,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -761,7 +776,7 @@
" \n",
" | ICU, n (%) | \n",
" CCU | \n",
- " 0 | \n",
+ " | \n",
" 162 (16.2) | \n",
" 137 (15.9) | \n",
" 25 (18.4) | \n",
@@ -802,7 +817,7 @@
"SysABP, mean (SD) 291 114.3 (40.2) 115.4 (38.3) 107.6 (49.4)\n",
"Height, mean [min,max] 475 170.1 [13.0,406.4] 170.3 [13.0,406.4] 168.5 [144.8,188.0]\n",
"Weight, mean (SD) 302 82.9 (23.8) 83.0 (23.6) 82.3 (25.4)\n",
- "ICU, n (%) CCU 0 162 (16.2) 137 (15.9) 25 (18.4)\n",
+ "ICU, n (%) CCU 162 (16.2) 137 (15.9) 25 (18.4)\n",
" CSRU 202 (20.2) 194 (22.5) 8 (5.9)\n",
" MICU 380 (38.0) 318 (36.8) 62 (45.6)\n",
" SICU 256 (25.6) 215 (24.9) 41 (30.1)\n",
@@ -814,7 +829,7 @@
" in: Height, SysABP."
]
},
- "execution_count": 11,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -895,7 +910,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 11,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -910,7 +925,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 12,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -952,9 +967,9 @@
" Overall | \n",
" 0 | \n",
" 1 | \n",
+ " SMD (0,1) | \n",
" P-Value | \n",
" Test | \n",
- " SMD (0,1) | \n",
"
\n",
" \n",
" \n",
@@ -976,9 +991,9 @@
" 68.0 [53.0,79.0] | \n",
" 66.0 [52.8,78.0] | \n",
" 75.0 [62.0,83.0] | \n",
+ " 0.487 | \n",
" <0.001 | \n",
" Kruskal-Wallis | \n",
- " 0.487 | \n",
" \n",
" \n",
" | SysABP, mean (SD) | \n",
@@ -987,9 +1002,9 @@
" 114.3 (40.2) | \n",
" 115.4 (38.3) | \n",
" 107.6 (49.4) | \n",
- " 0.134 | \n",
- " Two Sample T-test | \n",
" -0.176 | \n",
+ " 0.134 | \n",
+ " Welch’s T-test | \n",
"
\n",
" \n",
" | Height, mean (SD) | \n",
@@ -998,9 +1013,9 @@
" 170.1 (22.1) | \n",
" 170.3 (23.2) | \n",
" 168.5 (11.3) | \n",
- " 0.304 | \n",
- " Two Sample T-test | \n",
" -0.099 | \n",
+ " 0.304 | \n",
+ " Welch’s T-test | \n",
"
\n",
" \n",
" | Weight, mean (SD) | \n",
@@ -1009,20 +1024,20 @@
" 82.9 (23.8) | \n",
" 83.0 (23.6) | \n",
" 82.3 (25.4) | \n",
- " 0.782 | \n",
- " Two Sample T-test | \n",
" -0.031 | \n",
+ " 0.782 | \n",
+ " Welch’s T-test | \n",
"
\n",
" \n",
" | ICU, n (%) | \n",
" CCU | \n",
- " 0 | \n",
+ " | \n",
" 162 (16.2) | \n",
" 137 (15.9) | \n",
" 25 (18.4) | \n",
+ " 0.490 | \n",
" <0.001 | \n",
" Chi-squared | \n",
- " 0.490 | \n",
"
\n",
" \n",
" | CSRU | \n",
@@ -1062,17 +1077,17 @@
" in: Height, SysABP.
"
],
"text/plain": [
- " Grouped by death \n",
- " Missing Overall 0 1 P-Value Test SMD (0,1)\n",
- "n 1000 864 136 \n",
- "Age, median [Q1,Q3] 0 68.0 [53.0,79.0] 66.0 [52.8,78.0] 75.0 [62.0,83.0] <0.001 Kruskal-Wallis 0.487\n",
- "SysABP, mean (SD) 291 114.3 (40.2) 115.4 (38.3) 107.6 (49.4) 0.134 Two Sample T-test -0.176\n",
- "Height, mean (SD) 475 170.1 (22.1) 170.3 (23.2) 168.5 (11.3) 0.304 Two Sample T-test -0.099\n",
- "Weight, mean (SD) 302 82.9 (23.8) 83.0 (23.6) 82.3 (25.4) 0.782 Two Sample T-test -0.031\n",
- "ICU, n (%) CCU 0 162 (16.2) 137 (15.9) 25 (18.4) <0.001 Chi-squared 0.490\n",
- " CSRU 202 (20.2) 194 (22.5) 8 (5.9) \n",
- " MICU 380 (38.0) 318 (36.8) 62 (45.6) \n",
- " SICU 256 (25.6) 215 (24.9) 41 (30.1) \n",
+ " Grouped by death \n",
+ " Missing Overall 0 1 SMD (0,1) P-Value Test\n",
+ "n 1000 864 136 \n",
+ "Age, median [Q1,Q3] 0 68.0 [53.0,79.0] 66.0 [52.8,78.0] 75.0 [62.0,83.0] 0.487 <0.001 Kruskal-Wallis\n",
+ "SysABP, mean (SD) 291 114.3 (40.2) 115.4 (38.3) 107.6 (49.4) -0.176 0.134 Welch’s T-test\n",
+ "Height, mean (SD) 475 170.1 (22.1) 170.3 (23.2) 168.5 (11.3) -0.099 0.304 Welch’s T-test\n",
+ "Weight, mean (SD) 302 82.9 (23.8) 83.0 (23.6) 82.3 (25.4) -0.031 0.782 Welch’s T-test\n",
+ "ICU, n (%) CCU 162 (16.2) 137 (15.9) 25 (18.4) 0.490 <0.001 Chi-squared\n",
+ " CSRU 202 (20.2) 194 (22.5) 8 (5.9) \n",
+ " MICU 380 (38.0) 318 (36.8) 62 (45.6) \n",
+ " SICU 256 (25.6) 215 (24.9) 41 (30.1) \n",
"[1] Hartigan's Dip Test reports possible\n",
" multimodal distributions for: Age, Height, SysABP.\n",
"[2] Normality test reports non-normal\n",
@@ -1081,7 +1096,7 @@
" in: Height, SysABP."
]
},
- "execution_count": 13,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -1114,7 +1129,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
@@ -1124,7 +1139,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
@@ -1157,7 +1172,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 15,
"metadata": {},
"outputs": [
{
@@ -1176,7 +1191,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -1253,7 +1268,7 @@
" 170.3 (23.2) | \n",
" 168.5 (11.3) | \n",
" 0.304 | \n",
- " Two Sample T-test | \n",
+ " Welch’s T-test | \n",
"
\n",
" \n",
" | Weight, mean (SD) | \n",
@@ -1263,12 +1278,12 @@
" 83.0 (23.6) | \n",
" 82.3 (25.4) | \n",
" 0.782 | \n",
- " Two Sample T-test | \n",
+ " Welch’s T-test | \n",
"
\n",
" \n",
" | ICU, n (%) | \n",
" CCU | \n",
- " 0 | \n",
+ " | \n",
" 162 (16.2) | \n",
" 137 (15.9) | \n",
" 25 (18.4) | \n",
@@ -1305,7 +1320,7 @@
"
\n",
" | MechVent, n (%) | \n",
" 0 | \n",
- " 0 | \n",
+ " | \n",
" 540 (54.0) | \n",
" 468 (54.2) | \n",
" 72 (52.9) | \n",
@@ -1329,30 +1344,30 @@
" 14.0 (13.5) | \n",
" 15.4 (17.7) | \n",
" 0.386 | \n",
- " Two Sample T-test | \n",
+ " Welch’s T-test | \n",
"
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- " Grouped by death \n",
- " Missing Overall 0 1 P-Value Test\n",
- "n 1000 864 136 \n",
- "Age, mean (SD) 0 65.0 (17.2) 64.0 (17.4) 71.7 (14.0) <0.001 Custom test 1\n",
- "SysABP, mean (SD) 291 114.3 (40.2) 115.4 (38.3) 107.6 (49.4) 0.012 Custom test 2\n",
- "Height, mean (SD) 475 170.1 (22.1) 170.3 (23.2) 168.5 (11.3) 0.304 Two Sample T-test\n",
- "Weight, mean (SD) 302 82.9 (23.8) 83.0 (23.6) 82.3 (25.4) 0.782 Two Sample T-test\n",
- "ICU, n (%) CCU 0 162 (16.2) 137 (15.9) 25 (18.4) <0.001 Chi-squared\n",
- " CSRU 202 (20.2) 194 (22.5) 8 (5.9) \n",
- " MICU 380 (38.0) 318 (36.8) 62 (45.6) \n",
- " SICU 256 (25.6) 215 (24.9) 41 (30.1) \n",
- "MechVent, n (%) 0 0 540 (54.0) 468 (54.2) 72 (52.9) 0.862 Chi-squared\n",
- " 1 460 (46.0) 396 (45.8) 64 (47.1) \n",
- "LOS, mean (SD) 0 14.2 (14.2) 14.0 (13.5) 15.4 (17.7) 0.386 Two Sample T-test"
+ " Grouped by death \n",
+ " Missing Overall 0 1 P-Value Test\n",
+ "n 1000 864 136 \n",
+ "Age, mean (SD) 0 65.0 (17.2) 64.0 (17.4) 71.7 (14.0) <0.001 Custom test 1\n",
+ "SysABP, mean (SD) 291 114.3 (40.2) 115.4 (38.3) 107.6 (49.4) 0.012 Custom test 2\n",
+ "Height, mean (SD) 475 170.1 (22.1) 170.3 (23.2) 168.5 (11.3) 0.304 Welch’s T-test\n",
+ "Weight, mean (SD) 302 82.9 (23.8) 83.0 (23.6) 82.3 (25.4) 0.782 Welch’s T-test\n",
+ "ICU, n (%) CCU 162 (16.2) 137 (15.9) 25 (18.4) <0.001 Chi-squared\n",
+ " CSRU 202 (20.2) 194 (22.5) 8 (5.9) \n",
+ " MICU 380 (38.0) 318 (36.8) 62 (45.6) \n",
+ " SICU 256 (25.6) 215 (24.9) 41 (30.1) \n",
+ "MechVent, n (%) 0 540 (54.0) 468 (54.2) 72 (52.9) 0.862 Chi-squared\n",
+ " 1 460 (46.0) 396 (45.8) 64 (47.1) \n",
+ "LOS, mean (SD) 0 14.2 (14.2) 14.0 (13.5) 15.4 (17.7) 0.386 Welch’s T-test"
]
},
- "execution_count": 17,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -1392,7 +1407,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@@ -1402,7 +1417,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
@@ -1412,7 +1427,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 19,
"metadata": {
"scrolled": true
},
@@ -1430,11 +1445,11 @@
" SysABP, mean (SD) & & 291 & 114.3 (40.2) & 115.4 (38.3) & 107.6 (49.4) \\\\\n",
" Height, mean (SD) & & 475 & 170.1 (22.1) & 170.3 (23.2) & 168.5 (11.3) \\\\\n",
" Weight, mean (SD) & & 302 & 82.9 (23.8) & 83.0 (23.6) & 82.3 (25.4) \\\\\n",
- " ICU, n (\\%) & CCU & 0 & 162 (16.2) & 137 (15.9) & 25 (18.4) \\\\\n",
+ " ICU, n (\\%) & CCU & & 162 (16.2) & 137 (15.9) & 25 (18.4) \\\\\n",
" & CSRU & & 202 (20.2) & 194 (22.5) & 8 (5.9) \\\\\n",
" & MICU & & 380 (38.0) & 318 (36.8) & 62 (45.6) \\\\\n",
" & SICU & & 256 (25.6) & 215 (24.9) & 41 (30.1) \\\\\n",
- " MechVent, n (\\%) & 0 & 0 & 540 (54.0) & 468 (54.2) & 72 (52.9) \\\\\n",
+ " MechVent, n (\\%) & 0 & & 540 (54.0) & 468 (54.2) & 72 (52.9) \\\\\n",
" & 1 & & 460 (46.0) & 396 (45.8) & 64 (47.1) \\\\\n",
" LOS, mean (SD) & & 0 & 14.2 (14.2) & 14.0 (13.5) & 15.4 (17.7) \\\\\n",
"\\hline\n",
@@ -1448,7 +1463,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
@@ -1462,11 +1477,11 @@
"| SysABP, mean (SD) | | 291 | 114.3 (40.2) | 115.4 (38.3) | 107.6 (49.4) |\n",
"| Height, mean (SD) | | 475 | 170.1 (22.1) | 170.3 (23.2) | 168.5 (11.3) |\n",
"| Weight, mean (SD) | | 302 | 82.9 (23.8) | 83.0 (23.6) | 82.3 (25.4) |\n",
- "| ICU, n (%) | CCU | 0 | 162 (16.2) | 137 (15.9) | 25 (18.4) |\n",
+ "| ICU, n (%) | CCU | | 162 (16.2) | 137 (15.9) | 25 (18.4) |\n",
"| | CSRU | | 202 (20.2) | 194 (22.5) | 8 (5.9) |\n",
"| | MICU | | 380 (38.0) | 318 (36.8) | 62 (45.6) |\n",
"| | SICU | | 256 (25.6) | 215 (24.9) | 41 (30.1) |\n",
- "| MechVent, n (%) | 0 | 0 | 540 (54.0) | 468 (54.2) | 72 (52.9) |\n",
+ "| MechVent, n (%) | 0 | | 540 (54.0) | 468 (54.2) | 72 (52.9) |\n",
"| | 1 | | 460 (46.0) | 396 (45.8) | 64 (47.1) |\n",
"| LOS, mean (SD) | | 0 | 14.2 (14.2) | 14.0 (13.5) | 15.4 (17.7) |\n"
]
@@ -1487,7 +1502,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
@@ -1526,7 +1541,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.19"
+ "version": "3.9.21"
}
},
"nbformat": 4,
diff --git a/tableone/formatting.py b/tableone/formatting.py
index 4ca9c49..2933822 100644
--- a/tableone/formatting.py
+++ b/tableone/formatting.py
@@ -291,3 +291,40 @@ def reorder_columns(table, optional_columns, groupby, order, overall):
table = table.reindex(cols, axis=1)
return table
+
+
+def generate_histograms(values, bins=8, clip=(1, 99)):
+    """
+    Generate a mini histogram (sparkline) using unicode blocks.
+
+    Parameters
+    ----------
+    values : array-like
+        Numeric values. NaN and infinite entries are ignored.
+    bins : int
+        Number of bins, i.e. characters in the returned string.
+    clip : tuple of (int, int) or None, optional
+        (lower_percentile, upper_percentile) that values are clipped to before
+        binning, e.g. (1, 99). If None, no clipping is applied.
+
+    Returns
+    -------
+    str
+        One block per bin, or '' if there are no finite values.
+    """
+    # np.histogram raises ValueError on a NaN/inf range; keep finite values.
+    values = np.asarray(values, dtype=float)
+    values = values[np.isfinite(values)]
+    if values.size == 0:
+        return ''
+
+    if clip is not None:
+        lower, upper = np.percentile(values, clip)
+        values = np.clip(values, lower, upper)
+
+    hist, _ = np.histogram(values, bins=bins)
+    if hist.max() == 0:
+        return ''
+    blocks = '▁▂▃▄▅▆▇█'
+    levels = np.floor((hist / hist.max()) * (len(blocks) - 1)).astype(int)
+    return ''.join(blocks[i] for i in levels)
diff --git a/tableone/tableone.py b/tableone/tableone.py
index 59a0c27..45ca950 100644
--- a/tableone/tableone.py
+++ b/tableone/tableone.py
@@ -8,12 +8,13 @@
import numpy as np
import pandas as pd
from tabulate import tabulate
+from typing import Tuple
from tableone.deprecations import handle_deprecated_parameters
from tableone.formatting import (docstring_copier, set_display_options, format_pvalues,
format_smd_columns, apply_limits, sort_and_reindex,
apply_order, mask_duplicate_values, create_row_labels,
- reorder_columns)
+ reorder_columns, generate_histograms)
from tableone.preprocessors import (ensure_list, detect_categorical, order_categorical,
get_groups, handle_categorical_nulls)
from tableone.statistics import Statistics
@@ -171,6 +172,14 @@ class TableOne:
include_null : bool, default: True
Include None/Null values for categorical variables by treating them as a
category level.
+    show_histograms : bool, default: False
+ Whether to include mini-histograms for continuous variables.
+    clip_histograms : tuple or None, default: (1, 99)
+ If show_histograms=True, specify a (lower_percentile, upper_percentile) range to clip the
+ data before generating histograms. This reduces the influence of extreme outliers.
+ For example, (1, 99) clips to the 1st and 99th percentiles.
+ Set to None to disable clipping and use the full range of values.
+
Attributes
@@ -210,23 +219,36 @@ def __init__(self, data: pd.DataFrame,
continuous: Optional[list] = None,
groupby: Optional[str] = None,
nonnormal: Optional[list] = None,
- min_max: Optional[list] = None, pval: Optional[bool] = False,
- pval_adjust: Optional[str] = None, htest_name: bool = False,
- pval_test_name: bool = False, htest: Optional[dict] = None,
+ min_max: Optional[list] = None,
+ pval: Optional[bool] = False,
+ pval_adjust: Optional[str] = None,
+ htest_name: bool = False,
+ pval_test_name: bool = False,
+ htest: Optional[dict] = None,
isnull: Optional[bool] = None, missing: bool = True,
- ddof: int = 1, labels: Optional[dict] = None,
- rename: Optional[dict] = None, sort: Union[bool, str] = False,
+ show_histograms: bool = False,
+ clip_histograms: Optional[Tuple[int, int]] = (1, 99),
+ ddof: int = 1,
+ labels: Optional[dict] = None,
+ rename: Optional[dict] = None,
+ sort: Union[bool, str] = False,
limit: Union[int, dict, None] = None,
- order: Optional[dict] = None, remarks: bool = False,
- label_suffix: bool = True, decimals: Union[int, dict] = 1,
- smd: bool = False, overall: bool = True,
- row_percent: bool = False, display_all: bool = False,
- dip_test: bool = False, normal_test: bool = False,
+ order: Optional[dict] = None,
+ remarks: bool = False,
+ label_suffix: bool = True,
+ decimals: Union[int, dict] = 1,
+ smd: bool = False,
+ overall: bool = True,
+ row_percent: bool = False,
+ display_all: bool = False,
+ dip_test: bool = False,
+ normal_test: bool = False,
tukey_test: bool = False,
pval_threshold: Optional[float] = None,
include_null: Optional[bool] = True,
pval_digits: int = 3,
- ttest_equal_var: bool = False) -> None:
+ ttest_equal_var: bool = False,
+ ) -> None:
# Warn about deprecated parameters
handle_deprecated_parameters(labels, isnull, pval_test_name, remarks)
@@ -241,7 +263,8 @@ def __init__(self, data: pd.DataFrame,
htest, missing, ddof, rename, sort, limit, order,
label_suffix, decimals, smd, overall, row_percent,
dip_test, normal_test, tukey_test, pval_threshold,
- include_null, pval_digits, ttest_equal_var)
+ include_null, pval_digits, ttest_equal_var,
+ show_histograms, clip_histograms)
# Initialize intermediate tables
self.initialize_intermediate_tables()
@@ -283,12 +306,14 @@ def initialize_core_attributes(self, data, columns, categorical, continuous, gro
htest, missing, ddof, rename, sort, limit, order,
label_suffix, decimals, smd, overall, row_percent,
dip_test, normal_test, tukey_test, pval_threshold,
- include_null, pval_digits, ttest_equal_var):
+ include_null, pval_digits, ttest_equal_var,
+ show_histograms, clip_histograms):
"""
Initialize attributes.
"""
self._alt_labels = rename
self._include_null = include_null
+ self._clip_histograms = clip_histograms
self._columns = columns if columns else data.columns.to_list() # type: ignore
self._categorical = detect_categorical(data[self._columns], groupby) if categorical is None else categorical
if continuous:
@@ -316,6 +341,7 @@ def initialize_core_attributes(self, data, columns, categorical, continuous, gro
self._pval_digits = pval_digits
self._reserved_columns = ['Missing', 'P-Value', 'Test', 'P-Value (adjusted)', 'SMD', 'Overall']
self._row_percent = row_percent
+ self._show_histograms = show_histograms
self._smd = smd
self._sort = sort
self._tukey_test = tukey_test
@@ -659,10 +685,59 @@ def _create_tableone(self, data):
table = self._combine_tables()
optional_columns = ['P-Value', 'P-Value (adjusted)', 'Test']
+ if self._show_histograms:
+ hist_cols = [
+ f"{lvl} Histogram" for lvl in self._groupbylvls
+ ] if self._groupby else ["Histogram"]
+ hist_cols.append("Overall Histogram") if self._groupby else None
+ optional_columns += hist_cols
+
# ensure column headers are strings before reindexing
table = table.reset_index().set_index(['variable', 'value']) # type: ignore
table.columns = table.columns.values.astype(str)
+ # Add histograms
+ if self._show_histograms:
+ histogram_cols = {}
+ for v in self._columns:
+ if v in self._continuous:
+ histograms = []
+ if self._groupby:
+ for lvl in self._groupbylvls:
+ lvl_values = data.loc[data[self._groupby] == lvl, v].dropna().values
+ histograms.append(generate_histograms(lvl_values, clip=self._clip_histograms))
+ overall_values = data[v].dropna().values
+ histograms.append(generate_histograms(overall_values, clip=self._clip_histograms))
+ else:
+ overall_values = data[v].dropna().values
+ histograms.append(generate_histograms(overall_values, clip=self._clip_histograms))
+ histogram_cols[v] = histograms
+
+ if histogram_cols:
+ new_cols = []
+ if self._groupby:
+ new_cols = [f"{lvl} Histogram" for lvl in self._groupbylvls] + ['Overall Histogram']
+ else:
+ new_cols = ['Histogram']
+
+ var_names = table.index.get_level_values(0)
+ var_values = table.index.get_level_values(1)
+
+ for idx, colname in enumerate(new_cols):
+ histograms = []
+ for var, val in zip(var_names, var_values):
+ # Only add histogram to the main summary row (val == '')
+ if val == '':
+ hists = histogram_cols.get(var, [])
+ if idx < len(hists):
+ histograms.append(hists[idx])
+ else:
+ histograms.append('')
+ else:
+ # No histogram for sub-rows
+ histograms.append('')
+ table[colname] = histograms
+
table = sort_and_reindex(table, self._smd, self.smd_table, self._sort, self._columns)
table = format_pvalues(table, self._pval, self._pval_adjust, self._pval_threshold, self._pval_digits)
table = format_smd_columns(table, self._smd, self.smd_table)
diff --git a/tests/test_histograms.py b/tests/test_histograms.py
new file mode 100644
index 0000000..fe9d9ea
--- /dev/null
+++ b/tests/test_histograms.py
@@ -0,0 +1,71 @@
+import numpy as np
+import pandas as pd
+
+from tableone import TableOne
+from tableone.formatting import generate_histograms
+
+
+def test_generate_histograms_simple():
+    # Simple case: clean data gives one block character per default bin
+    x = np.linspace(0, 10, 100)
+    hist = generate_histograms(x)
+    assert isinstance(hist, str) and len(hist) == 8
+    assert all(c in '▁▂▃▄▅▆▇█' for c in hist)
+
+
+def test_generate_histograms_empty_array():
+    """An empty input yields an empty string."""
+    no_values = np.array([])
+    sparkline = generate_histograms(no_values)
+    assert isinstance(sparkline, str)
+    assert sparkline == ''
+
+
+def test_clip_histogram_behavior():
+    """Clipping to percentiles must change the sparkline of outlier-heavy data."""
+
+    # Toy data: 95 roughly normal values followed by 5 extreme outliers.
+    rng = np.random.default_rng(seed=42)
+    values = np.concatenate([
+        rng.normal(loc=50, scale=5, size=95),
+        np.array([300, 400, 500, 600, 1000]),
+    ])
+    df = pd.DataFrame({'group': ['A'] * 50 + ['B'] * 50, 'value': values})
+
+    # Build the same table twice: without clipping and clipped to (5, 95).
+    shared_kwargs = dict(columns=['value'], groupby='group',
+                         continuous=['value'], show_histograms=True)
+    tables = {
+        'raw': TableOne(df, clip_histograms=None, **shared_kwargs),
+        'clipped': TableOne(df, clip_histograms=(5, 95), **shared_kwargs),
+    }
+
+    # The summary row is the one whose second index level is empty.
+    summary_rows = [
+        row for row in tables['raw'].tableone.index
+        if row[0].startswith('value') and row[1] == ''
+    ]
+    main_row_idx = summary_rows[0] if summary_rows else None
+
+    assert main_row_idx is not None, "Could not find main summary row for 'value'."
+
+    # Pull the overall sparkline out of each table.
+    column = ('Grouped by group', 'Overall Histogram')
+    no_clip_hist = tables['raw'].tableone.loc[main_row_idx, column]
+    clip_hist = tables['clipped'].tableone.loc[main_row_idx, column]
+
+    # Clipping must alter the rendered histogram.
+    assert no_clip_hist != clip_hist
+
+    # Neither histogram may be empty.
+    for sparkline in (no_clip_hist, clip_hist):
+        assert isinstance(sparkline, str)
+        assert len(sparkline) > 0
+
+
+def test_histogram_unicode_characters_only():
+    # Seeded input keeps the test reproducible; '' would pass the subset check
+    data = np.random.default_rng(seed=0).normal(size=100)
+    hist = generate_histograms(data)
+    block_chars = set('▁▂▃▄▅▆▇█')
+    assert hist and set(hist).issubset(block_chars)