| \n", " | Amount of Missing Values | \n", "
|---|---|
| TetanusLast10Tdap | \n", "82516 | \n", "
| PneumoVaxEver | \n", "77040 | \n", "
| HIVTesting | \n", "66127 | \n", "
| ChestScan | \n", "56046 | \n", "
| CovidPos | \n", "50764 | \n", "
| HighRiskLastYear | \n", "50623 | \n", "
| BMI | \n", "48806 | \n", "
| FluVaxLast12 | \n", "47121 | \n", "
| AlcoholDrinkers | \n", "46574 | \n", "
| WeightInKilograms | \n", "42078 | \n", "
| ECigaretteUsage | \n", "35660 | \n", "
| SmokerStatus | \n", "35462 | \n", "
| HeightInMeters | \n", "28652 | \n", "
| DifficultyErrands | \n", "25656 | \n", "
| DifficultyConcentrating | \n", "24240 | \n", "
| DifficultyWalking | \n", "24012 | \n", "
| DifficultyDressingBathing | \n", "23915 | \n", "
| BlindOrVisionDifficulty | \n", "21564 | \n", "
| DeafOrHardOfHearing | \n", "20647 | \n", "
| RaceEthnicityCategory | \n", "14057 | \n", "
| RemovedTeeth | \n", "11360 | \n", "
| PhysicalHealthDays | \n", "10927 | \n", "
| AgeCategory | \n", "9079 | \n", "
| MentalHealthDays | \n", "9067 | \n", "
| LastCheckupTime | \n", "8308 | \n", "
| SleepHours | \n", "5453 | \n", "
| HadAngina | \n", "4405 | \n", "
| HadSkinCancer | \n", "3143 | \n", "
| HadHeartAttack | \n", "3065 | \n", "
| HadDepressiveDisorder | \n", "2812 | \n", "
| HadArthritis | \n", "2633 | \n", "
| HadCOPD | \n", "2219 | \n", "
| HadKidneyDisease | \n", "1926 | \n", "
| HadAsthma | \n", "1773 | \n", "
| HadStroke | \n", "1557 | \n", "
| GeneralHealth | \n", "1198 | \n", "
| PhysicalActivities | \n", "1093 | \n", "
| HadDiabetes | \n", "1087 | \n", "
| \n", " | State | \n", "Sex | \n", "GeneralHealth | \n", "PhysicalHealthDays | \n", "MentalHealthDays | \n", "LastCheckupTime | \n", "PhysicalActivities | \n", "SleepHours | \n", "RemovedTeeth | \n", "HadHeartAttack | \n", "... | \n", "HeightInMeters | \n", "WeightInKilograms | \n", "BMI | \n", "AlcoholDrinkers | \n", "HIVTesting | \n", "FluVaxLast12 | \n", "PneumoVaxEver | \n", "TetanusLast10Tdap | \n", "HighRiskLastYear | \n", "CovidPos | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Alabama | \n", "Female | \n", "Very good | \n", "0.0 | \n", "0.0 | \n", "Within past year (anytime less than 12 months ... | \n", "No | \n", "8.0 | \n", "NaN | \n", "No | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "No | \n", "No | \n", "Yes | \n", "No | \n", "Yes, received tetanus shot but not sure what type | \n", "No | \n", "No | \n", "
| 1 | \n", "Alabama | \n", "Female | \n", "Excellent | \n", "0.0 | \n", "0.0 | \n", "NaN | \n", "No | \n", "6.0 | \n", "NaN | \n", "No | \n", "... | \n", "1.60 | \n", "68.04 | \n", "26.57 | \n", "No | \n", "No | \n", "No | \n", "No | \n", "No, did not receive any tetanus shot in the pa... | \n", "No | \n", "No | \n", "
| 2 | \n", "Alabama | \n", "Female | \n", "Very good | \n", "2.0 | \n", "3.0 | \n", "Within past year (anytime less than 12 months ... | \n", "Yes | \n", "5.0 | \n", "NaN | \n", "No | \n", "... | \n", "1.57 | \n", "63.50 | \n", "25.61 | \n", "No | \n", "No | \n", "No | \n", "No | \n", "NaN | \n", "No | \n", "Yes | \n", "
| 3 | \n", "Alabama | \n", "Female | \n", "Excellent | \n", "0.0 | \n", "0.0 | \n", "Within past year (anytime less than 12 months ... | \n", "Yes | \n", "7.0 | \n", "NaN | \n", "No | \n", "... | \n", "1.65 | \n", "63.50 | \n", "23.30 | \n", "No | \n", "No | \n", "Yes | \n", "Yes | \n", "No, did not receive any tetanus shot in the pa... | \n", "No | \n", "No | \n", "
| 4 | \n", "Alabama | \n", "Female | \n", "Fair | \n", "2.0 | \n", "0.0 | \n", "Within past year (anytime less than 12 months ... | \n", "Yes | \n", "9.0 | \n", "NaN | \n", "No | \n", "... | \n", "1.57 | \n", "53.98 | \n", "21.77 | \n", "Yes | \n", "No | \n", "No | \n", "Yes | \n", "No, did not receive any tetanus shot in the pa... | \n", "No | \n", "No | \n", "
5 rows × 40 columns
\n", "| \n", " | Amount of Missing Values | \n", "
|---|---|
| AlcoholDrinkers | \n", "42691 | \n", "
| BMI | \n", "42352 | \n", "
| ECigaretteUsage | \n", "31835 | \n", "
| SmokerStatus | \n", "31636 | \n", "
| DifficultyErrands | \n", "21968 | \n", "
| DifficultyConcentrating | \n", "20559 | \n", "
| DifficultyWalking | \n", "20327 | \n", "
| DifficultyDressingBathing | \n", "20229 | \n", "
| BlindOrVisionDifficulty | \n", "17882 | \n", "
| DeafOrHardOfHearing | \n", "16967 | \n", "
| RaceEthnicityCategory | \n", "13736 | \n", "
| PhysicalHealthDays | \n", "10900 | \n", "
| MentalHealthDays | \n", "9037 | \n", "
| AgeCategory | \n", "8438 | \n", "
| SleepHours | \n", "5413 | \n", "
| HadAngina | \n", "4379 | \n", "
| HadHeartAttack | \n", "3039 | \n", "
| HadDepressiveDisorder | \n", "2786 | \n", "
| HadArthritis | \n", "2607 | \n", "
| HadCOPD | \n", "2193 | \n", "
| HadKidneyDisease | \n", "1900 | \n", "
| HadAsthma | \n", "1747 | \n", "
| HadStroke | \n", "1531 | \n", "
| GeneralHealth | \n", "1176 | \n", "
| PhysicalActivities | \n", "1069 | \n", "
| HadDiabetes | \n", "1061 | \n", "
| \n", " | Total Count | \n", "Percentage (%) | \n", "
|---|---|---|
| MentalHealthDays | \n", "\n", " | \n", " |
| 0.0 | \n", "194145 | \n", "60.98 | \n", "
| 2.0 | \n", "18531 | \n", "5.82 | \n", "
| 5.0 | \n", "15538 | \n", "4.88 | \n", "
| 30.0 | \n", "14443 | \n", "4.54 | \n", "
| 3.0 | \n", "11974 | \n", "3.76 | \n", "
| 10.0 | \n", "11813 | \n", "3.71 | \n", "
| 1.0 | \n", "11158 | \n", "3.50 | \n", "
| 15.0 | \n", "10934 | \n", "3.43 | \n", "
| 20.0 | \n", "6826 | \n", "2.14 | \n", "
| 4.0 | \n", "6269 | \n", "1.97 | \n", "
| \n", " | sex | \n", "age_category | \n", "race_ethnicity | \n", "bmi | \n", "general_health | \n", "sleep_hours | \n", "physical_activities | \n", "smoker_status | \n", "e_cigarette_usage | \n", "alcohol_drinkers | \n", "... | \n", "had_kidney_disease | \n", "had_arthritis | \n", "had_diabetes | \n", "deaf_or_hard_of_hearing | \n", "blind_or_vision_difficulty | \n", "difficulty_concentrating | \n", "difficulty_walking | \n", "difficulty_dressing_bathing | \n", "difficulty_errands | \n", "poor_mental_health_days | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | \n", "female | \n", "80.0 | \n", "white | \n", "26.57 | \n", "excellent | \n", "6.0 | \n", "no | \n", "no | \n", "no | \n", "no | \n", "... | \n", "no | \n", "no | \n", "no | \n", "no | \n", "no | \n", "no | \n", "no | \n", "no | \n", "no | \n", "0.0 | \n", "
| 2 | \n", "female | \n", "55.0 | \n", "white | \n", "25.61 | \n", "very good | \n", "5.0 | \n", "yes | \n", "no | \n", "no | \n", "no | \n", "... | \n", "no | \n", "no | \n", "no | \n", "no | \n", "no | \n", "no | \n", "no | \n", "no | \n", "no | \n", "3.0 | \n", "
| 3 | \n", "female | \n", "55.0 | \n", "white | \n", "23.30 | \n", "excellent | \n", "7.0 | \n", "yes | \n", "yes | \n", "no | \n", "no | \n", "... | \n", "no | \n", "yes | \n", "no | \n", "no | \n", "no | \n", "no | \n", "no | \n", "no | \n", "no | \n", "0.0 | \n", "
3 rows × 27 columns
\n", "| \n", " | min | \n", "max | \n", "
|---|---|---|
| bmi | \n", "12.02 | \n", "97.65 | \n", "
| sleep_hours | \n", "1.00 | \n", "14.00 | \n", "
| poor_physical_health_days | \n", "0.00 | \n", "30.00 | \n", "
| poor_mental_health_days | \n", "0.00 | \n", "30.00 | \n", "
| \n", " | min | \n", "max | \n", "
|---|---|---|
| bmi | \n", "12.02 | \n", "54.39 | \n", "
| sleep_hours | \n", "1.00 | \n", "14.00 | \n", "
| poor_physical_health_days | \n", "0.00 | \n", "30.00 | \n", "
| poor_mental_health_days | \n", "0.00 | \n", "30.00 | \n", "
| \n", " | count | \n", "unique | \n", "top | \n", "freq | \n", "
|---|---|---|---|---|
| sex | \n", "302472 | \n", "2 | \n", "female | \n", "154084 | \n", "
| race_ethnicity | \n", "302472 | \n", "5 | \n", "white | \n", "228656 | \n", "
| general_health | \n", "302472 | \n", "5 | \n", "very good | \n", "107603 | \n", "
| physical_activities | \n", "302472 | \n", "2 | \n", "yes | \n", "238933 | \n", "
| smoker_status | \n", "302472 | \n", "2 | \n", "no | \n", "268007 | \n", "
| e_cigarette_usage | \n", "302472 | \n", "2 | \n", "no | \n", "286689 | \n", "
| alcohol_drinkers | \n", "302472 | \n", "2 | \n", "yes | \n", "168108 | \n", "
| had_heart_attack | \n", "302472 | \n", "2 | \n", "no | \n", "287253 | \n", "
| had_angina | \n", "302472 | \n", "2 | \n", "no | \n", "285291 | \n", "
| had_stroke | \n", "302472 | \n", "2 | \n", "no | \n", "291049 | \n", "
| had_asthma | \n", "302472 | \n", "2 | \n", "no | \n", "259432 | \n", "
| had_copd | \n", "302472 | \n", "2 | \n", "no | \n", "282231 | \n", "
| had_depressive_disorder | \n", "302472 | \n", "2 | \n", "no | \n", "244610 | \n", "
| had_kidney_disease | \n", "302472 | \n", "2 | \n", "no | \n", "289916 | \n", "
| had_arthritis | \n", "302472 | \n", "2 | \n", "no | \n", "203135 | \n", "
| had_diabetes | \n", "302472 | \n", "4 | \n", "no | \n", "254420 | \n", "
| deaf_or_hard_of_hearing | \n", "302472 | \n", "2 | \n", "no | \n", "276837 | \n", "
| blind_or_vision_difficulty | \n", "302472 | \n", "2 | \n", "no | \n", "288705 | \n", "
| difficulty_concentrating | \n", "302472 | \n", "2 | \n", "no | \n", "273202 | \n", "
| difficulty_walking | \n", "302472 | \n", "2 | \n", "no | \n", "264608 | \n", "
| difficulty_dressing_bathing | \n", "302472 | \n", "2 | \n", "no | \n", "294920 | \n", "
| difficulty_errands | \n", "302472 | \n", "2 | \n", "no | \n", "285994 | \n", "
| \n", " | count | \n", "mean | \n", "std | \n", "min | \n", "25% | \n", "50% | \n", "75% | \n", "max | \n", "
|---|---|---|---|---|---|---|---|---|
| age_category | \n", "302472.0 | \n", "52.91 | \n", "18.06 | \n", "18.00 | \n", "40.00 | \n", "55.00 | \n", "70.00 | \n", "80.00 | \n", "
| bmi | \n", "302472.0 | \n", "28.21 | \n", "5.73 | \n", "12.02 | \n", "24.19 | \n", "27.41 | \n", "31.45 | \n", "54.39 | \n", "
| sleep_hours | \n", "302472.0 | \n", "7.03 | \n", "1.22 | \n", "1.00 | \n", "6.00 | \n", "7.00 | \n", "8.00 | \n", "14.00 | \n", "
| poor_physical_health_days | \n", "302472.0 | \n", "3.11 | \n", "6.97 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "2.00 | \n", "30.00 | \n", "
| poor_mental_health_days | \n", "302472.0 | \n", "3.49 | \n", "7.07 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "3.00 | \n", "30.00 | \n", "
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearRegression()
| \n", " | age_category | \n", "bmi | \n", "sleep_hours | \n", "poor_physical_health_days | \n", "sex_male | \n", "race_ethnicity_hispanic | \n", "race_ethnicity_multiracial | \n", "race_ethnicity_other | \n", "race_ethnicity_white | \n", "general_health_fair | \n", "... | \n", "had_arthritis_yes | \n", "had_diabetes_no | \n", "had_diabetes_pre-diabetes | \n", "had_diabetes_yes | \n", "deaf_or_hard_of_hearing_yes | \n", "blind_or_vision_difficulty_yes | \n", "difficulty_concentrating_yes | \n", "difficulty_walking_yes | \n", "difficulty_dressing_bathing_yes | \n", "difficulty_errands_yes | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "1.499846 | \n", "-0.286107 | \n", "-0.846715 | \n", "-0.445594 | \n", "-0.981343 | \n", "-0.322523 | \n", "-0.149967 | \n", "-0.233683 | \n", "0.568177 | \n", "-0.362118 | \n", "... | \n", "-0.699299 | \n", "0.434590 | \n", "-0.149621 | \n", "-0.384251 | \n", "-0.304302 | \n", "-0.21837 | \n", "-0.327318 | \n", "-0.378279 | \n", "-0.160022 | \n", "-0.240035 | \n", "
| 1 | \n", "0.115899 | \n", "-0.453756 | \n", "-1.666745 | \n", "-0.158815 | \n", "-0.981343 | \n", "-0.322523 | \n", "-0.149967 | \n", "-0.233683 | \n", "0.568177 | \n", "-0.362118 | \n", "... | \n", "-0.699299 | \n", "0.434590 | \n", "-0.149621 | \n", "-0.384251 | \n", "-0.304302 | \n", "-0.21837 | \n", "-0.327318 | \n", "-0.378279 | \n", "-0.160022 | \n", "-0.240035 | \n", "
| 2 | \n", "0.115899 | \n", "-0.857163 | \n", "-0.026685 | \n", "-0.445594 | \n", "-0.981343 | \n", "-0.322523 | \n", "-0.149967 | \n", "-0.233683 | \n", "0.568177 | \n", "-0.362118 | \n", "... | \n", "1.430003 | \n", "0.434590 | \n", "-0.149621 | \n", "-0.384251 | \n", "-0.304302 | \n", "-0.21837 | \n", "-0.327318 | \n", "-0.378279 | \n", "-0.160022 | \n", "-0.240035 | \n", "
| 3 | \n", "-0.714469 | \n", "-1.124354 | \n", "1.613374 | \n", "-0.158815 | \n", "-0.981343 | \n", "-0.322523 | \n", "-0.149967 | \n", "-0.233683 | \n", "0.568177 | \n", "2.761528 | \n", "... | \n", "-0.699299 | \n", "0.434590 | \n", "-0.149621 | \n", "-0.384251 | \n", "-0.304302 | \n", "-0.21837 | \n", "-0.327318 | \n", "-0.378279 | \n", "-0.160022 | \n", "-0.240035 | \n", "
| 4 | \n", "1.499846 | \n", "-0.371678 | \n", "-0.026685 | \n", "-0.302205 | \n", "1.019012 | \n", "-0.322523 | \n", "-0.149967 | \n", "-0.233683 | \n", "0.568177 | \n", "-0.362118 | \n", "... | \n", "-0.699299 | \n", "-2.301017 | \n", "-0.149621 | \n", "2.602467 | \n", "-0.304302 | \n", "-0.21837 | \n", "-0.327318 | \n", "-0.378279 | \n", "-0.160022 | \n", "-0.240035 | \n", "
5 rows × 34 columns
\n", "| \n", " | age_category | \n", "bmi | \n", "sleep_hours | \n", "poor_physical_health_days | \n", "sex_male | \n", "race_ethnicity_hispanic | \n", "race_ethnicity_multiracial | \n", "race_ethnicity_other | \n", "race_ethnicity_white | \n", "general_health_fair | \n", "... | \n", "difficulty_concentrating_yes | \n", "difficulty_walking_yes | \n", "difficulty_dressing_bathing_yes | \n", "difficulty_errands_yes | \n", "had_depressive_disorder_yes difficulty_concentrating_yes | \n", "had_depressive_disorder_yes general_health_poor | \n", "had_depressive_disorder_yes sleep_hours | \n", "difficulty_concentrating_yes general_health_poor | \n", "difficulty_concentrating_yes sleep_hours | \n", "general_health_poor sleep_hours | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "1.499846 | \n", "-0.286107 | \n", "-0.846715 | \n", "-0.445594 | \n", "-0.981343 | \n", "-0.322523 | \n", "-0.149967 | \n", "-0.233683 | \n", "0.568177 | \n", "-0.362118 | \n", "... | \n", "-0.327318 | \n", "-0.378279 | \n", "-0.160022 | \n", "-0.240035 | \n", "0.159195 | \n", "0.079462 | \n", "0.411810 | \n", "0.053477 | \n", "0.277145 | \n", "0.138336 | \n", "
| 1 | \n", "0.115899 | \n", "-0.453756 | \n", "-1.666745 | \n", "-0.158815 | \n", "-0.981343 | \n", "-0.322523 | \n", "-0.149967 | \n", "-0.233683 | \n", "0.568177 | \n", "-0.362118 | \n", "... | \n", "-0.327318 | \n", "-0.378279 | \n", "-0.160022 | \n", "-0.240035 | \n", "0.159195 | \n", "0.079462 | \n", "0.810641 | \n", "0.053477 | \n", "0.545555 | \n", "0.272313 | \n", "
| 2 | \n", "0.115899 | \n", "-0.857163 | \n", "-0.026685 | \n", "-0.445594 | \n", "-0.981343 | \n", "-0.322523 | \n", "-0.149967 | \n", "-0.233683 | \n", "0.568177 | \n", "-0.362118 | \n", "... | \n", "-0.327318 | \n", "-0.378279 | \n", "-0.160022 | \n", "-0.240035 | \n", "0.159195 | \n", "0.079462 | \n", "0.012979 | \n", "0.053477 | \n", "0.008735 | \n", "0.004360 | \n", "
| 3 | \n", "-0.714469 | \n", "-1.124354 | \n", "1.613374 | \n", "-0.158815 | \n", "-0.981343 | \n", "-0.322523 | \n", "-0.149967 | \n", "-0.233683 | \n", "0.568177 | \n", "2.761528 | \n", "... | \n", "-0.327318 | \n", "-0.378279 | \n", "-0.160022 | \n", "-0.240035 | \n", "0.159195 | \n", "0.079462 | \n", "-0.784684 | \n", "0.053477 | \n", "-0.528086 | \n", "-0.263593 | \n", "
| 4 | \n", "1.499846 | \n", "-0.371678 | \n", "-0.026685 | \n", "-0.302205 | \n", "1.019012 | \n", "-0.322523 | \n", "-0.149967 | \n", "-0.233683 | \n", "0.568177 | \n", "-0.362118 | \n", "... | \n", "-0.327318 | \n", "-0.378279 | \n", "-0.160022 | \n", "-0.240035 | \n", "0.159195 | \n", "-2.976872 | \n", "0.012979 | \n", "-2.003410 | \n", "0.008735 | \n", "-0.163332 | \n", "
5 rows × 40 columns
\n", "| \n", " | Algorithm | \n", "Score | \n", "
|---|---|---|
| 0 | \n", "K-Means (k=3) | \n", "-0.0011 | \n", "
| 1 | \n", "K-Means (k=4) | \n", "-0.0053 | \n", "
| 2 | \n", "DBSCAN | \n", "-0.3257 | \n", "
| \n", " | Model Name | \n", "MAE (Days) | \n", "RMSE (Days) | \n", "R2 Score | \n", "
|---|---|---|---|---|
| 0 | \n", "Initial Baseline (Linear) | \n", "3.6770 | \n", "5.8506 | \n", "0.3157 | \n", "
| 1 | \n", "Linear Regression (Engineered) | \n", "3.0812 | \n", "6.2635 | \n", "0.2157 | \n", "
| 2 | \n", "Random Forest (SKlearn) | \n", "3.1311 | \n", "6.2497 | \n", "0.2192 | \n", "
| 3 | \n", "Gradient Boosting (SKlearn) | \n", "3.0391 | \n", "6.2161 | \n", "0.2275 | \n", "