{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b9df5df0-154a-4802-80f8-101566dc579d",
"metadata": {
"tags": [
"remove-cell"
]
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"mtcars = pd.read_csv('https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv')"
]
},
{
"cell_type": "markdown",
"id": "f7ef09fe-3841-46ea-900e-ef82a5663a8a",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"## Boolean operations with Pandas\n",
"Like NumPy, you can subset and select values from a DataFrame using Boolean values - they work just the same as in NumPy by comparing values in your DataFrame to your chosen quantity."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "34c59c95-2db6-4ae1-9b0a-97d64d6a8c44",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" model | \n",
" mpg | \n",
" cyl | \n",
" disp | \n",
" hp | \n",
" drat | \n",
" wt | \n",
" qsec | \n",
" vs | \n",
" am | \n",
" gear | \n",
" carb | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Mazda RX4 | \n",
" 21.0 | \n",
" 6 | \n",
" 160.0 | \n",
" 110 | \n",
" 3.90 | \n",
" 2.620 | \n",
" 16.46 | \n",
" 0 | \n",
" 1 | \n",
" 4 | \n",
" 4 | \n",
"
\n",
" \n",
" 1 | \n",
" Mazda RX4 Wag | \n",
" 21.0 | \n",
" 6 | \n",
" 160.0 | \n",
" 110 | \n",
" 3.90 | \n",
" 2.875 | \n",
" 17.02 | \n",
" 0 | \n",
" 1 | \n",
" 4 | \n",
" 4 | \n",
"
\n",
" \n",
" 2 | \n",
" Datsun 710 | \n",
" 22.8 | \n",
" 4 | \n",
" 108.0 | \n",
" 93 | \n",
" 3.85 | \n",
" 2.320 | \n",
" 18.61 | \n",
" 1 | \n",
" 1 | \n",
" 4 | \n",
" 1 | \n",
"
\n",
" \n",
" 17 | \n",
" Fiat 128 | \n",
" 32.4 | \n",
" 4 | \n",
" 78.7 | \n",
" 66 | \n",
" 4.08 | \n",
" 2.200 | \n",
" 19.47 | \n",
" 1 | \n",
" 1 | \n",
" 4 | \n",
" 1 | \n",
"
\n",
" \n",
" 18 | \n",
" Honda Civic | \n",
" 30.4 | \n",
" 4 | \n",
" 75.7 | \n",
" 52 | \n",
" 4.93 | \n",
" 1.615 | \n",
" 18.52 | \n",
" 1 | \n",
" 1 | \n",
" 4 | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" model mpg cyl disp hp drat wt qsec vs am gear \\\n",
"0 Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 \n",
"1 Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 \n",
"2 Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 \n",
"17 Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 \n",
"18 Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 \n",
"\n",
" carb \n",
"0 4 \n",
"1 4 \n",
"2 1 \n",
"17 1 \n",
"18 2 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" model | \n",
" mpg | \n",
" cyl | \n",
" disp | \n",
" hp | \n",
" drat | \n",
" wt | \n",
" qsec | \n",
" vs | \n",
" am | \n",
" gear | \n",
" carb | \n",
"
\n",
" \n",
" \n",
" \n",
" 3 | \n",
" Hornet 4 Drive | \n",
" 21.4 | \n",
" 6 | \n",
" 258.0 | \n",
" 110 | \n",
" 3.08 | \n",
" 3.215 | \n",
" 19.44 | \n",
" 1 | \n",
" 0 | \n",
" 3 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" Hornet Sportabout | \n",
" 18.7 | \n",
" 8 | \n",
" 360.0 | \n",
" 175 | \n",
" 3.15 | \n",
" 3.440 | \n",
" 17.02 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 2 | \n",
"
\n",
" \n",
" 5 | \n",
" Valiant | \n",
" 18.1 | \n",
" 6 | \n",
" 225.0 | \n",
" 105 | \n",
" 2.76 | \n",
" 3.460 | \n",
" 20.22 | \n",
" 1 | \n",
" 0 | \n",
" 3 | \n",
" 1 | \n",
"
\n",
" \n",
" 6 | \n",
" Duster 360 | \n",
" 14.3 | \n",
" 8 | \n",
" 360.0 | \n",
" 245 | \n",
" 3.21 | \n",
" 3.570 | \n",
" 15.84 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 4 | \n",
"
\n",
" \n",
" 7 | \n",
" Merc 240D | \n",
" 24.4 | \n",
" 4 | \n",
" 146.7 | \n",
" 62 | \n",
" 3.69 | \n",
" 3.190 | \n",
" 20.00 | \n",
" 1 | \n",
" 0 | \n",
" 4 | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" model mpg cyl disp hp drat wt qsec vs am gear \\\n",
"3 Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 \n",
"4 Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 \n",
"5 Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 \n",
"6 Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 \n",
"7 Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 \n",
"\n",
" carb \n",
"3 1 \n",
"4 2 \n",
"5 1 \n",
"6 4 \n",
"7 2 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Demonstrate Boolean subsets. Split the data into two - 'am' specifies whether a car is automatic or manual\n",
"auto = mtcars['am'] == 1\n",
"\n",
"display(mtcars[auto].head())\n",
"display(mtcars[~auto].head())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1787c4a6-ecd2-47b7-b2cf-fb519ffc80b3",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" model | \n",
" mpg | \n",
" cyl | \n",
" disp | \n",
" hp | \n",
" drat | \n",
" wt | \n",
" qsec | \n",
" vs | \n",
" am | \n",
" gear | \n",
" carb | \n",
"
\n",
" \n",
" \n",
" \n",
" 28 | \n",
" Ford Pantera L | \n",
" 15.8 | \n",
" 8 | \n",
" 351.0 | \n",
" 264 | \n",
" 4.22 | \n",
" 3.17 | \n",
" 14.5 | \n",
" 0 | \n",
" 1 | \n",
" 5 | \n",
" 4 | \n",
"
\n",
" \n",
" 30 | \n",
" Maserati Bora | \n",
" 15.0 | \n",
" 8 | \n",
" 301.0 | \n",
" 335 | \n",
" 3.54 | \n",
" 3.57 | \n",
" 14.6 | \n",
" 0 | \n",
" 1 | \n",
" 5 | \n",
" 8 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" model mpg cyl disp hp drat wt qsec vs am gear \\\n",
"28 Ford Pantera L 15.8 8 351.0 264 4.22 3.17 14.5 0 1 5 \n",
"30 Maserati Bora 15.0 8 301.0 335 3.54 3.57 14.6 0 1 5 \n",
"\n",
" carb \n",
"28 4 \n",
"30 8 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Or MPG between certain values, only for cars with 5 gears!\n",
"miles_per = ((mtcars['mpg'] < 17.0) | (mtcars['mpg'] > 32.0)) & (mtcars['gear'] == 5)\n",
"\n",
"display(mtcars[miles_per])"
]
},
{
"cell_type": "markdown",
"id": "b52bca18-da44-4255-aa96-39f7eecdb7dd",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Cleaning data in Pandas\n",
"As with NumPy arrays, there are special ways to remove data with NaN's. Pandas has its own `.isnull()` method which can be chained with other methods to help clean up a dataset."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a799da32-edaa-47df-9b6d-5c4f40fd77c6",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" model | \n",
" mpg | \n",
" cyl | \n",
" disp | \n",
" hp | \n",
" drat | \n",
" wt | \n",
" qsec | \n",
" vs | \n",
" am | \n",
" gear | \n",
" carb | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Mazda RX4 | \n",
" 21.0 | \n",
" 6.0 | \n",
" 160.0 | \n",
" 110.0 | \n",
" 3.90 | \n",
" NaN | \n",
" NaN | \n",
" 0.0 | \n",
" 1.0 | \n",
" 4.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" 1 | \n",
" Mazda RX4 Wag | \n",
" 21.0 | \n",
" 6.0 | \n",
" 160.0 | \n",
" 110.0 | \n",
" 3.90 | \n",
" 2.875 | \n",
" 17.02 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 4.0 | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" NaN | \n",
" 22.8 | \n",
" 4.0 | \n",
" 108.0 | \n",
" 93.0 | \n",
" 3.85 | \n",
" 2.320 | \n",
" 18.61 | \n",
" NaN | \n",
" 1.0 | \n",
" 4.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 3 | \n",
" Hornet 4 Drive | \n",
" 21.4 | \n",
" 6.0 | \n",
" 258.0 | \n",
" NaN | \n",
" 3.08 | \n",
" 3.215 | \n",
" 19.44 | \n",
" 1.0 | \n",
" NaN | \n",
" 3.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 4 | \n",
" Hornet Sportabout | \n",
" 18.7 | \n",
" NaN | \n",
" 360.0 | \n",
" 175.0 | \n",
" 3.15 | \n",
" 3.440 | \n",
" 17.02 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" model mpg cyl disp hp drat wt qsec vs am \\\n",
"0 Mazda RX4 21.0 6.0 160.0 110.0 3.90 NaN NaN 0.0 1.0 \n",
"1 Mazda RX4 Wag 21.0 6.0 160.0 110.0 3.90 2.875 17.02 0.0 1.0 \n",
"2 NaN 22.8 4.0 108.0 93.0 3.85 2.320 18.61 NaN 1.0 \n",
"3 Hornet 4 Drive 21.4 6.0 258.0 NaN 3.08 3.215 19.44 1.0 NaN \n",
"4 Hornet Sportabout 18.7 NaN 360.0 175.0 3.15 3.440 17.02 0.0 0.0 \n",
"\n",
" gear carb \n",
"0 4.0 4.0 \n",
"1 4.0 NaN \n",
"2 4.0 1.0 \n",
"3 3.0 1.0 \n",
"4 3.0 NaN "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# First add NaN to rows\n",
"inds = np.random.choice([True, False], p=[.1, .9], size=mtcars.shape)\n",
"\n",
"# Copy our original array so we don't overwrite it!\n",
"nan_cars = mtcars.copy()\n",
"\n",
"# Add NaN\n",
"nan_cars[inds] = np.NaN\n",
"\n",
"display(nan_cars.head())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "e7bc45ba-6c7a-4c69-899b-8d7557e58586",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" model | \n",
" mpg | \n",
" cyl | \n",
" disp | \n",
" hp | \n",
" drat | \n",
" wt | \n",
" qsec | \n",
" vs | \n",
" am | \n",
" gear | \n",
" carb | \n",
"
\n",
" \n",
" \n",
" \n",
" 6 | \n",
" Duster 360 | \n",
" 14.3 | \n",
" 8.0 | \n",
" 360.0 | \n",
" 245.0 | \n",
" 3.21 | \n",
" 3.57 | \n",
" 15.84 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" 13 | \n",
" Merc 450SLC | \n",
" 15.2 | \n",
" 8.0 | \n",
" 275.8 | \n",
" 180.0 | \n",
" 3.07 | \n",
" 3.78 | \n",
" 18.00 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" 17 | \n",
" Fiat 128 | \n",
" 32.4 | \n",
" 4.0 | \n",
" 78.7 | \n",
" 66.0 | \n",
" 4.08 | \n",
" 2.20 | \n",
" 19.47 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 4.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 21 | \n",
" Dodge Challenger | \n",
" 15.5 | \n",
" 8.0 | \n",
" 318.0 | \n",
" 150.0 | \n",
" 2.76 | \n",
" 3.52 | \n",
" 16.87 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 2.0 | \n",
"
\n",
" \n",
" 31 | \n",
" Volvo 142E | \n",
" 21.4 | \n",
" 4.0 | \n",
" 121.0 | \n",
" 109.0 | \n",
" 4.11 | \n",
" 2.78 | \n",
" 18.60 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 4.0 | \n",
" 2.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" model mpg cyl disp hp drat wt qsec vs am \\\n",
"6 Duster 360 14.3 8.0 360.0 245.0 3.21 3.57 15.84 0.0 0.0 \n",
"13 Merc 450SLC 15.2 8.0 275.8 180.0 3.07 3.78 18.00 0.0 0.0 \n",
"17 Fiat 128 32.4 4.0 78.7 66.0 4.08 2.20 19.47 1.0 1.0 \n",
"21 Dodge Challenger 15.5 8.0 318.0 150.0 2.76 3.52 16.87 0.0 0.0 \n",
"31 Volvo 142E 21.4 4.0 121.0 109.0 4.11 2.78 18.60 1.0 1.0 \n",
"\n",
" gear carb \n",
"6 3.0 4.0 \n",
"13 3.0 3.0 \n",
"17 4.0 1.0 \n",
"21 3.0 2.0 \n",
"31 4.0 2.0 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Use .isnull() to find problem participants with .any()\n",
"problems = nan_cars.isnull().any(axis=1)\n",
"\n",
"# Subset!\n",
"display(nan_cars[~problems])"
]
},
{
"cell_type": "markdown",
"id": "d31cf973-a55e-4b79-949c-274077732c71",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Imputing values and summary statistics\n",
"Dropping data is great, but most of the time you don't want to do that. Psychological data can be hard to come by and you want to make the most of it. There is a solution - replacing missing values with some educated guesses. An example of this might be the mean or median.\n",
"\n",
"In NumPy, computing summary statistics across vectors with missing data (`np.NaN`) will always result in `nan` - the missing value will cause problems. But in Pandas, calling `.mean()`, `.median()`, or `.std()` will *ignore* missing values and compute the statistic on the valid data. \n",
"\n",
"And once you have those, you can replace the missing values with them. This is made very simple with Pandas, because of the `.fillna()` method. This method accepts either a constant value, or a vector of scores that match the axis of interest (e.g., same number of rows or columns) to fill the gaps with."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "ca0c89ef-5c9e-4e5d-846d-30f4295186a8",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" model | \n",
" mpg | \n",
" cyl | \n",
" disp | \n",
" hp | \n",
" drat | \n",
" wt | \n",
" qsec | \n",
" vs | \n",
" am | \n",
" gear | \n",
" carb | \n",
"
\n",
" \n",
" \n",
" \n",
" 27 | \n",
" Lotus Europa | \n",
" 9999.0 | \n",
" 9999.0 | \n",
" 95.1 | \n",
" 113.0 | \n",
" 3.77 | \n",
" 1.513 | \n",
" 16.9 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 5.0 | \n",
" 2.0 | \n",
"
\n",
" \n",
" 28 | \n",
" Ford Pantera L | \n",
" 15.8 | \n",
" 8.0 | \n",
" 351.0 | \n",
" 9999.0 | \n",
" 4.22 | \n",
" 3.170 | \n",
" 14.5 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 5.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" 29 | \n",
" Ferrari Dino | \n",
" 19.7 | \n",
" 6.0 | \n",
" 145.0 | \n",
" 175.0 | \n",
" 3.62 | \n",
" 2.770 | \n",
" 9999.0 | \n",
" 0.0 | \n",
" 9999.0 | \n",
" 5.0 | \n",
" 6.0 | \n",
"
\n",
" \n",
" 30 | \n",
" Maserati Bora | \n",
" 9999.0 | \n",
" 8.0 | \n",
" 301.0 | \n",
" 335.0 | \n",
" 3.54 | \n",
" 3.570 | \n",
" 14.6 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 9999.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" 31 | \n",
" Volvo 142E | \n",
" 21.4 | \n",
" 4.0 | \n",
" 121.0 | \n",
" 109.0 | \n",
" 4.11 | \n",
" 2.780 | \n",
" 18.6 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 4.0 | \n",
" 2.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" model mpg cyl disp hp drat wt qsec vs \\\n",
"27 Lotus Europa 9999.0 9999.0 95.1 113.0 3.77 1.513 16.9 1.0 \n",
"28 Ford Pantera L 15.8 8.0 351.0 9999.0 4.22 3.170 14.5 0.0 \n",
"29 Ferrari Dino 19.7 6.0 145.0 175.0 3.62 2.770 9999.0 0.0 \n",
"30 Maserati Bora 9999.0 8.0 301.0 335.0 3.54 3.570 14.6 0.0 \n",
"31 Volvo 142E 21.4 4.0 121.0 109.0 4.11 2.780 18.6 1.0 \n",
"\n",
" am gear carb \n",
"27 1.0 5.0 2.0 \n",
"28 1.0 5.0 4.0 \n",
"29 9999.0 5.0 6.0 \n",
"30 1.0 9999.0 8.0 \n",
"31 1.0 4.0 2.0 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Demonstrate fillna with a constant\n",
"display(nan_cars.fillna(9999).tail())"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "274c2736-25b1-45d3-9f8f-5c01b58d423e",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/mw/xt4ddf0j2n3dr4qcr__qhqlr0000gn/T/ipykernel_37590/463963669.py:2: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.\n",
" mean_scores = nan_cars.mean(axis=0) # altrenatively use `axis='rows'`\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" model | \n",
" mpg | \n",
" cyl | \n",
" disp | \n",
" hp | \n",
" drat | \n",
" wt | \n",
" qsec | \n",
" vs | \n",
" am | \n",
" gear | \n",
" carb | \n",
"
\n",
" \n",
" \n",
" \n",
" 27 | \n",
" Lotus Europa | \n",
" 19.678571 | \n",
" 6.148148 | \n",
" 95.1 | \n",
" 113.000000 | \n",
" 3.77 | \n",
" 1.513 | \n",
" 16.90 | \n",
" 1.0 | \n",
" 1.00 | \n",
" 5.000000 | \n",
" 2.0 | \n",
"
\n",
" \n",
" 28 | \n",
" Ford Pantera L | \n",
" 15.800000 | \n",
" 8.000000 | \n",
" 351.0 | \n",
" 141.896552 | \n",
" 4.22 | \n",
" 3.170 | \n",
" 14.50 | \n",
" 0.0 | \n",
" 1.00 | \n",
" 5.000000 | \n",
" 4.0 | \n",
"
\n",
" \n",
" 29 | \n",
" Ferrari Dino | \n",
" 19.700000 | \n",
" 6.000000 | \n",
" 145.0 | \n",
" 175.000000 | \n",
" 3.62 | \n",
" 2.770 | \n",
" 17.74 | \n",
" 0.0 | \n",
" 0.44 | \n",
" 5.000000 | \n",
" 6.0 | \n",
"
\n",
" \n",
" 30 | \n",
" Maserati Bora | \n",
" 19.678571 | \n",
" 8.000000 | \n",
" 301.0 | \n",
" 335.000000 | \n",
" 3.54 | \n",
" 3.570 | \n",
" 14.60 | \n",
" 0.0 | \n",
" 1.00 | \n",
" 3.607143 | \n",
" 8.0 | \n",
"
\n",
" \n",
" 31 | \n",
" Volvo 142E | \n",
" 21.400000 | \n",
" 4.000000 | \n",
" 121.0 | \n",
" 109.000000 | \n",
" 4.11 | \n",
" 2.780 | \n",
" 18.60 | \n",
" 1.0 | \n",
" 1.00 | \n",
" 4.000000 | \n",
" 2.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" model mpg cyl disp hp drat wt \\\n",
"27 Lotus Europa 19.678571 6.148148 95.1 113.000000 3.77 1.513 \n",
"28 Ford Pantera L 15.800000 8.000000 351.0 141.896552 4.22 3.170 \n",
"29 Ferrari Dino 19.700000 6.000000 145.0 175.000000 3.62 2.770 \n",
"30 Maserati Bora 19.678571 8.000000 301.0 335.000000 3.54 3.570 \n",
"31 Volvo 142E 21.400000 4.000000 121.0 109.000000 4.11 2.780 \n",
"\n",
" qsec vs am gear carb \n",
"27 16.90 1.0 1.00 5.000000 2.0 \n",
"28 14.50 0.0 1.00 5.000000 4.0 \n",
"29 17.74 0.0 0.44 5.000000 6.0 \n",
"30 14.60 0.0 1.00 3.607143 8.0 \n",
"31 18.60 1.0 1.00 4.000000 2.0 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"mpg 19.678571\n",
"cyl 6.148148\n",
"disp 230.721875\n",
"hp 141.896552\n",
"drat 3.593226\n",
"wt 3.189536\n",
"qsec 17.740000\n",
"vs 0.413793\n",
"am 0.440000\n",
"gear 3.607143\n",
"carb 2.807692\n",
"dtype: float64"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Compute means PER VARIABLE (across axis 0)\n",
"mean_scores = nan_cars.mean(axis=0) # altrenatively use `axis='rows'`\n",
"\n",
"# Now fill\n",
"fixed = nan_cars.fillna(mean_scores)\n",
"\n",
"display(fixed.tail())\n",
"display(mean_scores.T)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}