{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "b9df5df0-154a-4802-80f8-101566dc579d",
   "metadata": {
    "tags": [
     "remove-cell"
    ]
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "mtcars = pd.read_csv('https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f7ef09fe-3841-46ea-900e-ef82a5663a8a",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "## Boolean operations with Pandas\n",
    "Like NumPy, you can subset and select values from a DataFrame using Boolean values - they work just the same as in NumPy by comparing values in your DataFrame to your chosen quantity."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "34c59c95-2db6-4ae1-9b0a-97d64d6a8c44",
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model</th>\n",
       "      <th>mpg</th>\n",
       "      <th>cyl</th>\n",
       "      <th>disp</th>\n",
       "      <th>hp</th>\n",
       "      <th>drat</th>\n",
       "      <th>wt</th>\n",
       "      <th>qsec</th>\n",
       "      <th>vs</th>\n",
       "      <th>am</th>\n",
       "      <th>gear</th>\n",
       "      <th>carb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Mazda RX4</td>\n",
       "      <td>21.0</td>\n",
       "      <td>6</td>\n",
       "      <td>160.0</td>\n",
       "      <td>110</td>\n",
       "      <td>3.90</td>\n",
       "      <td>2.620</td>\n",
       "      <td>16.46</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Mazda RX4 Wag</td>\n",
       "      <td>21.0</td>\n",
       "      <td>6</td>\n",
       "      <td>160.0</td>\n",
       "      <td>110</td>\n",
       "      <td>3.90</td>\n",
       "      <td>2.875</td>\n",
       "      <td>17.02</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Datsun 710</td>\n",
       "      <td>22.8</td>\n",
       "      <td>4</td>\n",
       "      <td>108.0</td>\n",
       "      <td>93</td>\n",
       "      <td>3.85</td>\n",
       "      <td>2.320</td>\n",
       "      <td>18.61</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Fiat 128</td>\n",
       "      <td>32.4</td>\n",
       "      <td>4</td>\n",
       "      <td>78.7</td>\n",
       "      <td>66</td>\n",
       "      <td>4.08</td>\n",
       "      <td>2.200</td>\n",
       "      <td>19.47</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Honda Civic</td>\n",
       "      <td>30.4</td>\n",
       "      <td>4</td>\n",
       "      <td>75.7</td>\n",
       "      <td>52</td>\n",
       "      <td>4.93</td>\n",
       "      <td>1.615</td>\n",
       "      <td>18.52</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            model   mpg  cyl   disp   hp  drat     wt   qsec  vs  am  gear  \\\n",
       "0       Mazda RX4  21.0    6  160.0  110  3.90  2.620  16.46   0   1     4   \n",
       "1   Mazda RX4 Wag  21.0    6  160.0  110  3.90  2.875  17.02   0   1     4   \n",
       "2      Datsun 710  22.8    4  108.0   93  3.85  2.320  18.61   1   1     4   \n",
       "17       Fiat 128  32.4    4   78.7   66  4.08  2.200  19.47   1   1     4   \n",
       "18    Honda Civic  30.4    4   75.7   52  4.93  1.615  18.52   1   1     4   \n",
       "\n",
       "    carb  \n",
       "0      4  \n",
       "1      4  \n",
       "2      1  \n",
       "17     1  \n",
       "18     2  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model</th>\n",
       "      <th>mpg</th>\n",
       "      <th>cyl</th>\n",
       "      <th>disp</th>\n",
       "      <th>hp</th>\n",
       "      <th>drat</th>\n",
       "      <th>wt</th>\n",
       "      <th>qsec</th>\n",
       "      <th>vs</th>\n",
       "      <th>am</th>\n",
       "      <th>gear</th>\n",
       "      <th>carb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Hornet 4 Drive</td>\n",
       "      <td>21.4</td>\n",
       "      <td>6</td>\n",
       "      <td>258.0</td>\n",
       "      <td>110</td>\n",
       "      <td>3.08</td>\n",
       "      <td>3.215</td>\n",
       "      <td>19.44</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Hornet Sportabout</td>\n",
       "      <td>18.7</td>\n",
       "      <td>8</td>\n",
       "      <td>360.0</td>\n",
       "      <td>175</td>\n",
       "      <td>3.15</td>\n",
       "      <td>3.440</td>\n",
       "      <td>17.02</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Valiant</td>\n",
       "      <td>18.1</td>\n",
       "      <td>6</td>\n",
       "      <td>225.0</td>\n",
       "      <td>105</td>\n",
       "      <td>2.76</td>\n",
       "      <td>3.460</td>\n",
       "      <td>20.22</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Duster 360</td>\n",
       "      <td>14.3</td>\n",
       "      <td>8</td>\n",
       "      <td>360.0</td>\n",
       "      <td>245</td>\n",
       "      <td>3.21</td>\n",
       "      <td>3.570</td>\n",
       "      <td>15.84</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Merc 240D</td>\n",
       "      <td>24.4</td>\n",
       "      <td>4</td>\n",
       "      <td>146.7</td>\n",
       "      <td>62</td>\n",
       "      <td>3.69</td>\n",
       "      <td>3.190</td>\n",
       "      <td>20.00</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               model   mpg  cyl   disp   hp  drat     wt   qsec  vs  am  gear  \\\n",
       "3     Hornet 4 Drive  21.4    6  258.0  110  3.08  3.215  19.44   1   0     3   \n",
       "4  Hornet Sportabout  18.7    8  360.0  175  3.15  3.440  17.02   0   0     3   \n",
       "5            Valiant  18.1    6  225.0  105  2.76  3.460  20.22   1   0     3   \n",
       "6         Duster 360  14.3    8  360.0  245  3.21  3.570  15.84   0   0     3   \n",
       "7          Merc 240D  24.4    4  146.7   62  3.69  3.190  20.00   1   0     4   \n",
       "\n",
       "   carb  \n",
       "3     1  \n",
       "4     2  \n",
       "5     1  \n",
       "6     4  \n",
       "7     2  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Demonstrate Boolean subsets. Split the data into two - 'am' specifies whether a car is automatic or manual\n",
    "auto = mtcars['am'] == 1\n",
    "\n",
    "display(mtcars[auto].head())\n",
    "display(mtcars[~auto].head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1787c4a6-ecd2-47b7-b2cf-fb519ffc80b3",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model</th>\n",
       "      <th>mpg</th>\n",
       "      <th>cyl</th>\n",
       "      <th>disp</th>\n",
       "      <th>hp</th>\n",
       "      <th>drat</th>\n",
       "      <th>wt</th>\n",
       "      <th>qsec</th>\n",
       "      <th>vs</th>\n",
       "      <th>am</th>\n",
       "      <th>gear</th>\n",
       "      <th>carb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>Ford Pantera L</td>\n",
       "      <td>15.8</td>\n",
       "      <td>8</td>\n",
       "      <td>351.0</td>\n",
       "      <td>264</td>\n",
       "      <td>4.22</td>\n",
       "      <td>3.17</td>\n",
       "      <td>14.5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>Maserati Bora</td>\n",
       "      <td>15.0</td>\n",
       "      <td>8</td>\n",
       "      <td>301.0</td>\n",
       "      <td>335</td>\n",
       "      <td>3.54</td>\n",
       "      <td>3.57</td>\n",
       "      <td>14.6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             model   mpg  cyl   disp   hp  drat    wt  qsec  vs  am  gear  \\\n",
       "28  Ford Pantera L  15.8    8  351.0  264  4.22  3.17  14.5   0   1     5   \n",
       "30   Maserati Bora  15.0    8  301.0  335  3.54  3.57  14.6   0   1     5   \n",
       "\n",
       "    carb  \n",
       "28     4  \n",
       "30     8  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Or MPG between certain values, only for cars with 5 gears!\n",
    "miles_per = ((mtcars['mpg'] < 17.0) | (mtcars['mpg'] > 32.0)) & (mtcars['gear'] == 5)\n",
    "\n",
    "display(mtcars[miles_per])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b52bca18-da44-4255-aa96-39f7eecdb7dd",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "### Cleaning data in Pandas\n",
    "As with NumPy arrays, there are special ways to remove data with NaN's. Pandas has its own `.isnull()` method which can be chained with other methods to help clean up a dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a799da32-edaa-47df-9b6d-5c4f40fd77c6",
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model</th>\n",
       "      <th>mpg</th>\n",
       "      <th>cyl</th>\n",
       "      <th>disp</th>\n",
       "      <th>hp</th>\n",
       "      <th>drat</th>\n",
       "      <th>wt</th>\n",
       "      <th>qsec</th>\n",
       "      <th>vs</th>\n",
       "      <th>am</th>\n",
       "      <th>gear</th>\n",
       "      <th>carb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Mazda RX4</td>\n",
       "      <td>21.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>160.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>3.90</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Mazda RX4 Wag</td>\n",
       "      <td>21.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>160.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>3.90</td>\n",
       "      <td>2.875</td>\n",
       "      <td>17.02</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>22.8</td>\n",
       "      <td>4.0</td>\n",
       "      <td>108.0</td>\n",
       "      <td>93.0</td>\n",
       "      <td>3.85</td>\n",
       "      <td>2.320</td>\n",
       "      <td>18.61</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Hornet 4 Drive</td>\n",
       "      <td>21.4</td>\n",
       "      <td>6.0</td>\n",
       "      <td>258.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.08</td>\n",
       "      <td>3.215</td>\n",
       "      <td>19.44</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Hornet Sportabout</td>\n",
       "      <td>18.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>360.0</td>\n",
       "      <td>175.0</td>\n",
       "      <td>3.15</td>\n",
       "      <td>3.440</td>\n",
       "      <td>17.02</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               model   mpg  cyl   disp     hp  drat     wt   qsec   vs   am  \\\n",
       "0          Mazda RX4  21.0  6.0  160.0  110.0  3.90    NaN    NaN  0.0  1.0   \n",
       "1      Mazda RX4 Wag  21.0  6.0  160.0  110.0  3.90  2.875  17.02  0.0  1.0   \n",
       "2                NaN  22.8  4.0  108.0   93.0  3.85  2.320  18.61  NaN  1.0   \n",
       "3     Hornet 4 Drive  21.4  6.0  258.0    NaN  3.08  3.215  19.44  1.0  NaN   \n",
       "4  Hornet Sportabout  18.7  NaN  360.0  175.0  3.15  3.440  17.02  0.0  0.0   \n",
       "\n",
       "   gear  carb  \n",
       "0   4.0   4.0  \n",
       "1   4.0   NaN  \n",
       "2   4.0   1.0  \n",
       "3   3.0   1.0  \n",
       "4   3.0   NaN  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# First add NaN to rows\n",
    "inds = np.random.choice([True, False], p=[.1, .9], size=mtcars.shape)\n",
    "\n",
    "# Copy our original array so we don't overwrite it!\n",
    "nan_cars = mtcars.copy()\n",
    "\n",
    "# Add NaN\n",
    "nan_cars[inds] = np.NaN\n",
    "\n",
    "display(nan_cars.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e7bc45ba-6c7a-4c69-899b-8d7557e58586",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model</th>\n",
       "      <th>mpg</th>\n",
       "      <th>cyl</th>\n",
       "      <th>disp</th>\n",
       "      <th>hp</th>\n",
       "      <th>drat</th>\n",
       "      <th>wt</th>\n",
       "      <th>qsec</th>\n",
       "      <th>vs</th>\n",
       "      <th>am</th>\n",
       "      <th>gear</th>\n",
       "      <th>carb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Duster 360</td>\n",
       "      <td>14.3</td>\n",
       "      <td>8.0</td>\n",
       "      <td>360.0</td>\n",
       "      <td>245.0</td>\n",
       "      <td>3.21</td>\n",
       "      <td>3.57</td>\n",
       "      <td>15.84</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Merc 450SLC</td>\n",
       "      <td>15.2</td>\n",
       "      <td>8.0</td>\n",
       "      <td>275.8</td>\n",
       "      <td>180.0</td>\n",
       "      <td>3.07</td>\n",
       "      <td>3.78</td>\n",
       "      <td>18.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Fiat 128</td>\n",
       "      <td>32.4</td>\n",
       "      <td>4.0</td>\n",
       "      <td>78.7</td>\n",
       "      <td>66.0</td>\n",
       "      <td>4.08</td>\n",
       "      <td>2.20</td>\n",
       "      <td>19.47</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>Dodge Challenger</td>\n",
       "      <td>15.5</td>\n",
       "      <td>8.0</td>\n",
       "      <td>318.0</td>\n",
       "      <td>150.0</td>\n",
       "      <td>2.76</td>\n",
       "      <td>3.52</td>\n",
       "      <td>16.87</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>Volvo 142E</td>\n",
       "      <td>21.4</td>\n",
       "      <td>4.0</td>\n",
       "      <td>121.0</td>\n",
       "      <td>109.0</td>\n",
       "      <td>4.11</td>\n",
       "      <td>2.78</td>\n",
       "      <td>18.60</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               model   mpg  cyl   disp     hp  drat    wt   qsec   vs   am  \\\n",
       "6         Duster 360  14.3  8.0  360.0  245.0  3.21  3.57  15.84  0.0  0.0   \n",
       "13       Merc 450SLC  15.2  8.0  275.8  180.0  3.07  3.78  18.00  0.0  0.0   \n",
       "17          Fiat 128  32.4  4.0   78.7   66.0  4.08  2.20  19.47  1.0  1.0   \n",
       "21  Dodge Challenger  15.5  8.0  318.0  150.0  2.76  3.52  16.87  0.0  0.0   \n",
       "31        Volvo 142E  21.4  4.0  121.0  109.0  4.11  2.78  18.60  1.0  1.0   \n",
       "\n",
       "    gear  carb  \n",
       "6    3.0   4.0  \n",
       "13   3.0   3.0  \n",
       "17   4.0   1.0  \n",
       "21   3.0   2.0  \n",
       "31   4.0   2.0  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Use .isnull() to find problem participants with .any()\n",
    "problems = nan_cars.isnull().any(axis=1)\n",
    "\n",
    "# Subset!\n",
    "display(nan_cars[~problems])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d31cf973-a55e-4b79-949c-274077732c71",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "### Imputing values and summary statistics\n",
    "Dropping data is great, but most of the time you don't want to do that. Psychological data can be hard to come by and you want to make the most of it. There is a solution - replacing missing values with some educated guesses. An example of this might be the mean or median.\n",
    "\n",
    "In NumPy, computing summary statistics across vectors with missing data (`np.NaN`) will always result in `nan` - the missing value will cause problems. But in Pandas, calling `.mean()`, `.median()`, or `.std()` will *ignore* missing values and compute the statistic on the valid data. \n",
    "\n",
    "And once you have those, you can replace the missing values with them. This is made very simple with Pandas, because of the `.fillna()` method. This method accepts either a constant value, or a vector of scores that match the axis of interest (e.g., same number of rows or columns) to fill the gaps with."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "ca0c89ef-5c9e-4e5d-846d-30f4295186a8",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model</th>\n",
       "      <th>mpg</th>\n",
       "      <th>cyl</th>\n",
       "      <th>disp</th>\n",
       "      <th>hp</th>\n",
       "      <th>drat</th>\n",
       "      <th>wt</th>\n",
       "      <th>qsec</th>\n",
       "      <th>vs</th>\n",
       "      <th>am</th>\n",
       "      <th>gear</th>\n",
       "      <th>carb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>Lotus Europa</td>\n",
       "      <td>9999.0</td>\n",
       "      <td>9999.0</td>\n",
       "      <td>95.1</td>\n",
       "      <td>113.0</td>\n",
       "      <td>3.77</td>\n",
       "      <td>1.513</td>\n",
       "      <td>16.9</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>Ford Pantera L</td>\n",
       "      <td>15.8</td>\n",
       "      <td>8.0</td>\n",
       "      <td>351.0</td>\n",
       "      <td>9999.0</td>\n",
       "      <td>4.22</td>\n",
       "      <td>3.170</td>\n",
       "      <td>14.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>Ferrari Dino</td>\n",
       "      <td>19.7</td>\n",
       "      <td>6.0</td>\n",
       "      <td>145.0</td>\n",
       "      <td>175.0</td>\n",
       "      <td>3.62</td>\n",
       "      <td>2.770</td>\n",
       "      <td>9999.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9999.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>Maserati Bora</td>\n",
       "      <td>9999.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>335.0</td>\n",
       "      <td>3.54</td>\n",
       "      <td>3.570</td>\n",
       "      <td>14.6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>9999.0</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>Volvo 142E</td>\n",
       "      <td>21.4</td>\n",
       "      <td>4.0</td>\n",
       "      <td>121.0</td>\n",
       "      <td>109.0</td>\n",
       "      <td>4.11</td>\n",
       "      <td>2.780</td>\n",
       "      <td>18.6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             model     mpg     cyl   disp      hp  drat     wt    qsec   vs  \\\n",
       "27    Lotus Europa  9999.0  9999.0   95.1   113.0  3.77  1.513    16.9  1.0   \n",
       "28  Ford Pantera L    15.8     8.0  351.0  9999.0  4.22  3.170    14.5  0.0   \n",
       "29    Ferrari Dino    19.7     6.0  145.0   175.0  3.62  2.770  9999.0  0.0   \n",
       "30   Maserati Bora  9999.0     8.0  301.0   335.0  3.54  3.570    14.6  0.0   \n",
       "31      Volvo 142E    21.4     4.0  121.0   109.0  4.11  2.780    18.6  1.0   \n",
       "\n",
       "        am    gear  carb  \n",
       "27     1.0     5.0   2.0  \n",
       "28     1.0     5.0   4.0  \n",
       "29  9999.0     5.0   6.0  \n",
       "30     1.0  9999.0   8.0  \n",
       "31     1.0     4.0   2.0  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Demonstrate fillna with a constant\n",
    "display(nan_cars.fillna(9999).tail())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "274c2736-25b1-45d3-9f8f-5c01b58d423e",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/mw/xt4ddf0j2n3dr4qcr__qhqlr0000gn/T/ipykernel_37590/463963669.py:2: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.\n",
      "  mean_scores = nan_cars.mean(axis=0) # altrenatively use `axis='rows'`\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model</th>\n",
       "      <th>mpg</th>\n",
       "      <th>cyl</th>\n",
       "      <th>disp</th>\n",
       "      <th>hp</th>\n",
       "      <th>drat</th>\n",
       "      <th>wt</th>\n",
       "      <th>qsec</th>\n",
       "      <th>vs</th>\n",
       "      <th>am</th>\n",
       "      <th>gear</th>\n",
       "      <th>carb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>Lotus Europa</td>\n",
       "      <td>19.678571</td>\n",
       "      <td>6.148148</td>\n",
       "      <td>95.1</td>\n",
       "      <td>113.000000</td>\n",
       "      <td>3.77</td>\n",
       "      <td>1.513</td>\n",
       "      <td>16.90</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>Ford Pantera L</td>\n",
       "      <td>15.800000</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>351.0</td>\n",
       "      <td>141.896552</td>\n",
       "      <td>4.22</td>\n",
       "      <td>3.170</td>\n",
       "      <td>14.50</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>Ferrari Dino</td>\n",
       "      <td>19.700000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>145.0</td>\n",
       "      <td>175.000000</td>\n",
       "      <td>3.62</td>\n",
       "      <td>2.770</td>\n",
       "      <td>17.74</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.44</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>Maserati Bora</td>\n",
       "      <td>19.678571</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>301.0</td>\n",
       "      <td>335.000000</td>\n",
       "      <td>3.54</td>\n",
       "      <td>3.570</td>\n",
       "      <td>14.60</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>3.607143</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>Volvo 142E</td>\n",
       "      <td>21.400000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>121.0</td>\n",
       "      <td>109.000000</td>\n",
       "      <td>4.11</td>\n",
       "      <td>2.780</td>\n",
       "      <td>18.60</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             model        mpg       cyl   disp          hp  drat     wt  \\\n",
       "27    Lotus Europa  19.678571  6.148148   95.1  113.000000  3.77  1.513   \n",
       "28  Ford Pantera L  15.800000  8.000000  351.0  141.896552  4.22  3.170   \n",
       "29    Ferrari Dino  19.700000  6.000000  145.0  175.000000  3.62  2.770   \n",
       "30   Maserati Bora  19.678571  8.000000  301.0  335.000000  3.54  3.570   \n",
       "31      Volvo 142E  21.400000  4.000000  121.0  109.000000  4.11  2.780   \n",
       "\n",
       "     qsec   vs    am      gear  carb  \n",
       "27  16.90  1.0  1.00  5.000000   2.0  \n",
       "28  14.50  0.0  1.00  5.000000   4.0  \n",
       "29  17.74  0.0  0.44  5.000000   6.0  \n",
       "30  14.60  0.0  1.00  3.607143   8.0  \n",
       "31  18.60  1.0  1.00  4.000000   2.0  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "mpg      19.678571\n",
       "cyl       6.148148\n",
       "disp    230.721875\n",
       "hp      141.896552\n",
       "drat      3.593226\n",
       "wt        3.189536\n",
       "qsec     17.740000\n",
       "vs        0.413793\n",
       "am        0.440000\n",
       "gear      3.607143\n",
       "carb      2.807692\n",
       "dtype: float64"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Compute means PER VARIABLE (across axis 0)\n",
    "mean_scores = nan_cars.mean(axis=0) # altrenatively use `axis='rows'`\n",
    "\n",
    "# Now fill\n",
    "fixed = nan_cars.fillna(mean_scores)\n",
    "\n",
    "display(fixed.tail())\n",
    "display(mean_scores.T)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}