{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "7c064e80-f169-469e-ab13-cd508234e6ee",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    },
    "tags": []
   },
   "source": [
    "## Accessing data in Pandas\n",
    "NumPy used an efficent indexing syntax, similar to lists, to extract values. pandas' greater complexity means it has more options to access data, and these require a bit of practice to master. There are three ways to get at data in a DataFrame:\n",
    "\n",
    "* Standard notation - acts as if the DataFrame was a dictionary with lists in it.\n",
    "* `.iloc()` method - the traditional NumPy indexing approach, accessing rows and columns with numbers and colons.\n",
    "* `.loc()` method - allows you to use labels as indexes - no numbers required!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "0b047875-e24f-46d2-abda-f9eb219e3b0d",
   "metadata": {
    "slideshow": {
     "slide_type": "skip"
    },
    "tags": [
     "remove-cell"
    ]
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "mtcars = pd.read_csv('https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "534fb243-f5ca-4578-963c-963d70b30c8d",
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "27    30.4\n",
       "28    15.8\n",
       "29    19.7\n",
       "30    15.0\n",
       "31    21.4\n",
       "Name: mpg, dtype: float64"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Demonstrate standard notation for accessing Pandas. How to look at the 'mpg' column? Just the last 5 rows!\n",
    "display(mtcars['mpg'].tail())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "67f81f54-b8e5-4368-bf76-a3cf208f4423",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mpg</th>\n",
       "      <th>hp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>21.0</td>\n",
       "      <td>110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>21.0</td>\n",
       "      <td>110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>22.8</td>\n",
       "      <td>93</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>21.4</td>\n",
       "      <td>110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>18.7</td>\n",
       "      <td>175</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    mpg   hp\n",
       "0  21.0  110\n",
       "1  21.0  110\n",
       "2  22.8   93\n",
       "3  21.4  110\n",
       "4  18.7  175"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# You can access two or more columns with a list of column names!\n",
    "display(mtcars[['mpg', 'hp']].head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "78dc9c24-5104-4a65-8367-56acb7b1d6c3",
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mpg</th>\n",
       "      <th>hp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>521.0</td>\n",
       "      <td>610</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>521.0</td>\n",
       "      <td>610</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>522.8</td>\n",
       "      <td>593</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>521.4</td>\n",
       "      <td>610</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>518.7</td>\n",
       "      <td>675</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     mpg   hp\n",
       "0  521.0  610\n",
       "1  521.0  610\n",
       "2  522.8  593\n",
       "3  521.4  610\n",
       "4  518.7  675"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Manipulation works like NumPy\n",
    "display(mtcars[['mpg', 'hp']].head() + 500)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6d456004-8438-446f-bd1f-8643ba4ad4d3",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "### Accessing with `.iloc`\n",
    "Accessing data in a DataFrame with `iloc` is as simple as knowing what rows and columns you want, and passing them to the `iloc` method - this is slightly unusual, as the method is an indexer - so you call it with square brackets, and not parentheses."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "34a04f98-dee4-46ae-a8e6-a9af57c93bac",
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model</th>\n",
       "      <th>mpg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Mazda RX4</td>\n",
       "      <td>21.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Mazda RX4 Wag</td>\n",
       "      <td>21.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Datsun 710</td>\n",
       "      <td>22.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Hornet 4 Drive</td>\n",
       "      <td>21.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Hornet Sportabout</td>\n",
       "      <td>18.7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               model   mpg\n",
       "0          Mazda RX4  21.0\n",
       "1      Mazda RX4 Wag  21.0\n",
       "2         Datsun 710  22.8\n",
       "3     Hornet 4 Drive  21.4\n",
       "4  Hornet Sportabout  18.7"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Get all rows and first two columns\n",
    "display(mtcars.iloc[:,:2].head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "e95e692d-9b86-499a-ab42-f9dd146c8ea4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>disp</th>\n",
       "      <th>hp</th>\n",
       "      <th>qsec</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>160.0</td>\n",
       "      <td>110</td>\n",
       "      <td>16.46</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>160.0</td>\n",
       "      <td>110</td>\n",
       "      <td>17.02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>108.0</td>\n",
       "      <td>93</td>\n",
       "      <td>18.61</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    disp   hp   qsec\n",
       "0  160.0  110  16.46\n",
       "1  160.0  110  17.02\n",
       "2  108.0   93  18.61"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Just like in NumPy, various types of subsetting is allowed\n",
    "display(mtcars.iloc[[0, 1, 2], [3, 4, 7]])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "62bf8280-fb87-43ee-b177-6d5a009fe966",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "source": [
    "### Accessing with `.loc`\n",
    "`.loc` exists to provide *label* based indexing. So, rather than specifying numbers, you specify labels, but using the **[row, column]** approach we are familiar with."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "d0607bf1-3be7-4b6c-8d41-4f3297c3d2a6",
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model</th>\n",
       "      <th>mpg</th>\n",
       "      <th>cyl</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Mazda RX4</td>\n",
       "      <td>21.0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Mazda RX4 Wag</td>\n",
       "      <td>21.0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Datsun 710</td>\n",
       "      <td>22.8</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Hornet 4 Drive</td>\n",
       "      <td>21.4</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Hornet Sportabout</td>\n",
       "      <td>18.7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               model   mpg  cyl\n",
       "0          Mazda RX4  21.0    6\n",
       "1      Mazda RX4 Wag  21.0    6\n",
       "2         Datsun 710  22.8    4\n",
       "3     Hornet 4 Drive  21.4    6\n",
       "4  Hornet Sportabout  18.7    8"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model</th>\n",
       "      <th>mpg</th>\n",
       "      <th>cyl</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Mazda RX4 Wag</td>\n",
       "      <td>21.0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Datsun 710</td>\n",
       "      <td>22.8</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Hornet 4 Drive</td>\n",
       "      <td>21.4</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            model   mpg  cyl\n",
       "1   Mazda RX4 Wag  21.0    6\n",
       "2      Datsun 710  22.8    4\n",
       "3  Hornet 4 Drive  21.4    6"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Get mtcars first two columns - notice how the stride is label based but ALSO INCLUSIVE - the final element IS included!!\n",
    "display(mtcars.loc[:, 'model':'cyl'].head())\n",
    "\n",
    "# What if you want specific rows? Easy with iloc, impossible with standard notation - see here the indexes are inclusive too, and they are 'numbers'\n",
    "display(mtcars.loc[1:3, 'model':'cyl'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "4c769875-8b3e-4d04-a4a7-1e9559dc6056",
   "metadata": {
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "outputs": [],
   "source": [
    "# But bear in mind - the numbers in the index are only integers because THE INDEX CONTAINS INTEGERS. We can reassign them -\n",
    "mtcars.index = ['row_' + str(ind) for ind in mtcars.index]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "51d89891-bd2f-47bd-9f5a-0558b2e21a8f",
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hp</th>\n",
       "      <th>drat</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>row_4</th>\n",
       "      <td>175</td>\n",
       "      <td>3.15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row_10</th>\n",
       "      <td>123</td>\n",
       "      <td>3.92</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         hp  drat\n",
       "row_4   175  3.15\n",
       "row_10  123  3.92"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(mtcars.loc[['row_4', 'row_10'], ['hp', 'drat']])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}