Spaces:
Sleeping
Sleeping
Commit
·
c161b3b
1
Parent(s):
d217164
feat: data-pipelines
Browse filesimplemented data loading pipelines, data cleaning and basic EDA along with TFIDF vectorization
- data/MMR_DATA.csv +0 -0
- data/__init__.py +0 -0
- notebooks/EDA.ipynb +658 -0
- notebooks/TFIDF.ipynb +0 -0
- notebooks/__init__.py +0 -0
- notebooks/data_loading.ipynb +996 -0
- src/main.py +33 -0
- utilities/__init__.py +2 -0
- utilities/__pycache__/__init__.cpython-311.pyc +0 -0
- utilities/__pycache__/__init__.cpython-312.pyc +0 -0
- utilities/__pycache__/data_loader.cpython-311.pyc +0 -0
- utilities/__pycache__/data_loader.cpython-312.pyc +0 -0
- utilities/data_cleaner.py +27 -0
- utilities/data_loader.py +222 -0
- utils/__init__.py +0 -0
data/MMR_DATA.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/__init__.py
DELETED
|
File without changes
|
notebooks/EDA.ipynb
ADDED
|
@@ -0,0 +1,658 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## __Exploratory Data Analysis__"
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "code",
|
| 12 |
+
"execution_count": 53,
|
| 13 |
+
"metadata": {},
|
| 14 |
+
"outputs": [],
|
| 15 |
+
"source": [
|
| 16 |
+
"## importing libraries\n",
|
| 17 |
+
"\n",
|
| 18 |
+
"import numpy as numpy\n",
|
| 19 |
+
"import pandas as pd\n",
|
| 20 |
+
"import matplotlib.pyplot as plt\n",
|
| 21 |
+
"import seaborn as sns\n",
|
| 22 |
+
"\n",
|
| 23 |
+
"import sys\n",
|
| 24 |
+
"import os\n",
|
| 25 |
+
"\n",
|
| 26 |
+
"import re\n",
|
| 27 |
+
"import nltk\n",
|
| 28 |
+
"from nltk.corpus import stopwords\n",
|
| 29 |
+
"from nltk.stem import PorterStemmer\n",
|
| 30 |
+
"from nltk.stem import WordNetLemmatizer"
|
| 31 |
+
]
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"cell_type": "code",
|
| 35 |
+
"execution_count": 6,
|
| 36 |
+
"metadata": {},
|
| 37 |
+
"outputs": [
|
| 38 |
+
{
|
| 39 |
+
"data": {
|
| 40 |
+
"text/html": [
|
| 41 |
+
"<div>\n",
|
| 42 |
+
"<style scoped>\n",
|
| 43 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 44 |
+
" vertical-align: middle;\n",
|
| 45 |
+
" }\n",
|
| 46 |
+
"\n",
|
| 47 |
+
" .dataframe tbody tr th {\n",
|
| 48 |
+
" vertical-align: top;\n",
|
| 49 |
+
" }\n",
|
| 50 |
+
"\n",
|
| 51 |
+
" .dataframe thead th {\n",
|
| 52 |
+
" text-align: right;\n",
|
| 53 |
+
" }\n",
|
| 54 |
+
"</style>\n",
|
| 55 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 56 |
+
" <thead>\n",
|
| 57 |
+
" <tr style=\"text-align: right;\">\n",
|
| 58 |
+
" <th></th>\n",
|
| 59 |
+
" <th>row</th>\n",
|
| 60 |
+
" <th>col</th>\n",
|
| 61 |
+
" <th>latitude</th>\n",
|
| 62 |
+
" <th>longitude</th>\n",
|
| 63 |
+
" <th>Map Data</th>\n",
|
| 64 |
+
" </tr>\n",
|
| 65 |
+
" </thead>\n",
|
| 66 |
+
" <tbody>\n",
|
| 67 |
+
" <tr>\n",
|
| 68 |
+
" <th>0</th>\n",
|
| 69 |
+
" <td>0</td>\n",
|
| 70 |
+
" <td>0</td>\n",
|
| 71 |
+
" <td>18.89433</td>\n",
|
| 72 |
+
" <td>72.784597</td>\n",
|
| 73 |
+
" <td>NaN</td>\n",
|
| 74 |
+
" </tr>\n",
|
| 75 |
+
" <tr>\n",
|
| 76 |
+
" <th>1</th>\n",
|
| 77 |
+
" <td>0</td>\n",
|
| 78 |
+
" <td>1</td>\n",
|
| 79 |
+
" <td>18.89433</td>\n",
|
| 80 |
+
" <td>72.794102</td>\n",
|
| 81 |
+
" <td>Prongs Reef is a Natural;</td>\n",
|
| 82 |
+
" </tr>\n",
|
| 83 |
+
" <tr>\n",
|
| 84 |
+
" <th>2</th>\n",
|
| 85 |
+
" <td>0</td>\n",
|
| 86 |
+
" <td>2</td>\n",
|
| 87 |
+
" <td>18.89433</td>\n",
|
| 88 |
+
" <td>72.803607</td>\n",
|
| 89 |
+
" <td>United Services Club Golf Course is a Leisure ...</td>\n",
|
| 90 |
+
" </tr>\n",
|
| 91 |
+
" <tr>\n",
|
| 92 |
+
" <th>3</th>\n",
|
| 93 |
+
" <td>0</td>\n",
|
| 94 |
+
" <td>3</td>\n",
|
| 95 |
+
" <td>18.89433</td>\n",
|
| 96 |
+
" <td>72.813112</td>\n",
|
| 97 |
+
" <td>Indian Meterological Department is a Commercia...</td>\n",
|
| 98 |
+
" </tr>\n",
|
| 99 |
+
" <tr>\n",
|
| 100 |
+
" <th>4</th>\n",
|
| 101 |
+
" <td>0</td>\n",
|
| 102 |
+
" <td>4</td>\n",
|
| 103 |
+
" <td>18.89433</td>\n",
|
| 104 |
+
" <td>72.822617</td>\n",
|
| 105 |
+
" <td>NaN</td>\n",
|
| 106 |
+
" </tr>\n",
|
| 107 |
+
" </tbody>\n",
|
| 108 |
+
"</table>\n",
|
| 109 |
+
"</div>"
|
| 110 |
+
],
|
| 111 |
+
"text/plain": [
|
| 112 |
+
" row col latitude longitude \\\n",
|
| 113 |
+
"0 0 0 18.89433 72.784597 \n",
|
| 114 |
+
"1 0 1 18.89433 72.794102 \n",
|
| 115 |
+
"2 0 2 18.89433 72.803607 \n",
|
| 116 |
+
"3 0 3 18.89433 72.813112 \n",
|
| 117 |
+
"4 0 4 18.89433 72.822617 \n",
|
| 118 |
+
"\n",
|
| 119 |
+
" Map Data \n",
|
| 120 |
+
"0 NaN \n",
|
| 121 |
+
"1 Prongs Reef is a Natural; \n",
|
| 122 |
+
"2 United Services Club Golf Course is a Leisure ... \n",
|
| 123 |
+
"3 Indian Meterological Department is a Commercia... \n",
|
| 124 |
+
"4 NaN "
|
| 125 |
+
]
|
| 126 |
+
},
|
| 127 |
+
"execution_count": 6,
|
| 128 |
+
"metadata": {},
|
| 129 |
+
"output_type": "execute_result"
|
| 130 |
+
}
|
| 131 |
+
],
|
| 132 |
+
"source": [
|
| 133 |
+
"data_folder = os.path.join(os.path.dirname(os.getcwd()), 'data')\n",
|
| 134 |
+
"data_file = os.path.join(data_folder, 'MMR_DATA.csv')\n",
|
| 135 |
+
"df = pd.read_csv(data_file)\n",
|
| 136 |
+
"df.head()"
|
| 137 |
+
]
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"cell_type": "code",
|
| 141 |
+
"execution_count": 7,
|
| 142 |
+
"metadata": {},
|
| 143 |
+
"outputs": [
|
| 144 |
+
{
|
| 145 |
+
"data": {
|
| 146 |
+
"text/html": [
|
| 147 |
+
"<div>\n",
|
| 148 |
+
"<style scoped>\n",
|
| 149 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 150 |
+
" vertical-align: middle;\n",
|
| 151 |
+
" }\n",
|
| 152 |
+
"\n",
|
| 153 |
+
" .dataframe tbody tr th {\n",
|
| 154 |
+
" vertical-align: top;\n",
|
| 155 |
+
" }\n",
|
| 156 |
+
"\n",
|
| 157 |
+
" .dataframe thead th {\n",
|
| 158 |
+
" text-align: right;\n",
|
| 159 |
+
" }\n",
|
| 160 |
+
"</style>\n",
|
| 161 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 162 |
+
" <thead>\n",
|
| 163 |
+
" <tr style=\"text-align: right;\">\n",
|
| 164 |
+
" <th></th>\n",
|
| 165 |
+
" <th>row</th>\n",
|
| 166 |
+
" <th>col</th>\n",
|
| 167 |
+
" <th>latitude</th>\n",
|
| 168 |
+
" <th>longitude</th>\n",
|
| 169 |
+
" <th>Map Data</th>\n",
|
| 170 |
+
" </tr>\n",
|
| 171 |
+
" </thead>\n",
|
| 172 |
+
" <tbody>\n",
|
| 173 |
+
" <tr>\n",
|
| 174 |
+
" <th>0</th>\n",
|
| 175 |
+
" <td>0</td>\n",
|
| 176 |
+
" <td>0</td>\n",
|
| 177 |
+
" <td>18.89433</td>\n",
|
| 178 |
+
" <td>72.784597</td>\n",
|
| 179 |
+
" <td></td>\n",
|
| 180 |
+
" </tr>\n",
|
| 181 |
+
" <tr>\n",
|
| 182 |
+
" <th>1</th>\n",
|
| 183 |
+
" <td>0</td>\n",
|
| 184 |
+
" <td>1</td>\n",
|
| 185 |
+
" <td>18.89433</td>\n",
|
| 186 |
+
" <td>72.794102</td>\n",
|
| 187 |
+
" <td>Prongs Reef is a Natural;</td>\n",
|
| 188 |
+
" </tr>\n",
|
| 189 |
+
" <tr>\n",
|
| 190 |
+
" <th>2</th>\n",
|
| 191 |
+
" <td>0</td>\n",
|
| 192 |
+
" <td>2</td>\n",
|
| 193 |
+
" <td>18.89433</td>\n",
|
| 194 |
+
" <td>72.803607</td>\n",
|
| 195 |
+
" <td>United Services Club Golf Course is a Leisure ...</td>\n",
|
| 196 |
+
" </tr>\n",
|
| 197 |
+
" <tr>\n",
|
| 198 |
+
" <th>3</th>\n",
|
| 199 |
+
" <td>0</td>\n",
|
| 200 |
+
" <td>3</td>\n",
|
| 201 |
+
" <td>18.89433</td>\n",
|
| 202 |
+
" <td>72.813112</td>\n",
|
| 203 |
+
" <td>Indian Meterological Department is a Commercia...</td>\n",
|
| 204 |
+
" </tr>\n",
|
| 205 |
+
" <tr>\n",
|
| 206 |
+
" <th>4</th>\n",
|
| 207 |
+
" <td>0</td>\n",
|
| 208 |
+
" <td>4</td>\n",
|
| 209 |
+
" <td>18.89433</td>\n",
|
| 210 |
+
" <td>72.822617</td>\n",
|
| 211 |
+
" <td></td>\n",
|
| 212 |
+
" </tr>\n",
|
| 213 |
+
" </tbody>\n",
|
| 214 |
+
"</table>\n",
|
| 215 |
+
"</div>"
|
| 216 |
+
],
|
| 217 |
+
"text/plain": [
|
| 218 |
+
" row col latitude longitude \\\n",
|
| 219 |
+
"0 0 0 18.89433 72.784597 \n",
|
| 220 |
+
"1 0 1 18.89433 72.794102 \n",
|
| 221 |
+
"2 0 2 18.89433 72.803607 \n",
|
| 222 |
+
"3 0 3 18.89433 72.813112 \n",
|
| 223 |
+
"4 0 4 18.89433 72.822617 \n",
|
| 224 |
+
"\n",
|
| 225 |
+
" Map Data \n",
|
| 226 |
+
"0 \n",
|
| 227 |
+
"1 Prongs Reef is a Natural; \n",
|
| 228 |
+
"2 United Services Club Golf Course is a Leisure ... \n",
|
| 229 |
+
"3 Indian Meterological Department is a Commercia... \n",
|
| 230 |
+
"4 "
|
| 231 |
+
]
|
| 232 |
+
},
|
| 233 |
+
"execution_count": 7,
|
| 234 |
+
"metadata": {},
|
| 235 |
+
"output_type": "execute_result"
|
| 236 |
+
}
|
| 237 |
+
],
|
| 238 |
+
"source": [
|
| 239 |
+
"## filling the NaN values in the Map Data Column with empty string\n",
|
| 240 |
+
"\n",
|
| 241 |
+
"df['Map Data'] = df['Map Data'].fillna('')\n",
|
| 242 |
+
"df.head()"
|
| 243 |
+
]
|
| 244 |
+
},
|
| 245 |
+
{
|
| 246 |
+
"cell_type": "code",
|
| 247 |
+
"execution_count": 20,
|
| 248 |
+
"metadata": {},
|
| 249 |
+
"outputs": [
|
| 250 |
+
{
|
| 251 |
+
"data": {
|
| 252 |
+
"text/plain": [
|
| 253 |
+
"1225"
|
| 254 |
+
]
|
| 255 |
+
},
|
| 256 |
+
"execution_count": 20,
|
| 257 |
+
"metadata": {},
|
| 258 |
+
"output_type": "execute_result"
|
| 259 |
+
}
|
| 260 |
+
],
|
| 261 |
+
"source": [
|
| 262 |
+
"len(df)"
|
| 263 |
+
]
|
| 264 |
+
},
|
| 265 |
+
{
|
| 266 |
+
"cell_type": "code",
|
| 267 |
+
"execution_count": 42,
|
| 268 |
+
"metadata": {},
|
| 269 |
+
"outputs": [],
|
| 270 |
+
"source": [
|
| 271 |
+
"df_len_explore = df.copy()"
|
| 272 |
+
]
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"cell_type": "code",
|
| 276 |
+
"execution_count": 43,
|
| 277 |
+
"metadata": {},
|
| 278 |
+
"outputs": [
|
| 279 |
+
{
|
| 280 |
+
"data": {
|
| 281 |
+
"text/html": [
|
| 282 |
+
"<div>\n",
|
| 283 |
+
"<style scoped>\n",
|
| 284 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 285 |
+
" vertical-align: middle;\n",
|
| 286 |
+
" }\n",
|
| 287 |
+
"\n",
|
| 288 |
+
" .dataframe tbody tr th {\n",
|
| 289 |
+
" vertical-align: top;\n",
|
| 290 |
+
" }\n",
|
| 291 |
+
"\n",
|
| 292 |
+
" .dataframe thead th {\n",
|
| 293 |
+
" text-align: right;\n",
|
| 294 |
+
" }\n",
|
| 295 |
+
"</style>\n",
|
| 296 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 297 |
+
" <thead>\n",
|
| 298 |
+
" <tr style=\"text-align: right;\">\n",
|
| 299 |
+
" <th></th>\n",
|
| 300 |
+
" <th>Map Data</th>\n",
|
| 301 |
+
" </tr>\n",
|
| 302 |
+
" </thead>\n",
|
| 303 |
+
" <tbody>\n",
|
| 304 |
+
" <tr>\n",
|
| 305 |
+
" <th>0</th>\n",
|
| 306 |
+
" <td></td>\n",
|
| 307 |
+
" </tr>\n",
|
| 308 |
+
" <tr>\n",
|
| 309 |
+
" <th>1</th>\n",
|
| 310 |
+
" <td>Prongs Reef is a Natural;</td>\n",
|
| 311 |
+
" </tr>\n",
|
| 312 |
+
" <tr>\n",
|
| 313 |
+
" <th>2</th>\n",
|
| 314 |
+
" <td>United Services Club Golf Course is a Leisure ...</td>\n",
|
| 315 |
+
" </tr>\n",
|
| 316 |
+
" <tr>\n",
|
| 317 |
+
" <th>3</th>\n",
|
| 318 |
+
" <td>Indian Meterological Department is a Commercia...</td>\n",
|
| 319 |
+
" </tr>\n",
|
| 320 |
+
" <tr>\n",
|
| 321 |
+
" <th>4</th>\n",
|
| 322 |
+
" <td></td>\n",
|
| 323 |
+
" </tr>\n",
|
| 324 |
+
" </tbody>\n",
|
| 325 |
+
"</table>\n",
|
| 326 |
+
"</div>"
|
| 327 |
+
],
|
| 328 |
+
"text/plain": [
|
| 329 |
+
" Map Data\n",
|
| 330 |
+
"0 \n",
|
| 331 |
+
"1 Prongs Reef is a Natural; \n",
|
| 332 |
+
"2 United Services Club Golf Course is a Leisure ...\n",
|
| 333 |
+
"3 Indian Meterological Department is a Commercia...\n",
|
| 334 |
+
"4 "
|
| 335 |
+
]
|
| 336 |
+
},
|
| 337 |
+
"execution_count": 43,
|
| 338 |
+
"metadata": {},
|
| 339 |
+
"output_type": "execute_result"
|
| 340 |
+
}
|
| 341 |
+
],
|
| 342 |
+
"source": [
|
| 343 |
+
"## dropping the columns that are not needed for the analysis\n",
|
| 344 |
+
"\n",
|
| 345 |
+
"df_len_explore = df_len_explore.drop(columns=['row', 'col', 'latitude', 'longitude'])\n",
|
| 346 |
+
"df_len_explore.head()"
|
| 347 |
+
]
|
| 348 |
+
},
|
| 349 |
+
{
|
| 350 |
+
"cell_type": "code",
|
| 351 |
+
"execution_count": 44,
|
| 352 |
+
"metadata": {},
|
| 353 |
+
"outputs": [
|
| 354 |
+
{
|
| 355 |
+
"data": {
|
| 356 |
+
"text/plain": [
|
| 357 |
+
"791"
|
| 358 |
+
]
|
| 359 |
+
},
|
| 360 |
+
"execution_count": 44,
|
| 361 |
+
"metadata": {},
|
| 362 |
+
"output_type": "execute_result"
|
| 363 |
+
}
|
| 364 |
+
],
|
| 365 |
+
"source": [
|
| 366 |
+
"## dropping the rows with 0 string length and string length > 5000\n",
|
| 367 |
+
"\n",
|
| 368 |
+
"df_len_explore = df_len_explore[df_len_explore['Map Data'].str.len() > 0]\n",
|
| 369 |
+
"df_len_explore = df_len_explore[df_len_explore['Map Data'].str.len() < 5000]\n",
|
| 370 |
+
"len(df_len_explore)"
|
| 371 |
+
]
|
| 372 |
+
},
|
| 373 |
+
{
|
| 374 |
+
"cell_type": "code",
|
| 375 |
+
"execution_count": 45,
|
| 376 |
+
"metadata": {},
|
| 377 |
+
"outputs": [
|
| 378 |
+
{
|
| 379 |
+
"name": "stdout",
|
| 380 |
+
"output_type": "stream",
|
| 381 |
+
"text": [
|
| 382 |
+
"Discarded rows: 434 / 1225\n"
|
| 383 |
+
]
|
| 384 |
+
}
|
| 385 |
+
],
|
| 386 |
+
"source": [
|
| 387 |
+
"print('Discarded rows: ', len(df) - len(df_len_explore), '/', len(df))"
|
| 388 |
+
]
|
| 389 |
+
},
|
| 390 |
+
{
|
| 391 |
+
"cell_type": "code",
|
| 392 |
+
"execution_count": 52,
|
| 393 |
+
"metadata": {},
|
| 394 |
+
"outputs": [
|
| 395 |
+
{
|
| 396 |
+
"name": "stdout",
|
| 397 |
+
"output_type": "stream",
|
| 398 |
+
"text": [
|
| 399 |
+
"Mean string length: 834.7509481668774\n",
|
| 400 |
+
"80th percentile string length: 1560.0\n"
|
| 401 |
+
]
|
| 402 |
+
}
|
| 403 |
+
],
|
| 404 |
+
"source": [
|
| 405 |
+
"## mean of the string length\n",
|
| 406 |
+
"\n",
|
| 407 |
+
"print('Mean string length: ', df_len_explore['Map Data'].str.len().mean())\n",
|
| 408 |
+
"print('80th percentile string length: ', df_len_explore['Map Data'].str.len().quantile(0.8))"
|
| 409 |
+
]
|
| 410 |
+
},
|
| 411 |
+
{
|
| 412 |
+
"cell_type": "code",
|
| 413 |
+
"execution_count": 51,
|
| 414 |
+
"metadata": {},
|
| 415 |
+
"outputs": [
|
| 416 |
+
{
|
| 417 |
+
"data": {
|
| 418 |
+
"text/plain": [
|
| 419 |
+
"<matplotlib.lines.Line2D at 0x1c5b27a9790>"
|
| 420 |
+
]
|
| 421 |
+
},
|
| 422 |
+
"execution_count": 51,
|
| 423 |
+
"metadata": {},
|
| 424 |
+
"output_type": "execute_result"
|
| 425 |
+
},
|
| 426 |
+
{
|
| 427 |
+
"data": {
|
| 428 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAj4AAAGdCAYAAAASUnlxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA2pklEQVR4nO3de3QU9f3/8ddCyJLAJiGEXAkXFUHlUrkYU1FBkKuIQr9fRayoHC5taJWoKK0FsZ6GSptaFUH7VdCjSEu/iBYK3yKXoDWi3ESUhktBzJIAGsgmATYJmd8f1P25JNyW2cwk83ycM2fZmc/OvudNJC9nPjvrMgzDEAAAgAM0sboAAACA+kLwAQAAjkHwAQAAjkHwAQAAjkHwAQAAjkHwAQAAjkHwAQAAjkHwAQAAjhFhdQF2UFNTo4MHD8rj8cjlclldDgAAuACGYaisrEypqalq0uTCzuUQfCQdPHhQ6enpVpcBAABC8PXXX6tt27YXNJbgI8nj8Ug63biYmBiLq3GoBQukigqpRQvpgQesrsZSC7YuUEVVhVo0a6EHrnV2LwDgXHw+n9LT0wO/xy+Ei+/qOt242NhYlZaWEnwAAGggQvn9zeRmAADgGAQfAADgGMzxgT0UFUmnTklNm0opKVZXY6misiKdMk6pqaupUjzO7gUAmI3gA3vo00fyeqW0NKmw0OpqLNXnT33kLfMqzZOmwmxn9wIAzMalLgAA4BgEHwAA4BgEHwAA4BgEHwAA4BgEHwAA4BgEHwAA4BgEHwAA4BgEHwAA4BgEHwAA4BjcuRn2sGaNVF0tRfAjuea+NaquqVZEE3oBAGbjX9Yw6/DEilrr9s8ebkElNte5s9UV2EbnBHoBAOHCpS4AAOAYBB8AAOAYXOqCPSxaJB0/LkVHS/fcY3U1llr0+SIdrzqu6GbRuqebs3sBAGYj+MAepk2TvF4pLc3xwWfa6mnylnmV5kkj+ACAybjUBQAAHIPgAwAAHIPgAwAAHIPgAwAAHIPgAwAAHIPgAwAAHIPgAwAAHIPgAwAAHIMbGMIekpODHx0suWVy0CMAwDwEH9jDpk1WV2AbmybSCwAIFy51AQAAx7A0+OTk5KhPnz7yeDxKTEzUHXfcoYKCgqAxJ0+eVFZWllq3bq2WLVtq9OjROnToUNCYAwcOaPjw4YqOjlZiYqIee+wxVVdX1+ehAACABsDS4JOXl6esrCx9/PHHWr16taqqqjRo0CBVVFQExkydOlV/+9vftGTJEuXl5engwYMaNWpUYPupU6c0fPhwVVZW6qOPPtLrr7+uhQsXasaMGVYcEgAAsDGXYRiG1UV858iRI0pMTFReXp5uuukmlZaWqk2bNlq0aJF+9KMfSZL+9a9/6aqrrlJ+fr6uv/56rVy5UrfddpsOHjyopKQkSdL8+fP1+OOP68iRI4qMjDzv+/p8PsXGxqq0tFQxMTGmHlOHJ1bUWrd/9nBT36NRmDRJKimR4uOll1+2uhpLTfrbJJWcLFF883i9PMLZvQCAcwnl97et5viUlpZKkuLj4yVJmzdvVlVVlQYOHBgY06VLF7Vr1075+fmSpPz8fHXr1i0QeiRp8ODB8vl8+uKLL+p8H7/fL5/PF7TAYitWSH/96+lHh1uxe4X++uVftWI3vQAAs9km+NTU1Ojhhx/WDTfcoK5du0qSiouLFRkZqbi4uKCxSUlJKi4uDoz5fuj5bvt32+qSk5Oj2NjYwJKenm7y0QAAADuyTfDJysrSjh07tHjx4rC/1/Tp01VaWhpYvv7667C/JwAAsJ4t7uMzZcoULV++XBs2bFDbtm0D65OTk1VZWaljx44FnfU5dOiQkv9zo7vk5GR98sknQfv77lNfyWe5GZ7b7Zbb7Tb5KAAAgN1ZesbHMAxNmTJF77zzjtauXauOHTsGbe/Vq5eaNWumNWvWBNYVFBTowIEDyszMlCRlZmbq888/1+HDhwNjVq9erZiYGF199dX1cy
AAAKBBsPSMT1ZWlhYtWqR3331XHo8nMCcnNjZWUVFRio2N1fjx45Wdna34+HjFxMToZz/7mTIzM3X99ddLkgYNGqSrr75aP/7xj/Xss8+quLhYTz75pLKysjirAwAAglgafObNmydJ6tevX9D6BQsW6P7775ck/eEPf1CTJk00evRo+f1+DR48WC+99FJgbNOmTbV8+XL95Cc/UWZmplq0aKFx48bp6aefrq/DAAAADYSlwedCbiHUvHlzzZ07V3Pnzj3rmPbt2+vvf/+7maUBAIBGyDaf6gIAAAg3W3yqC9CYMdLRo1KrVlZXYrkxXcfo6MmjatWcXgCA2Qg+sIc5c6yuwDbmDKIXABAuXOoCAACOQfABAACOQfABAACOQfCBPXTpIsXEnH50uC4vdlFMToy6vEgvAMBsBB/YQ3m5VFZ2+tHhyivLVVZZpvJKegEAZiP4AAAAxyD4AAAAxyD4AAAAxyD4AAAAxyD4AAAAxyD4AAAAxyD4AAAAxyD4AAAAx+Db2WEP8+dLJ05IUVFWV2K5+bfN14mqE4pqRi8AwGwEH9jDbbdZXYFt3HYlvQCAcOFSFwAAcAyCDwAAcAwudcEeNm+WKiulyEipVy+rq7HU5oObVXmqUpFNI9Ur1dm9AACzEXxgDyNHSl6vlJYmFRZaXY2lRi4eKW+ZV2meNBVmO7sXAGA2LnUBAADHIPgAAADHIPgAAADHIPgAAADHIPgAAADHIPgAAADHIPgAAADHIPgAAADHIPgAAADHsDT4bNiwQSNGjFBqaqpcLpeWLVsWtN3lctW5zJkzJzCmQ4cOtbbPnj27no8El2znTqm09PSjw+3M2qnSJ0q1M4teAIDZLP3KioqKCvXo0UMPPvigRo0aVWt7UVFR0POVK1dq/PjxGj16dND6p59+WhMmTAg893g84SkY4cPfWYDHTS8AIFwsDT5Dhw7V0KFDz7o9OTk56Pm7776r/v3767LLLgta7/F4ao0FAAA4U4OZ43Po0CGtWLFC48ePr7Vt9uzZat26ta699lrNmTNH1dXV59yX3++Xz+cLWgAAQOPXYL6d/fXXX5fH46l1SeznP/+5evbsqfj4eH300UeaPn26ioqKlJube9Z95eTkaNasWeEuGRcjN1fy+aSYGCk72+pqLJWbnyuf36cYd4yyM53dCwAwm8swDMPqIqTTE5nfeecd3XHHHXVu79Kli2699Va98MIL59zPa6+9pkmTJqm8vFxut7vOMX6/X36/P/Dc5/MpPT1dpaWliomJCfkY6tLhiRW11u2fPdzU92gU2raVvF4pLU0qLLS6Gku1zW0rb5lXaZ40FWY7uxcAcC4+n0+xsbEX9fu7QZzx+eCDD1RQUKA///nP5x2bkZGh6upq7d+/X507d65zjNvtPmsoAgAAjVeDmOPz6quvqlevXurRo8d5x27btk1NmjRRYmJiPVQGAAAaEkvP+JSXl2vPnj2B5/v27dO2bdsUHx+vdu3aSTp9GmvJkiX6/e9/X+v1+fn52rhxo/r37y+Px6P8/HxNnTpV9957r1q1alVvxwEAABoGS4PPpk2b1L9//8Dz7P9Mah03bpwWLlwoSVq8eLEMw9CYMWNqvd7tdmvx4sV66qmn5Pf71bFjR02dOjWwHwAAgO+zNPj069dP55tbPXHiRE2cOLHObT179tTHH38cjtIAAEAj1CDm+AAAAJiB4AMAAByD4AMAAByjQdzHBw7Qs6eUni61aWN1JZbrmdJT6bHpahNNLwDAbAQf2MN771ldgW28N4ZeAEC4cKkLAAA4BsEHAAA4BsEHAAA4BnN8YA+33y4dOXJ6crPD5/vc/vbtOnL8iNpEt2G+DwCYjOADe9iyRfJ6pbQ0qyux3JaiLfKWeZXmoRcAYDYudQEAAMcg+AAAAMcg+AAAAMcg+AAAAMcg+AAAAMcg+AAAAMcg+AAAAMcg+AAAAMfgBoawh+xsyeeTYmKsrsRy2ZnZ8vl9inHTCwAwG8EH9p
CdbXUFtpGdSS8AIFy41AUAAByD4AMAAByDS12wh7IyyTAkl0vyeKyuxlJl/jIZMuSSSx63s3sBAGbjjA/s4aqrpNjY048Od9XcqxQ7O1ZXzaUXAGA2gg8AAHAMgg8AAHAMgg8AAHAMgg8AAHAMgg8AAHAMgg8AAHAMS4PPhg0bNGLECKWmpsrlcmnZsmVB2++//365XK6gZciQIUFjSkpKNHbsWMXExCguLk7jx49XeXl5PR4FAABoKCwNPhUVFerRo4fmzp171jFDhgxRUVFRYHn77beDto8dO1ZffPGFVq9ereXLl2vDhg2aOHFiuEsHAAANkKV3bh46dKiGDh16zjFut1vJycl1btu5c6dWrVqlTz/9VL1795YkvfDCCxo2bJh+97vfKTU11fSaAQBAw2X7r6xYv369EhMT1apVK91yyy165pln1Lp1a0lSfn6+4uLiAqFHkgYOHKgmTZpo48aNuvPOO+vcp9/vl9/vDzz3+XzhPQic37vvSpWVUmSk1ZVY7t2731XlqUpFNqUXAGA2WwefIUOGaNSoUerYsaP27t2rX/ziFxo6dKjy8/PVtGlTFRcXKzExMeg1ERERio+PV3Fx8Vn3m5OTo1mzZoW7fFyMXr2srsA2eqXSCwAIF1sHn7vvvjvw527duql79+66/PLLtX79eg0YMCDk/U6fPl3Z2dmB5z6fT+np6ZdUKwAAsL8G9XH2yy67TAkJCdqzZ48kKTk5WYcPHw4aU11drZKSkrPOC5JOzxuKiYkJWgAAQONn6zM+ZyosLNS3336rlJQUSVJmZqaOHTumzZs3q9d/LpWsXbtWNTU1ysjIsLJUXKzly6UTJ6SoKOm226yuxlLLdy3XiaoTimoWpduudHYvAMBslgaf8vLywNkbSdq3b5+2bdum+Ph4xcfHa9asWRo9erSSk5O1d+9eTZs2TVdccYUGDx4sSbrqqqs0ZMgQTZgwQfPnz1dVVZWmTJmiu+++m090NTSTJ0ter5SWJhUWWl2NpSYvnyxvmVdpnjQVZju7FwBgNksvdW3atEnXXnutrr32WklSdna2rr32Ws2YMUNNmzbV9u3bdfvtt+vKK6/U+PHj1atXL33wwQdyu92Bfbz11lvq0qWLBgwYoGHDhqlv37565ZVXrDokAABgY5ae8enXr58Mwzjr9v/7v/877z7i4+O1aNEiM8sCAACNVIOa3AwAAHApCD4AAMAxCD4AAMAxCD4AAMAxCD4AAMAxCD4AAMAxCD6wh5YtJY/n9KPDtYxsKU+kRy0j6QUAmK1BfWUFGrF//cvqCmzjX1PoBQCEC2d8AACAYxB8AACAYxB8AACAYzDHB/bw2GPS0aNSq1bSnDlWV2Opx/7xmI6ePKpWzVtpziBn9wIAzEbwgT28/bbk9UppaY4PPm/veFveMq/SPGkEHwAwGZe6AACAYxB8AACAYxB8AACAYxB8AACAYxB8AACAYxB8AACAYxB8AACAYxB8AACAY3ADQ9jD8OFSSYkUH291JZYb3mm4Sk6WKL45vQAAsxF8YA8vv2x1Bbbx8gh6AQDhwqUuAADgGAQfAADgGCEFn3//+99m1wEAABB2IQWfK664Qv3799ebb76pkydPml0TnKh3b6lt29OPDtf7ld5qm9tWvV+hFwBgtpCCz5YtW9S9e3dlZ2crOTlZkyZN0ieffGJ2bXCS4mLJ6z396HDF5cXylnlVXE4vAMBsIQWfH/zgB/rjH/+ogwcP6rXXXlNRUZH69u2rrl27Kjc3V0eOHDG7TgAAgEt2SZObIyIiNGrUKC1ZskS//e1vtWfPHj366KNKT0/Xfffdp6KiIrPqBAAAuGSXFHw2bdqkn/70p0pJSVFubq4effRR7d27V6tXr9bBgwc1cuRIs+oEAAC4ZCEFn9zcXHXr1k0//OEPdfDgQb3xxhv66quv9Mwzz6hjx4668cYbtXDhQm3ZsuWc+9mwYYNGjBih1NRUuVwuLVu2LLCtqqpKjz/+uL
p166YWLVooNTVV9913nw4ePBi0jw4dOsjlcgUts2fPDuWwAABAIxfSnZvnzZunBx98UPfff79SUlLqHJOYmKhXX331nPupqKhQjx499OCDD2rUqFFB244fP64tW7boV7/6lXr06KGjR4/qoYce0u23365NmzYFjX366ac1YcKEwHOPxxPKYQEAgEYupOCze/fu846JjIzUuHHjzjlm6NChGjp0aJ3bYmNjtXr16qB1L774oq677jodOHBA7dq1C6z3eDxKTk6+gMoBAICThXSpa8GCBVqyZEmt9UuWLNHrr79+yUWdTWlpqVwul+Li4oLWz549W61bt9a1116rOXPmqLq6+pz78fv98vl8QQsAAGj8Qgo+OTk5SkhIqLU+MTFRv/nNby65qLqcPHlSjz/+uMaMGaOYmJjA+p///OdavHix1q1bp0mTJuk3v/mNpk2bds595eTkKDY2NrCkp6eHpWYAAGAvIV3qOnDggDp27Fhrffv27XXgwIFLLupMVVVV+u///m8ZhqF58+YFbcvOzg78uXv37oqMjNSkSZOUk5Mjt9td5/6mT58e9Dqfz0f4sdqzz0rHj0vR0VZXYrlnb31Wx6uOK7oZvQAAs4UUfBITE7V9+3Z16NAhaP1nn32m1q1bm1FXwHeh56uvvtLatWuDzvbUJSMjQ9XV1dq/f786d+5c5xi3233WUASL3HOP1RXYxj3d6AUAhEtIwWfMmDH6+c9/Lo/Ho5tuukmSlJeXp4ceekh33323acV9F3p2796tdevWXVCo2rZtm5o0aaLExETT6gAAAI1DSMHn17/+tfbv368BAwYoIuL0LmpqanTfffdd1Byf8vJy7dmzJ/B837592rZtm+Lj45WSkqIf/ehH2rJli5YvX65Tp06p+D/f4xQfH6/IyEjl5+dr48aN6t+/vzwej/Lz8zV16lTde++9atWqVSiHBgAAGjGXYRhGqC/etWuXPvvsM0VFRalbt25q3779Rb1+/fr16t+/f63148aN01NPPVXnPCJJWrdunfr166ctW7bopz/9qf71r3/J7/erY8eO+vGPf6zs7OyLupTl8/kUGxur0tLS815Ku1gdnlhRa93+2cNNfY9GoaBAqq6WIiKks1yidIqCbwpUXVOtiCYR6pzg7F4AwLmE8vv7koJPY0HwsYG2bU9/O3tamlRYaHU1lmqb21beMq/SPGkqzHZ2LwDgXEL5/R3Spa5Tp05p4cKFWrNmjQ4fPqyampqg7WvXrg1ltwAAAGEVUvB56KGHtHDhQg0fPlxdu3aVy+Uyuy4AAADThRR8Fi9erL/85S8aNmyY2fUAAACETUh3bo6MjNQVV1xhdi0AAABhFVLweeSRR/THP/5RzIsGAAANSUiXuj788EOtW7dOK1eu1DXXXKNmzZoFbV+6dKkpxQEAAJgppOATFxenO++80+xaAAAAwiqk4LNgwQKz6wAAAAi7kOb4SFJ1dbXef/99vfzyyyorK5MkHTx4UOXl5aYVBwAAYKaQzvh89dVXGjJkiA4cOCC/369bb71VHo9Hv/3tb+X3+zV//nyz60Rj9+mn0qlTUtOmVldiuU8nfKpTxik1ddELADBbyDcw7N27tz777LOgb0y/8847NWHCBNOKg4OkpFhdgW2keOgFAIRLSMHngw8+0EcffaTIyMig9R06dJDX6zWlMAAAALOFNMenpqZGp06dqrW+sLBQHo/nkosCAAAIh5DO+AwaNEjPPfecXnnlFUmSy+VSeXm5Zs6cyddYIDSvvCKVl0stW0oTJ1pdjaVe2fyKyivL1TKypSb2cnYvAMBsLiOE2y8XFhZq8ODBMgxDu3fvVu/evbV7924lJCRow4YNSkxMDEetYRPK19pfqA5PrKi1bv/s4aa+R6PQtq3k9UppaVJhodXVWKptblt5y7xK86SpMNvZvQCAcwnl93dIZ3zatm2rzz77TIsXL9b27dtVXl6u8ePHa+zYsYqKigpllwAAAGEXUvCRpI
iICN17771m1gIAABBWIQWfN95445zb77vvvpCKAQAACKeQ7+PzfVVVVTp+/LgiIyMVHR1N8AEAALYU0sfZjx49GrSUl5eroKBAffv21dtvv212jQAAAKYI+bu6ztSpUyfNnj271tkgAAAAuzAt+EinJzwfPHjQzF0CAACYJqQ5Pu+9917Qc8MwVFRUpBdffFE33HCDKYUBAACYLaTgc8cddwQ9d7lcatOmjW655Rb9/ve/N6MuOM2VV0qxsVJSktWVWO7K1lcqtnmsklrQCwAwW0jBp6amxuw64HRr11pdgW2sHUcvACBcTJ3jAwAAYGchnfHJzs6+4LG5ubmhvAUAAIDpQgo+W7du1datW1VVVaXOnTtLknbt2qWmTZuqZ8+egXEul8ucKgEAAEwQUvAZMWKEPB6PXn/9dbVq1UrS6ZsaPvDAA7rxxhv1yCOPmFokHGDsWOmbb6SEBOmtt6yuxlJjl47VN8e/UUJ0gt4a5exeAIDZQgo+v//97/WPf/wjEHokqVWrVnrmmWc0aNAggg8uXl6e5PVKaWlWV2K5vP158pZ5leahFwBgtpAmN/t8Ph05cqTW+iNHjqisrOyC97NhwwaNGDFCqampcrlcWrZsWdB2wzA0Y8YMpaSkKCoqSgMHDtTu3buDxpSUlGjs2LGKiYlRXFycxo8fr/Ly8lAOCwAANHIhBZ8777xTDzzwgJYuXarCwkIVFhbqf//3fzV+/HiNGjXqgvdTUVGhHj16aO7cuXVuf/bZZ/X8889r/vz52rhxo1q0aKHBgwfr5MmTgTFjx47VF198odWrV2v58uXasGGDJk6cGMphAQCARi6kS13z58/Xo48+qnvuuUdVVVWndxQRofHjx2vOnDkXvJ+hQ4dq6NChdW4zDEPPPfecnnzySY0cOVKS9MYbbygpKUnLli3T3XffrZ07d2rVqlX69NNP1bt3b0nSCy+8oGHDhul3v/udUlNTQzk8AADQSIV0xic6OlovvfSSvv3228AnvEpKSvTSSy+pRYsWphS2b98+FRcXa+DAgYF1sbGxysjIUH5+viQpPz9fcXFxgdAjSQMHDlSTJk20cePGs+7b7/fL5/MFLQAAoPG7pBsYFhUVqaioSJ06dVKLFi1kGIZZdam4uFiSlHTGVxgkJSUFthUXFysxMTFoe0REhOLj4wNj6pKTk6PY2NjAkp6eblrdAADAvkIKPt9++60GDBigK6+8UsOGDVNRUZEkafz48Q3iE13Tp09XaWlpYPn666+tLgkAANSDkILP1KlT1axZMx04cEDR0dGB9XfddZdWrVplSmHJycmSpEOHDgWtP3ToUGBbcnKyDh8+HLS9urpaJSUlgTF1cbvdiomJCVoAAEDjF1Lw+cc//qHf/va3atu2bdD6Tp066auvvjKlsI4dOyo5OVlr1qwJrPP5fNq4caMyMzMlSZmZmTp27Jg2b94cGLN27VrV1NQoIyPDlDoAAEDjEdKnuioqKoLO9HynpKREbrf7gvdTXl6uPXv2BJ7v27dP27ZtU3x8vNq1a6eHH35YzzzzjDp16qSOHTvqV7/6lVJTU3XHHXdIkq666ioNGTJEEyZM0Pz581VVVaUpU6bo7rvv5hNdDc2ECVJpqRQba3UllpvQc4JK/aWKddMLADBbSMHnxhtv1BtvvKFf//rXkk5/J1dNTY2effZZ9e/f/4L3s2nTpqDx33356bhx47Rw4UJNmzZNFRUVmjhxoo4dO6a+fftq1apVat68eeA1b731lqZMmaIBAwaoSZMmGj16tJ5//vlQDgtWmjnT6gpsY2Y/egEA4eIyQvgo1o4dOzRgwAD17NlTa9eu1e23364vvvhCJSUl+uc//6nLL788HLWGjc/nU2xsrEpLS02f79PhiRW11u2fPdzU9wAAwIlC+f0d0hyfrl27ateuXerbt69GjhypiooKjRo1Slu3bm1woQcAADjHRV/qqqqq0pAhQzR//nz98pe/DEdNAAAAYXHRZ3
yaNWum7du3h6MWOFnbtpLLdfrR4drmtpVrlkttc+kFAJgtpEtd9957r1599VWzawEAAAirkD7VVV1drddee03vv/++evXqVev7uXJzc00pDgAAwEwXFXz+/e9/q0OHDtqxY4d69uwpSdq1a1fQGJfLZV51AAAAJrqo4NOpUycVFRVp3bp1kk5/RcXzzz9f64tEAQAA7Oii5vicecuflStXqqKiwtSCAAAAwiWkOT7fCeHeh1DtmxpyQ0MAAOrHRZ3xcblctebwMKcHAAA0FBd1xscwDN1///2BLyI9efKkJk+eXOtTXUuXLjWvQgAAAJNcVPAZN25c0PN7773X1GIAAADC6aKCz4IFC8JVB5zuzTclv1/6z9lEJ3tz1JvyV/vljqAXAGC2S5rcDJimXz+rK7CNfh36WV0CADRaIX1lBQAAQENE8AEAAI7BpS7Yw/r1/3+Oj8Mve63fvz4wx4fLXgBgLoIP7OHeeyWvV0pLkwoLra7GUvcuvVfeMq/SPGkqzHZ2LwDAbFzqAgAAjkHwAQAAjkHwAQAAjkHwAQAAjkHwAQAAjkHwAQAAjkHwAQAAjkHwAQAAjkHwAQAAjsGdm2EPDr9b8/dxt2YACB/O+AAAAMcg+AAAAMcg+AAAAMewffDp0KGDXC5XrSUrK0uS1K9fv1rbJk+ebHHVuGizZknZ2acfHW7W+lnK/r9szVpPLwDAbLaf3Pzpp5/q1KlTgec7duzQrbfeqv/6r/8KrJswYYKefvrpwPPo6Oh6rREm+NOfJK9XSkuTZs60uhpL/WnLn+Qt8yrNk6aZ/ZzdCwAwm+2DT5s2bYKez549W5dffrluvvnmwLro6GglJyfXd2kAAKCBsf2lru+rrKzUm2++qQcffFAulyuw/q233lJCQoK6du2q6dOn6/jx4+fcj9/vl8/nC1oAAEDjZ/szPt+3bNkyHTt2TPfff39g3T333KP27dsrNTVV27dv1+OPP66CggItXbr0rPvJycnRLOaSAADgOA0q+Lz66qsaOnSoUlNTA+smTpwY+HO3bt2UkpKiAQMGaO/evbr88svr3M/06dOVnZ0deO7z+ZSenh6+wgEAgC00mODz1Vdf6f333z/nmRxJysjIkCTt2bPnrMHH7XbL7XabXiMAALC3BjPHZ8GCBUpMTNTw4cPPOW7btm2SpJSUlHqoCgAANCQN4oxPTU2NFixYoHHjxiki4v+XvHfvXi1atEjDhg1T69attX37dk2dOlU33XSTunfvbmHFAADAjhpE8Hn//fd14MABPfjgg0HrIyMj9f777+u5555TRUWF0tPTNXr0aD355JMWVQoAAOysQQSfQYMGyTCMWuvT09OVl5dnQUUw3c03S998IyUkWF2J5W7ucLO+Of6NEqLpBQCYrUEEHzjAW29ZXYFtvDWKXgBAuDSYyc0AAACXiuADAAAcg+ADAAAcgzk+sIdbbpEOHZKSkqS1a62uxlK3vH6LDlUcUlKLJK0d5+xeAIDZCD6wh127JK9XKi21uhLL7fp2l7xlXpWepBcAYDYudQEAAMcg+AAAAMcg+AAAAMcg+AAAAMcg+AAAAMcg+AAAAMcg+AAAAMcg+AAAAMfgBoawhxkzpPJyqWVLqyux3IybZ6i8slwtI+kFAJiN4AN7mDjR6gpsY2IvegEA4cKlLgAA4BgEHwAA4Bhc6oI9FBVJp05JTZtKKSlWV2OporIinTJOqamrqVI8zu4FAJiN4AN76NPn9Lezp6VJhYVWV2OpPn/qI2+ZV2meNBVmO7sXAGA2LnUBAADHIPgAAADHIPgAAADHIPgAAADHIPgAAADHIPgAAADHIPgAAADHIPgAAADHIPgAAADH4M7NsIc1a6TqaimCH8k1961RdU21IprQCwAwm63P+Dz11FNyuVxBS5cuXQLbT548qaysLLVu3VotW7bU6NGjdejQIQsrRsg6d5auueb0o8N1TuisaxKvUecEegEAZrN18JGka665RkVFRYHlww
8/DGybOnWq/va3v2nJkiXKy8vTwYMHNWrUKAurBQAAdmb7c+kRERFKTk6utb60tFSvvvqqFi1apFtuuUWStGDBAl111VX6+OOPdf3119d3qQAAwOZsH3x2796t1NRUNW/eXJmZmcrJyVG7du20efNmVVVVaeDAgYGxXbp0Ubt27ZSfn3/O4OP3++X3+wPPfT5fWI8BF2DRIun4cSk6WrrnHqursdSizxfpeNVxRTeL1j3dnN0LADCbrYNPRkaGFi5cqM6dO6uoqEizZs3SjTfeqB07dqi4uFiRkZGKi4sLek1SUpKKi4vPud+cnBzNmjUrjJXjok2bJnm9Ulqa44PPtNXT5C3zKs2TRvABAJPZOvgMHTo08Ofu3bsrIyND7du311/+8hdFRUWFvN/p06crOzs78Nzn8yk9Pf2SagUAAPZn+8nN3xcXF6crr7xSe/bsUXJysiorK3Xs2LGgMYcOHapzTtD3ud1uxcTEBC0AAKDxa1DBp7y8XHv37lVKSop69eqlZs2aac2aNYHtBQUFOnDggDIzMy2sEgAA2JWtL3U9+uijGjFihNq3b6+DBw9q5syZatq0qcaMGaPY2FiNHz9e2dnZio+PV0xMjH72s58pMzOTT3QBAIA62Tr4FBYWasyYMfr222/Vpk0b9e3bVx9//LHatGkjSfrDH/6gJk2aaPTo0fL7/Ro8eLBeeukli6sGAAB2Zevgs3jx4nNub968uebOnau5c+fWU0UAAKAha1BzfAAAAC4FwQcAADiGrS91wUG+uwXBeW5F4ATJLZODHgEA5iH4wB42bbK6AtvYNJFeAEC4cKkLAAA4BsEHAAA4BsEHAAA4BnN8YA+TJkklJVJ8vPTyy1ZXY6lJf5ukkpMlim8er5dHOLsXAGA2gg/sYcUKyeuV0tKsrsRyK3avkLfMqzQPvQAAs3GpCwAAOAbBBwAAOAbBBwAAOAbBBwAAOAbBBwAAOAbBBwAAOAbBBwAAOAbBBwAAOAY3MLSBDk+sqLVu/+zhFlRioTFjpKNHpVatrK7EcmO6jtHRk0fVqjm9AACzEXxgD3PmWF2BbcwZRC8AIFy41AUAAByD4AMAAByD4AMAAByD4AN76NJFiok5/ehwXV7sopicGHV5kV4AgNkIPrCH8nKprOz0o8OVV5arrLJM5ZX0AgDMRvABAACOQfABAACOQfABAACOQfABAACOwZ2bberMr7Fw3FdYAAAQBpzxAQAAjkHwAQAAjmHr4JOTk6M+ffrI4/EoMTFRd9xxhwoKCoLG9OvXTy6XK2iZPHmyRRUDAAA7s/Ucn7y8PGVlZalPnz6qrq7WL37xCw0aNEhffvmlWrRoERg3YcIEPf3004Hn0dHRVpSLSzF/vnTihBQVZXUllpt/23ydqDqhqGb0AgDMZuvgs2rVqqDnCxcuVGJiojZv3qybbropsD46OlrJycn1XR7MdNttVldgG7ddSS8AIFxsfanrTKWlpZKk+Pj4oPVvvfWWEhIS1LVrV02fPl3Hjx8/5378fr98Pl/QAgAAGj9bn/H5vpqaGj388MO64YYb1LVr18D6e+65R+3bt1dqaqq2b9+uxx9/XAUFBVq6dOlZ95WTk6NZs2bVR9kAAMBGXIZhGFYXcSF+8pOfaOXKlfrwww/Vtm3bs45bu3atBgwYoD179ujyyy+vc4zf75ff7w889/l8Sk9PV2lpqWJiYkyt+8z78YSq0d/HZ/NmqbJSioyUevWyuhpLbT64WZWnKhXZNFK9Up3dCwA4F5/Pp9jY2Iv6/d0gzvhMmTJFy5cv14YNG84ZeiQpIyNDks4ZfNxut9xut+l14hKMHCl5vVJamlRYaHU1lhq5eKS8ZV6ledJUmO3sXgCA2WwdfAzD0M9+9jO98847Wr9+vTp27Hje12zbtk2SlJKSEubq6lddZ44a/VkgAABMZuvgk5WVpUWLFundd9+Vx+NRcXGxJCk2NlZRUVHau3evFi1apGHDhql169bavn27pk
6dqptuukndu3e3uHoAAGA3tg4+8+bNk3T6JoXft2DBAt1///2KjIzU+++/r+eee04VFRVKT0/X6NGj9eSTT1pQLQAAsDtbB5/zzbtOT09XXl5ePVUDAAAaugZ1Hx8AAIBLYeszPji3Myc8M9kZAIBz44wPAABwDIIPAABwDIIPAABwDOb4wB527pQMQ3K5rK7EcjuzdsqQIZfoBQCYjeCDCxL2idQej7n7a8A8bnoBAOHCpS4AAOAYBB8AAOAYXOqCPeTmSj6fFBMjZWcHbXLaF7Tm5ufK5/cpxh2j7Mzs878AAHDBCD6wh9xcyeuV0tJqBR+nyc3PlbfMqzRPGsEHAExG8AF3gAYAOAZzfAAAgGNwxqcRcdpcGAAALhbBB7XUFaDC/V75pSeVIqnoP48AAIQDl7oAAIBjcMankQvXxOVwXlarzzNOAABn4YwPAABwDM74OAxnUwAATkbwQb06W/DakXy5io4n6Nvo2HquyH56pvRUemy62kS3sbqUsOL+UQCsQPCBLUwYPcPqEmzjvTHvWV0CADRaBB+ETTgvq5l1tqA+zzpcSD846wEA4cXkZgAA4Bic8UGjxRySi0fPADR2BB+Y5lIubf3pf59W6+Ol+jY6NqT5Plbcbfr7zAwIt799u44cP6I20W3qnO/DV5MAQOgIPrCFrsV7lVL+rYpatra6FMttKdoib5lXaZ40q0sBgEaH4APHaAj3MOrwxAoVNz8puaRi38mLet35hOuskNVnoOw20T2U/VjdQ8BJmNwMAAAcgzM+wPfY7f+86/OWAOF+XX2xe31Sw6gRaKwIPsB5XMgvqYbwi8zqCeBOZVYv7BbKrdYQP4F4ITXz9xx+jeZS19y5c9WhQwc1b95cGRkZ+uSTT6wuCQAA2EyjOOPz5z//WdnZ2Zo/f74yMjL03HPPafDgwSooKFBiYqLV5QENltVnbqz8v3qrj70+OWVit5Punm7WsV7I309DO0vVKM745ObmasKECXrggQd09dVXa/78+YqOjtZrr71mdWkAAMBGGvwZn8rKSm3evFnTp08PrGvSpIkGDhyo/Pz8Ol/j9/vl9/sDz0tLSyVJPp/P9Ppq/MdN32djVGbUqMV/Hp3eM8NVc/pRzupFu6lLTBnTWJz571FdPwtnjuk68//Ou98dswbXWnfmvuv6t/BC9n2muv6+znz/uo4rlL9ns342LuTY6+rh+V5Tlwut+ULGXUhfQ33/UF535pgL6Vkovvv7Mgzjwl9kNHBer9eQZHz00UdB6x977DHjuuuuq/M1M2fONCSxsLCwsLCwNILl66+/vuDc0ODP+IRi+vTpys7ODjyvqalRSUmJWrduLZfLdcn79/l8Sk9P19dff62YmJhL3h/Oj57XL/pdv+h3/aLf9etS+m0YhsrKypSamnrBr2nwwSchIUFNmzbVoUOHgtYfOnRIycnJdb7G7XbL7XYHrYuLizO9tpiYGP6jqWf0vH7R7/pFv+sX/a5fofY7Njb2osY3+MnNkZGR6tWrl9asWRNYV1NTozVr1igzM9PCygAAgN00+DM+kpSdna1x48apd+/euu666/Tcc8+poqJCDzzwgNWlAQAAG2kUweeuu+7SkSNHNGPGDBUXF+sHP/iBVq1apaSkJEvqcbvdmjlzZq3LaQgfel6/6Hf9ot/1i37Xr/rut8swLuYzYAAAAA1Xg5/jAwAAcKEIPgAAwDEIPgAAwDEIPgAAwDEIPmEwd+5cdejQQc2bN1dGRoY++eQTq0tqEDZs2KARI0YoNTVVLpdLy5YtC9puGIZmzJihlJQURUVFaeDAgdq9e3fQmJKSEo0dO1YxMTGKi4vT+PHjVV5eHjRm+/btuvHGG9W8eXOlp6fr2WefDfeh2U5OTo769Okjj8ejxMRE3XHHHSooKAgac/LkSWVlZal169Zq2bKlRo8eXetGoQcOHNDw4cMVHR2txMREPfbYY6qurg4as379evXs2VNut1tXXHGFFi5cGO7Ds5
158+ape/fugRu0ZWZmauXKlYHt9Dq8Zs+eLZfLpYcffjiwjp6b66mnnpLL5QpaunTpEthuq36H/CVZqNPixYuNyMhI47XXXjO++OILY8KECUZcXJxx6NAhq0uzvb///e/GL3/5S2Pp0qWGJOOdd94J2j579mwjNjbWWLZsmfHZZ58Zt99+u9GxY0fjxIkTgTFDhgwxevToYXz88cfGBx98YFxxxRXGmDFjAttLS0uNpKQkY+zYscaOHTuMt99+24iKijJefvnl+jpMWxg8eLCxYMECY8eOHca2bduMYcOGGe3atTPKy8sDYyZPnmykp6cba9asMTZt2mRcf/31xg9/+MPA9urqaqNr167GwIEDja1btxp///vfjYSEBGP69OmBMf/+97+N6OhoIzs72/jyyy+NF154wWjatKmxatWqej1eq7333nvGihUrjF27dhkFBQXGL37xC6NZs2bGjh07DMOg1+H0ySefGB06dDC6d+9uPPTQQ4H19NxcM2fONK655hqjqKgosBw5ciSw3U79JviY7LrrrjOysrICz0+dOmWkpqYaOTk5FlbV8JwZfGpqaozk5GRjzpw5gXXHjh0z3G638fbbbxuGYRhffvmlIcn49NNPA2NWrlxpuFwuw+v1GoZhGC+99JLRqlUrw+/3B8Y8/vjjRufOncN8RPZ2+PBhQ5KRl5dnGMbp3jZr1sxYsmRJYMzOnTsNSUZ+fr5hGKeDapMmTYzi4uLAmHnz5hkxMTGB/k6bNs245pprgt7rrrvuMgYPHhzuQ7K9Vq1aGf/zP/9Dr8OorKzM6NSpk7F69Wrj5ptvDgQfem6+mTNnGj169Khzm936zaUuE1VWVmrz5s0aOHBgYF2TJk00cOBA5efnW1hZw7dv3z4VFxcH9TY2NlYZGRmB3ubn5ysuLk69e/cOjBk4cKCaNGmijRs3BsbcdNNNioyMDIwZPHiwCgoKdPTo0Xo6GvspLS2VJMXHx0uSNm/erKqqqqB+d+nSRe3atQvqd7du3YJuFDp48GD5fD598cUXgTHf38d3Y5z838OpU6e0ePFiVVRUKDMzk16HUVZWloYPH16rL/Q8PHbv3q3U1FRddtllGjt2rA4cOCDJfv0m+Jjom2++0alTp2rdMTopKUnFxcUWVdU4fNe/c/W2uLhYiYmJQdsjIiIUHx8fNKaufXz/PZympqZGDz/8sG644QZ17dpV0uleREZG1vry3jP7fb5enm2Mz+fTiRMnwnE4tvX555+rZcuWcrvdmjx5st555x1dffXV9DpMFi9erC1btignJ6fWNnpuvoyMDC1cuFCrVq3SvHnztG/fPt14440qKyuzXb8bxVdWAAhdVlaWduzYoQ8//NDqUhq1zp07a9u2bSotLdVf//pXjRs3Tnl5eVaX1Sh9/fXXeuihh7R69Wo1b97c6nIcYejQoYE/d+/eXRkZGWrfvr3+8pe/KCoqysLKauOMj4kSEhLUtGnTWjPVDx06pOTkZIuqahy+69+5epucnKzDhw8Hba+urlZJSUnQmLr28f33cJIpU6Zo+fLlWrdundq2bRtYn5ycrMrKSh07dixo/Jn9Pl8vzzYmJibGdv8YhltkZKSuuOIK9erVSzk5OerRo4f++Mc/0usw2Lx5sw4fPqyePXsqIiJCERERysvL0/PPP6+IiAglJSXR8zCLi4vTlVdeqT179tjuZ5zgY6LIyEj16tVLa9asCayrqanRmjVrlJmZaWFlDV/Hjh2VnJwc1Fufz6eNGzcGepuZmaljx45p8+bNgTFr165VTU2NMjIyAmM2bNigqqqqwJjVq1erc+fOatWqVT0djfUMw9CUKVP0zjvvaO3aterYsWPQ9l69eqlZs2ZB/S4oKNCBAweC+v35558Hhc3Vq1crJiZGV199dWDM9/fx3Rj+ezj9b4Pf76fXYTBgwAB9/vnn2rZtW2Dp3bu3xo4dG/gzPQ+v8vJy7d27VykpKfb7Gb+oqdA4r8WLFxtut9
tYuHCh8eWXXxoTJ0404uLigmaqo25lZWXG1q1bja1btxqSjNzcXGPr1q3GV199ZRjG6Y+zx8XFGe+++66xfft2Y+TIkXV+nP3aa681Nm7caHz44YdGp06dgj7OfuzYMSMpKcn48Y9/bOzYscNYvHixER0d7biPs//kJz8xYmNjjfXr1wd9/PT48eOBMZMnTzbatWtnrF271ti0aZORmZlpZGZmBrZ/9/HTQYMGGdu2bTNWrVpltGnTps6Pnz722GPGzp07jblz5zry475PPPGEkZeXZ+zbt8/Yvn278cQTTxgul8v4xz/+YRgGva4P3/9Ul2HQc7M98sgjxvr16419+/YZ//znP42BAwcaCQkJxuHDhw3DsFe/CT5h8MILLxjt2rUzIiMjjeuuu874+OOPrS6pQVi3bp0hqdYybtw4wzBOf6T9V7/6lZGUlGS43W5jwIABRkFBQdA+vv32W2PMmDFGy5YtjZiYGOOBBx4wysrKgsZ89tlnRt++fQ23222kpaUZs2fPrq9DtI26+izJWLBgQWDMiRMnjJ/+9KdGq1atjOjoaOPOO+80ioqKgvazf/9+Y+jQoUZUVJSRkJBgPPLII0ZVVVXQmHXr1hk/+MEPjMjISOOyyy4Leg+nePDBB4327dsbkZGRRps2bYwBAwYEQo9h0Ov6cGbwoefmuuuuu4yUlBQjMjLSSEtLM+666y5jz549ge126rfLMAzj4s4RAQAANEzM8QEAAI5B8AEAAI5B8AEAAI5B8AEAAI5B8AEAAI5B8AEAAI5B8AEAAI5B8AEAAI5B8AEAAI5B8AEAAI5B8AEAAI5B8AEAAI7x/wD6U45YNSHdxwAAAABJRU5ErkJggg==",
|
| 429 |
+
"text/plain": [
|
| 430 |
+
"<Figure size 640x480 with 1 Axes>"
|
| 431 |
+
]
|
| 432 |
+
},
|
| 433 |
+
"metadata": {},
|
| 434 |
+
"output_type": "display_data"
|
| 435 |
+
}
|
| 436 |
+
],
|
| 437 |
+
"source": [
|
| 438 |
+
"## plotting length of strings in the Map Data column, with mean and 80th percentile\n",
|
| 439 |
+
"\n",
|
| 440 |
+
"df_len_explore['Map Data'].str.len().plot(kind='hist', bins=100)\n",
|
| 441 |
+
"plt.axvline(df_len_explore['Map Data'].str.len().mean(), color='red', linestyle='dashed', linewidth=2)\n",
|
| 442 |
+
"plt.axvline(df_len_explore['Map Data'].str.len().quantile(0.80), color='green', linestyle='dashed', linewidth=2)"
|
| 443 |
+
]
|
| 444 |
+
},
|
| 445 |
+
{
|
| 446 |
+
"cell_type": "markdown",
|
| 447 |
+
"metadata": {},
|
| 448 |
+
"source": [
|
| 449 |
+
"### Same visualization, post cleaning"
|
| 450 |
+
]
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"cell_type": "code",
|
| 454 |
+
"execution_count": 54,
|
| 455 |
+
"metadata": {},
|
| 456 |
+
"outputs": [
|
| 457 |
+
{
|
| 458 |
+
"name": "stderr",
|
| 459 |
+
"output_type": "stream",
|
| 460 |
+
"text": [
|
| 461 |
+
"[nltk_data] Downloading package stopwords to C:\\Users\\Akhil\n",
|
| 462 |
+
"[nltk_data] PC\\AppData\\Roaming\\nltk_data...\n",
|
| 463 |
+
"[nltk_data] Package stopwords is already up-to-date!\n",
|
| 464 |
+
"[nltk_data] Downloading package wordnet to C:\\Users\\Akhil\n",
|
| 465 |
+
"[nltk_data] PC\\AppData\\Roaming\\nltk_data...\n",
|
| 466 |
+
"[nltk_data] Package wordnet is already up-to-date!\n"
|
| 467 |
+
]
|
| 468 |
+
}
|
| 469 |
+
],
|
| 470 |
+
"source": [
|
| 471 |
+
"nltk.download('stopwords')\n",
|
| 472 |
+
"nltk.download('wordnet')\n",
|
| 473 |
+
"\n",
|
| 474 |
+
"stop_words = set(stopwords.words('english'))\n",
|
| 475 |
+
"stemmer = PorterStemmer()\n",
|
| 476 |
+
"lemmatizer = WordNetLemmatizer()"
|
| 477 |
+
]
|
| 478 |
+
},
|
| 479 |
+
{
|
| 480 |
+
"cell_type": "code",
|
| 481 |
+
"execution_count": 55,
|
| 482 |
+
"metadata": {},
|
| 483 |
+
"outputs": [],
|
| 484 |
+
"source": [
|
| 485 |
+
"## cleaning the strings, stemming and lemmatizing\n",
|
| 486 |
+
"\n",
|
| 487 |
+
"def clean_text(text):\n",
|
| 488 |
+
" text = re.sub(r'[^\\w\\s]', '', text)\n",
|
| 489 |
+
" text = text.lower()\n",
|
| 490 |
+
" text = [word for word in text.split() if word not in stop_words]\n",
|
| 491 |
+
" text = [stemmer.stem(word) for word in text] \n",
|
| 492 |
+
" text = [lemmatizer.lemmatize(word) for word in text]\n",
|
| 493 |
+
" return ' '.join(text)\n",
|
| 494 |
+
"\n",
|
| 495 |
+
"df_len_explore['Map Data'] = df_len_explore['Map Data'].apply(clean_text)"
|
| 496 |
+
]
|
| 497 |
+
},
|
| 498 |
+
{
|
| 499 |
+
"cell_type": "code",
|
| 500 |
+
"execution_count": 56,
|
| 501 |
+
"metadata": {},
|
| 502 |
+
"outputs": [
|
| 503 |
+
{
|
| 504 |
+
"name": "stdout",
|
| 505 |
+
"output_type": "stream",
|
| 506 |
+
"text": [
|
| 507 |
+
"Mean string length: 596.3046776232617\n",
|
| 508 |
+
"80th percentile string length: 1114.0\n"
|
| 509 |
+
]
|
| 510 |
+
}
|
| 511 |
+
],
|
| 512 |
+
"source": [
|
| 513 |
+
"print('Mean string length: ', df_len_explore['Map Data'].str.len().mean())\n",
|
| 514 |
+
"print('80th percentile string length: ', df_len_explore['Map Data'].str.len().quantile(0.8))"
|
| 515 |
+
]
|
| 516 |
+
},
|
| 517 |
+
{
|
| 518 |
+
"cell_type": "code",
|
| 519 |
+
"execution_count": 57,
|
| 520 |
+
"metadata": {},
|
| 521 |
+
"outputs": [
|
| 522 |
+
{
|
| 523 |
+
"data": {
|
| 524 |
+
"text/plain": [
|
| 525 |
+
"<matplotlib.lines.Line2D at 0x1c5b5e37b00>"
|
| 526 |
+
]
|
| 527 |
+
},
|
| 528 |
+
"execution_count": 57,
|
| 529 |
+
"metadata": {},
|
| 530 |
+
"output_type": "execute_result"
|
| 531 |
+
},
|
| 532 |
+
{
|
| 533 |
+
"data": {
|
| 534 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGdCAYAAAD0e7I1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA4iElEQVR4nO3de3RU5b3/8c8QyEBgkhBCyIVwERBULhXQmIoIgkBARKHnp4AVlAVqoVXiBbFWxLoaKi21tSjao6CrIC09iBYKp9zRGlEQRBQjUBATEkACmSTA5Pb8/qDMcUiAMOywJ3ver7X2GmbvZ/Z8n2dmZT48e88elzHGCAAAwKEa2F0AAABAXSLsAAAARyPsAAAARyPsAAAARyPsAAAARyPsAAAARyPsAAAARyPsAAAAR2todwGhoKqqSgcPHpTH45HL5bK7HAAAUAvGGBUXFys5OVkNGpx7/oawI+ngwYNKTU21uwwAABCEb7/9Vq1btz7ndsKOJI/HI+n0YEVHR9tcTRiaP18qLZWaNpXuu8/uamwzf9t8lZaXqmmjprrv2vAdBwCoLa/Xq9TUVP/n+Lm4+G2s04MVExOjoqIiwg4AAPVEbT+/OUEZAAA4GmEHAAA4GufswH75+VJlpRQRISUl2V2NbfKL81VpKhXhilCSJ3zHAQCsRtiB/a67TsrLk1JSpNxcu6uxzXV/uk55xXlK8aQoNzN8xwEArMZhLAAA4GiEHQAA4GiEHQAA4GiEHQAA4GiEHQAA4GiEHQAA4GiEHQAA4GiEHQAA4GiEHQAA4GhcQRn2W7tWqqiQGob323HtvWtVUVWhhg3CexwAwGr8Va1j7Z5cUW3d/lnDbKgkhHXubHcFIaFzPOMAAHWBw1gAAMDRCDsAAMDROIwF+y1aJJ04IUVFSWPG2F2NbRZ9vkgnyk8oqlGUxnQL33EAAKsRdmC/J56Q8vKklJSwDjtPrH5CecV5SvGkEHYAwEIcxgIAAI5G2AEAAI5G2AEAAI5G2AEAAI5G2AEAAI5G2AEAAI5G2AEAAI5G2AEAAI7GRQVhv8TEwNswldgsMeAWAGANwg7st2WL3RWEhC2TGAcAqAscxgIAAI5ma9jJysrSddddJ4/Ho4SEBN1xxx3KyckJaHPq1ClNnjxZLVq0ULNmzTRq1CgdOnQooM2BAwc0bNgwRUVFKSEhQY8//rgqKiouZ1cAAECIsjXsbNy4UZMnT9ZHH32k1atXq7y8XIMGDVJpaam/zdSpU/X3v/9dS5Ys0caNG3Xw4EGNHDnSv72yslLDhg1TWVmZPvzwQ7355ptasGCBnnnmGTu6BAAAQozLGGPsLuKMI0eOKCEhQRs3blTfvn1VVFSkli1batGiRfrRj34kSfrqq6901VVXKTs7WzfccINWrlyp2267TQcPHlSrVq0kSfPmzdO0adN05MgRRUZGXvB5vV6vYmJiVFRUpOjoaEv71O7JFdXW7Z81zNLnqPceeEAqLJTi4qRXX7W7Gts88PcHVHiqUHGN4/Tq8PAdBwCordp+fofUOTtFRUWSpLi4OEnS1q1bVV5eroEDB/rbdOnSRW3atFF2drYkKTs7W926dfMHHUkaPHiwvF6vvvjiixqfx+fzyev1Biyw0YoV0t/+dvo2jK3YvUJ/+/JvWrE7vMcBAKwWMmGnqqpKjzzyiG688UZ17dpVklRQUKDIyEjFxsYGtG3VqpUKCgr8bb4fdM5sP7OtJllZWYqJifEvqampFvcGAACEipAJO5MnT9bOnTu1ePHiOn+u6dOnq6ioyL98++23df6cAADAHiFxnZ0pU6Zo+fLl2rRpk1q3bu1fn5iYqLKyMh0/fjxgdufQoUNK/M8F6BITE/Xxxx8H7O/Mt7USz3GROrfbLbfbbXEvAABAKLJ1ZscYoylTpuidd97RunXr1L59+4DtvXr1UqNGjbR27Vr/upycHB04cEDp6emSpPT0dH3++ec6fPiwv83q1asVHR
2tq6+++vJ0BAAAhCxbZ3YmT56sRYsW6d1335XH4/GfYxMTE6MmTZooJiZGEyZMUGZmpuLi4hQdHa2f/vSnSk9P1w033CBJGjRokK6++mr9+Mc/1gsvvKCCggI9/fTTmjx5MrM3AADA3rDzyiuvSJL69esXsH7+/PkaP368JOl3v/udGjRooFGjRsnn82nw4MF6+eWX/W0jIiK0fPlyPfTQQ0pPT1fTpk01btw4Pffcc5erGwAAIITZGnZqc4mfxo0ba+7cuZo7d+4527Rt21b/+Mc/rCwNAAA4RMh8GwsAAKAuhMS3sRDmRo+Wjh2Tmje3uxJbje46WsdOHVPzxuE9DgBgNcIO7Dd7tt0VhITZgxgHAKgLHMYCAACORtgBAACORtgBAACORtiB/bp0kaKjT9+GsS5/7KLorGh1+WN4jwMAWI2wA/uVlEjFxadvw1hJWYmKy4pVUhbe4wAAViPsAAAARyPsAAAARyPsAAAARyPsAAAARyPsAAAARyPsAAAARyPsAAAARyPsAAAAR+NXz2G/efOkkyelJk3srsRW826bp5PlJ9WkUXiPAwBYjbAD+912m90VhITbrmQcAKAucBgLAAA4GmEHAAA4GoexYL+tW6WyMikyUurVy+5qbLP14FaVVZYpMiJSvZLDdxwAwGqEHdhvxAgpL09KSZFyc+2uxjYjFo9QXnGeUjwpys0M33EAAKtxGAsAADgaYQcAADgaYQcAADgaYQcAADgaYQcAADgaYQcAADgaYQcAADgaYQcAADgaYQcAADiarWFn06ZNGj58uJKTk+VyubRs2bKA7S6Xq8Zl9uzZ/jbt2rWrtn3WrFmXuSe4JLt2SUVFp2/D2K7Ju1T0ZJF2TQ7vcQAAq9n6cxGlpaXq0aOH7r//fo0cObLa9vz8/ID7K1eu1IQJEzRq1KiA9c8995wmTpzov+/xeOqmYNQNXi9JksfNOABAXbA17GRkZCgjI+Oc2xMTEwPuv/vuu+rfv7+uuOKKgPUej6daWwAAAKkenbNz6NAhrVixQhMmTKi2bdasWWrRooWuvfZazZ49WxUVFefdl8/nk9frDVgAAIAz1ZtfPX/zzTfl8XiqHe762c9+pp49eyouLk4ffvihpk+frvz8fM2ZM+ec+8rKytLMmTPrumTU1pw5ktcrRUdLmZl2V2ObOdlz5PV5Fe2OVmZ6+I4DAFjNZYwxdhchnT4Z+Z133tEdd9xR4/YuXbro1ltv1UsvvXTe/bzxxht64IEHVFJSIrfbXWMbn88nn8/nv+/1epWamqqioiJFR0cH3YeatHtyRbV1+2cNs/Q56r3WraW8PCklRcrNtbsa27Se01p5xXlK8aQoNzN8xwEAasvr9SomJuaCn9/1Ymbn/fffV05Ojv7yl79csG1aWpoqKiq0f/9+de7cucY2brf7nEEIAAA4S704Z+f1119Xr1691KNHjwu23b59uxo0aKCEhITLUBkAAAh1ts7slJSUaM+ePf77+/bt0/bt2xUXF6c2bdpIOj1FtWTJEv32t7+t9vjs7Gxt3rxZ/fv3l8fjUXZ2tqZOnap77rlHzZs3v2z9AAAAocvWsLNlyxb179/ffz/zPyenjhs3TgsWLJAkLV68WMYYjR49utrj3W63Fi9erGeffVY+n0/t27fX1KlT/fsBAACwNez069dPFzo/etKkSZo0aVKN23r27KmPPvqoLkoDAAAOUS/O2QEAAAgWYQcAADgaYQcAADhavbjODhyuZ08pNVVq2dLuSmzVM6mnUmNS1TIqvMcBAKxG2IH93nvP7gpCwnujGQcAqAscxgIAAI5G2AEAAI5G2AEAAI7GOTuw3+23S0eOnD5BOYzP37n97dt15MQRtYxqyfk7AGAhwg7s9+mnUl6elJJidyW2+jT/U+UV5ynFE97jAABW4zAWAABwNMIOAABwNMIOAABwNMIOAABwNMIOAABwNMIOAABwNMIOAABwNMIOAABwNC4qCPtlZkperxQdbX
cltspMz5TX51W0O7zHAQCsRtiB/TIz7a4gJGSmMw4AUBc4jAUAAByNsAMAAByNw1iwX3GxZIzkckkej93V2KbYVywjI5dc8rjDdxwAwGrM7MB+V10lxcScvg1jV829SjGzYnTV3PAeBwCwGmEHAAA4GmEHAAA4GmEHAAA4GmEHAAA4GmEHAAA4GmEHAAA4mq1hZ9OmTRo+fLiSk5Plcrm0bNmygO3jx4+Xy+UKWIYMGRLQprCwUGPHjlV0dLRiY2M1YcIElZSUXMZeAACAUGZr2CktLVWPHj00d+7cc7YZMmSI8vPz/cvbb78dsH3s2LH64osvtHr1ai1fvlybNm3SpEmT6rp0AABQT9h6BeWMjAxlZGSct43b7VZiYmKN23bt2qVVq1bpk08+Ue/evSVJL730koYOHarf/OY3Sk5OtrxmAABQv4T8z0Vs2LBBCQkJat68uW655RY9//zzatGihSQpOztbsbGx/qAjSQMHDlSDBg20efNm3XnnnTXu0+fzyefz+e97vd667QTO7913pbIyKTLS7kps9e7d76qsskyREeE9DgBgtZAOO0OGDNHIkSPVvn177d27V0899ZQyMjKUnZ2tiIgIFRQUKCEhIeAxDRs2VFxcnAoKCs6536ysLM2cObOuy0dt9epldwUhoVcy4wAAdSGkw87dd9/t/3e3bt3UvXt3dejQQRs2bNCAAQOC3u/06dOVmZnpv+/1epWamnpJtQIAgNBUr756fsUVVyg+Pl579uyRJCUmJurw4cMBbSoqKlRYWHjO83yk0+cBRUdHBywAAMCZQnpm52y5ubk6evSokpKSJEnp6ek6fvy4tm7dql7/ORSybt06VVVVKS0tzc5ScTGWL5dOnpSaNJFuu83uamyz/OvlOll+Uk0aNdFtV4bvOACA1WwNOyUlJf5ZGknat2+ftm/frri4OMXFxWnmzJkaNWqUEhMTtXfvXj3xxBPq2LGjBg8eLEm66qqrNGTIEE2cOFHz5s1TeXm5pkyZorvvvptvYtUnDz4o5eVJKSlSbq7d1djmweUPKq84TymeFOVmhu84AIDVbD2MtWXLFl177bW69tprJUmZmZm69tpr9cwzzygiIkI7duzQ7bffriuvvFITJkxQr1699P7778vtdvv3sXDhQnXp0kUDBgzQ0KFD1adPH7322mt2dQkAAIQYW2d2+vXrJ2PMObf/7//+7wX3ERcXp0WLFllZFgAAcJB6dYIyAADAxSLsAAAARyPsAAAARyPsAAAARyPsAAAARyPsAAAARyPswH7Nmkkez+nbMNYsspk8kR41iwzvcQAAq9Wrn4uAQ331ld0VhISvpjAOAFAXmNkBAACORtgBAACORtgBAACOxjk7sN/jj0vHjknNm0uzZ9tdjW0e/+fjOnbqmJo3bq7Zg8J3HADAaoQd2O/tt6W8PCklJazDzts731ZecZ5SPCmEHQCwEIexAACAoxF2AACAoxF2AACAoxF2AACAoxF2AACAoxF2AACAoxF2AACAoxF2AACAo3FRQdhv2DCpsFCKi7O7ElsN6zRMhacKFdc4vMcBAKxG2IH9Xn3V7gpCwqvDGQcAqAscxgIAAI5G2AEAAI4WVNj597//bXUdAAAAdSKosNOxY0f1799ff/7zn3Xq1Cmra0K46d1bat369G0Y6/1ab7We01q9XwvvcQAAqwUVdj799FN1795dmZmZSkxM1AMPPKCPP/7Y6toQLgoKpLy807dhrKCkQHnFeSooCe9xAACrBRV2fvCDH+j3v/+9Dh48qDfeeEP5+fnq06ePunbtqjlz5ujIkSNW1wkAABCUSzpBuWHDhho5cqSWLFmiX//619qzZ48ee+wxpaam6t5771V+fr5VdQIAAATlksLOli1b9JOf/ERJSUmaM2eOHnvsMe3du1erV6/WwYMHNWLECKvqBAAACEpQYWfOnDnq1q2bfvjDH+rgwYN666239M033+j5559X+/btddNNN2nBggX69N
NPz7ufTZs2afjw4UpOTpbL5dKyZcv828rLyzVt2jR169ZNTZs2VXJysu69914dPHgwYB/t2rWTy+UKWGbNmhVMtwAAgAMFdQXlV155Rffff7/Gjx+vpKSkGtskJCTo9ddfP+9+SktL1aNHD91///0aOXJkwLYTJ07o008/1S9+8Qv16NFDx44d08MPP6zbb79dW7ZsCWj73HPPaeLEif77Ho8nmG4BAAAHCirs7N69+4JtIiMjNW7cuPO2ycjIUEZGRo3bYmJitHr16oB1f/zjH3X99dfrwIEDatOmjX+9x+NRYmJiLSoHAADhJqjDWPPnz9eSJUuqrV+yZInefPPNSy7qXIqKiuRyuRQbGxuwftasWWrRooWuvfZazZ49WxUVFefdj8/nk9frDVgAAIAzBRV2srKyFB8fX219QkKCfvWrX11yUTU5deqUpk2bptGjRys6Otq//mc/+5kWL16s9evX64EHHtCvfvUrPfHEE+fdV1ZWlmJiYvxLampqndQMAADsF9RhrAMHDqh9+/bV1rdt21YHDhy45KLOVl5erv/3//6fjDF65ZVXArZlZmb6/929e3dFRkbqgQceUFZWltxud437mz59esDjvF4vgcdOL7wgnTghRUXZXYmtXrj1BZ0oP6GoRuE9DgBgtaDCTkJCgnbs2KF27doFrP/ss8/UokULK+ryOxN0vvnmG61bty5gVqcmaWlpqqio0P79+9W5c+ca27jd7nMGIdhgzBi7KwgJY7oxDgBQF4IKO6NHj9bPfvYzeTwe9e3bV5K0ceNGPfzww7r77rstK+5M0Nm9e7fWr19fqyC1fft2NWjQQAkJCZbVAQAA6q+gws4vf/lL7d+/XwMGDFDDhqd3UVVVpXvvvfeiztkpKSnRnj17/Pf37dun7du3Ky4uTklJSfrRj36kTz/9VMuXL1dlZaUK/vPbSXFxcYqMjFR2drY2b96s/v37y+PxKDs7W1OnTtU999yj5s2bB9M1AADgMC5jjAn2wV9//bU+++wzNWnSRN26dVPbtm0v6vEbNmxQ//79q60fN26cnn322RrPC5Kk9evXq1+/fvr000/1k5/8RF999ZV8Pp/at2+vH//4x8rMzLyow1Rer1cxMTEqKiq64GGyi9XuyRXV1u2fNczS56j3cnKkigqpYUPpHIcew0HOdzmqqKpQwwYN1Tk+fMcBAGqrtp/flxR2nIKwY7PWrU//6nlKipSba3c1tmk9p7XyivOU4klRbmb4jgMA1FZtP7+DOoxVWVmpBQsWaO3atTp8+LCqqqoCtq9bty6Y3QIAAFguqLDz8MMPa8GCBRo2bJi6du0ql8tldV0AAACWCCrsLF68WH/96181dOhQq+sBAACwVFBXUI6MjFTHjh2trgUAAMByQYWdRx99VL///e/Fuc0AACDUBXUY64MPPtD69eu1cuVKXXPNNWrUqFHA9qVLl1pSHAAAwKUKKuzExsbqzjvvtLoWAAAAywUVdubPn291HQAAAHUiqHN2JKmiokJr1qzRq6++quLiYknSwYMHVVJSYllxAAAAlyqomZ1vvvlGQ4YM0YEDB+Tz+XTrrbfK4/Ho17/+tXw+n+bNm2d1nXCyTz6RKiuliAi7K7HVJxM/UaWpVIQrvMcBAKwW9EUFe/furc8++yzgl8jvvPNOTZw40bLiECaSkuyuICQkeRgHAKgLQYWd999/Xx9++KEiIyMD1rdr1055eXmWFAYAAGCFoM7ZqaqqUmVlZbX1ubm58ng8l1wUAACAVYKa2Rk0aJBefPFFvfbaa5Ikl8ulkpISzZgxg5+QwMV77TWppERq1kyaNMnuamzz2tbXVFJWomaRzTSpV/iOAwBYzWWCuAxybm6uBg8eLGOMdu/erd69e2v37t2Kj4/Xpk2blJCQUBe11pna/kR8MNo9uaLauv2zhln6HPVe69ZSXp6UkiLl5tpdjW1az2mtvOI8pXhSlJsZvuMAALVV28/voGZ2Wrdurc8++0
yLFy/Wjh07VFJSogkTJmjs2LFq0qRJ0EUDAABYLaiwI0kNGzbUPffcY2UtAAAAlgsq7Lz11lvn3X7vvfcGVQwAAIDVgr7OzveVl5frxIkTioyMVFRUFGEHAACEjKC+en7s2LGApaSkRDk5OerTp4/efvttq2sEAAAIWtC/jXW2Tp06adasWdVmfQAAAOxkWdiRTp+0fPDgQSt3CQAAcEmCOmfnvffeC7hvjFF+fr7++Mc/6sYbb7SkMAAAACsEFXbuuOOOgPsul0stW7bULbfcot/+9rdW1IVwcuWVUkyM1KqV3ZXY6soWVyqmcYxaNQ3vcQAAqwUVdqqqqqyuA+Fs3Tq7KwgJ68YxDgBQFyw9ZwcAACDUBDWzk5mZWeu2c+bMCeYpAAAALBFU2Nm2bZu2bdum8vJyde7cWZL09ddfKyIiQj179vS3c7lc1lQJAAAQpKDCzvDhw+XxePTmm2+qefPmkk5faPC+++7TTTfdpEcffdTSIuFwY8dK330nxcdLCxfaXY1txi4dq+9OfKf4qHgtHBm+4wAAVgsq7Pz2t7/VP//5T3/QkaTmzZvr+eef16BBgwg7uDgbN0p5eVJKit2V2Grj/o3KK85Tiie8xwEArBbUCcper1dHjhyptv7IkSMqLi6u9X42bdqk4cOHKzk5WS6XS8uWLQvYbozRM888o6SkJDVp0kQDBw7U7t27A9oUFhZq7Nixio6OVmxsrCZMmKCSkpJgugUAABwoqLBz55136r777tPSpUuVm5ur3Nxc/c///I8mTJigkSNH1no/paWl6tGjh+bOnVvj9hdeeEF/+MMfNG/ePG3evFlNmzbV4MGDderUKX+bsWPH6osvvtDq1au1fPlybdq0SZMmTQqmWwAAwIGCOow1b948PfbYYxozZozKy8tP76hhQ02YMEGzZ8+u9X4yMjKUkZFR4zZjjF588UU9/fTTGjFihCTprbfeUqtWrbRs2TLdfffd2rVrl1atWqVPPvlEvXv3liS99NJLGjp0qH7zm98oOTk5mO4BAAAHCWpmJyoqSi+//LKOHj3q/2ZWYWGhXn75ZTVt2tSSwvbt26eCggINHDjQvy4mJkZpaWnKzs6WJGVnZys2NtYfdCRp4MCBatCggTZv3nzOfft8Pnm93oAFAAA40yVdVDA/P1/5+fnq1KmTmjZtKmOMVXWpoKBAktTqrJ8QaNWqlX9bQUGBEhISArY3bNhQcXFx/jY1ycrKUkxMjH9JTU21rG4AABBaggo7R48e1YABA3TllVdq6NChys/PlyRNmDChXnwTa/r06SoqKvIv3377rd0lAQCAOhJU2Jk6daoaNWqkAwcOKCoqyr/+rrvu0qpVqywpLDExUZJ06NChgPWHDh3yb0tMTNThw4cDtldUVKiwsNDfpiZut1vR0dEBCwAAcKagws4///lP/frXv1br1q0D1nfq1EnffPONJYW1b99eiYmJWrt2rX+d1+vV5s2blZ6eLklKT0/X8ePHtXXrVn+bdevWqaqqSmlpaZbUAQAA6regvo1VWloaMKNzRmFhodxud633U1JSoj179vjv79u3T9u3b1dcXJzatGmjRx55RM8//7w6deqk9u3b6xe/+IWSk5N1xx13SJKuuuoqDRkyRBMnTtS8efNUXl6uKVOm6O677+abWPXJxIlSUZEUE2N3Jbaa2HOiinxFinGH9zgAgNWCCjs33XST3nrrLf3yl7+UdPo3sKqqqvTCCy+of//+td7Pli1bAtqf+YHRcePGacGCBXriiSdUWlqqSZMm6fjx4+rTp49WrVqlxo0b+x+zcOFCTZkyRQMGDFCDBg00atQo/eEPfwimW7DLjBl2VxASZvRjHACgLrhMEF+h2rlzpwYMGKCePXtq3bp1uv322/XFF1+osLBQ//rXv9ShQ4e6qLXOeL1excTEqKioyPLzd9o9uaLauv2zhln6HAAAhKPafn4Hdc5O165d9fXXX6tPnz4aMWKESktLNXLkSG
3btq3eBR0AAOBsF30Yq7y8XEOGDNG8efP085//vC5qAgAAsMxFz+w0atRIO3bsqItaEK5at5ZcrtO3Yaz1nNZyzXSp9ZzwHgcAsFpQh7Huuecevf7661bXAgAAYLmgvo1VUVGhN954Q2vWrFGvXr2q/R7WnDlzLCkOAADgUl1U2Pn3v/+tdu3aaefOnerZs6ck6euvvw5o43K5rKsOAADgEl1U2OnUqZPy8/O1fv16Sad/HuIPf/hDtR/rBAAACBUXdc7O2ZfkWblypUpLSy0tCAAAwEpBnaB8RhDXIwQAALisLirsuFyuaufkcI4OAAAIZRd1zo4xRuPHj/f/2OepU6f04IMPVvs21tKlS62r0IHO/gkJfj4CAIC6c1FhZ9y4cQH377nnHkuLAQAAsNpFhZ358+fXVR0IZ3/+s+TzSf+ZMQxXfx75Z/kqfHI3DO9xAACrBXVRQcBS/frZXUFI6Neun90lAIAjXdK3sQAAAEIdYQcAADgah7Fgvw0b/u+cnTA+pLVh/wb/OTsc0gIA6xB2YL977pHy8qSUFCk31+5qbHPP0nuUV5ynFE+KcjPDdxwAwGocxgIAAI5G2AEAAI5G2AEAAI5G2AEAAI5G2AEAAI5G2AEAAI5G2AEAAI5G2AEAAI5G2AEAAI7GFZRhvzC+avL3cdVkAKgbzOwAAABHI+wAAABHI+wAAABHC/mw065dO7lcrmrL5MmTJUn9+vWrtu3BBx+0uWpclJkzpczM07dhbOaGmcr830zN3BDe4wAAVgv5E5Q/+eQTVVZW+u/v3LlTt956q/7rv/7Lv27ixIl67rnn/PejoqIua424RH/6k5SXJ6WkSDNm2F2Nbf706Z+UV5ynFE+KZvQL33EAAKuFfNhp2bJlwP1Zs2apQ4cOuvnmm/3roqKilJiYeLlLAwAA9UDIH8b6vrKyMv35z3/W/fffL5fL5V+/cOFCxcfHq2vXrpo+fbpOnDhx3v34fD55vd6ABQAAOFPIz+x837Jly3T8+HGNHz/ev27MmDFq27atkpOTtWPHDk2bNk05OTlaunTpOfeTlZWlmWF+fggAAOGiXoWd119/XRkZGUpOTvavmzRpkv/f3bp1U1JSkgYMGKC9e/eqQ4cONe5n+vTpyszM9N/3er1KTU2tu8IBAIBt6k3Y+eabb7RmzZrzzthIUlpamiRpz5495ww7brdbbrfb8hoBAEDoqTfn7MyfP18JCQkaNmzYedtt375dkpSUlHQZqgIAAKGuXszsVFVVaf78+Ro3bpwaNvy/kvfu3atFixZp6NChatGihXbs2KGpU6eqb9++6t69u40VAwCAUFEvws6aNWt04MAB3X///QHrIyMjtWbNGr344osqLS1VamqqRo0apaefftqmSgEAQKipF2Fn0KBBMsZUW5+amqqNGzfaUBEsdfPN0nffSfHxdldiq5vb3azvTnyn+KjwHgcAsFq9CDtwuIUL7a4gJCwcyTgAQF2oNycoAwAABIOwAwAAHI2wAwAAHI1zdmC/W26RDh2SWrWS1q2zuxrb3PLmLTpUekitmrbSunHhOw4AYDXCDuz39ddSXp5UVGR3Jbb6+ujXyivOU9Gp8B4HALAah7EAAICjEXYAAICjEXYAAICjEXYAAICjEXYAAICjEXYAAICjEXYAAICjEXYAAICjcVFB2O+ZZ6SSEqlZM7srsdUzNz+jkrISNYsM73EAAKsRdmC/SZPsriAkTOrFOABAXeAwFgAAcDTCDgAAcDQOY8F++flSZaUUESElJdldjW3yi/NVaSoV4YpQkid8xwEArEbYgf2uu+70r56npEi5uXZXY5vr/nSd8orzlOJJUW5m+I4DAFiNw1gAAMDRCDsAAMDRCDsAAMDRCDsAAMDRCDsAAMDRCDsAAMDRCDsAAMDRCDsAAMDRCDsAAMDRuIIy7Ld2rVRRITUM77fj2nvXqqKqQg0bhPc4AIDVQnpm59lnn5XL5QpYunTp4t9+6tQpTZ48WS
1atFCzZs00atQoHTp0yMaKEZTOnaVrrjl9G8Y6x3fWNQnXqHN8eI8DAFgtpMOOJF1zzTXKz8/3Lx988IF/29SpU/X3v/9dS5Ys0caNG3Xw4EGNHDnSxmoBAECoCfn58oYNGyoxMbHa+qKiIr3++utatGiRbrnlFknS/PnzddVVV+mjjz7SDTfccLlLBQAAISjkw87u3buVnJysxo0bKz09XVlZWWrTpo22bt2q8vJyDRw40N+2S5cuatOmjbKzs88bdnw+n3w+n/++1+ut0z7gAhYtkk6ckKKipDFj7K7GNos+X6QT5ScU1ShKY7qF7zgAgNVCOuykpaVpwYIF6ty5s/Lz8zVz5kzddNNN2rlzpwoKChQZGanY2NiAx7Rq1UoFBQXn3W9WVpZmzpxZh5XjojzxhJSXJ6WkhHXYeWL1E8orzlOKJ4WwAwAWCumwk5GR4f939+7dlZaWprZt2+qvf/2rmjRpEvR+p0+frszMTP99r9er1NTUS6oVAACEppA/Qfn7YmNjdeWVV2rPnj1KTExUWVmZjh8/HtDm0KFDNZ7j831ut1vR0dEBCwAAcKZ6FXZKSkq0d+9eJSUlqVevXmrUqJHWrl3r356Tk6MDBw4oPT3dxioBAEAoCenDWI899piGDx+utm3b6uDBg5oxY4YiIiI0evRoxcTEaMKECcrMzFRcXJyio6P105/+VOnp6XwTCwAA+IV02MnNzdXo0aN19OhRtWzZUn369NFHH32kli1bSpJ+97vfqUGDBho1apR8Pp8GDx6sl19+2eaqAQBAKAnpsLN48eLzbm/cuLHmzp2ruXPnXqaKAABAfVOvztkBAAC4WIQdAADgaCF9GAth4sylAi5wyQCnS2yWGHALALAGYQf227LF7gpCwpZJjAMA1AUOYwEAAEcj7AAAAEcj7AAAAEfjnB3Y74EHpMJCKS5OevVVu6uxzQN/f0CFpwoV1zhOrw4P33EAAKsRdmC/FSukvDwpJcXuSmy1YvcK5RXnKcUT3uMAAFbjMBYAAHA0wg4AAHA0wg4AAHA0wg4AAHA0wg4AAHA0wg4AAHA0vnoeAto9uaLauv2zhtlQCQAAzsPMDgAAcDRmdmC/0aOlY8ek5s3trsRWo7uO1rFTx9S8cXiPAwBYjbAD+82ebXcFIWH2IMYBAOoCh7EAAICjEXYAAICjEXYAAICjEXZgvy5dpOjo07dhrMsfuyg6K1pd/hje4wAAViPswH4lJVJx8enbMFZSVqLismKVlIX3OACA1Qg7AADA0Qg7AADA0bjOTog6+yck+PkIAACCw8wOAABwNMIOAABwNMIOAABwNMIOAABwtJAOO1lZWbruuuvk8XiUkJCgO+64Qzk5OQFt+vXrJ5fLFbA8+OCDNlUMAABCTUh/G2vjxo2aPHmyrrvuOlVUVOipp57SoEGD9OWXX6pp06b+dhMnTtRzzz3nvx8VFWVHuQjWvHnSyZNSkyZ2V2KrebfN08nyk2rSKLzHAQCsFtJhZ9WqVQH3FyxYoISEBG3dulV9+/b1r4+KilJiYuLlLg9Wue02uysICbddyTgAQF0I6cNYZysqKpIkxcXFBaxfuHCh4uPj1bVrV02fPl0nTpw47358Pp+8Xm/AAgAAnCmkZ3a+r6qqSo888ohuvPFGde3a1b9+zJgxatu2rZKTk7Vjxw5NmzZNOTk5Wrp06Tn3lZWVpZkzZ16OsgEAgM1cxhhjdxG18dBDD2nlypX64IMP1Lp163O2W7dunQYMGKA9e/aoQ4cONbbx+Xzy+Xz++16vV6mpqSoqKlJ0dLSldZ99JWQrOeaqylu3SmVlUmSk1KuX3dXYZuvBrSqrLFNkRKR6JYfvOABAbXm9XsXExFzw87tezOxMmTJFy5cv16ZNm84bdCQpLS1Nks4bdtxut9xut+V1IkgjRkh5eVJKipSba3c1thmxeITyivOU4klRbmb4jgMAWC2kw44xRj/96U/1zjvvaMOGDWrfvv0FH7N9+3
ZJUlJSUh1XBwAA6oOQDjuTJ0/WokWL9O6778rj8aigoECSFBMToyZNmmjv3r1atGiRhg4dqhYtWmjHjh2aOnWq+vbtq+7du9tcPQAACAUhHXZeeeUVSacvHPh98+fP1/jx4xUZGak1a9boxRdfVGlpqVJTUzVq1Cg9/fTTNlQLAABCUUiHnQudO52amqqNGzdepmoAAEB9VK+uswMAAHCxCDsAAMDRCDsAAMDRQvqcHZzf2RcsrMuLDF7O5wIAwErM7AAAAEdjZgf227VLMkZyueyuxFa7Ju+SkZFL4T0OAGA1wg7s5/HYXUFI8LgZBwCoCxzGAgAAjkbYAQAAjsZhLNhvzhzJ65Wio6XMTLursc2c7Dny+ryKdkcrMz18xwEArEbYgf3mzJHy8qSUlLAPO3nFeUrxpBB2AMBChB3Yqt2TK5RddEpJkvKLTin9yRXVruFz9jV+JK7zAwCoPc7ZAQAAjsbMTpipaZbkbMyaAACchLCDkFObQAYAQG1xGAsAADgaMzsOYtWMSG32w0nDAID6gpkdAADgaMzswLHOnn1i5gkAwhNhB7bbmdhB+SfidTQq5rI/dygFop5JPZUak6qWUS1tqwEAnIiwA9tNHPWM3SWEhPdGv2d3CQDgSIQd1Bm+Qh4olGaRACCccIIyAABwNGZ2AJvU5dWsuTQAAPwfwg4sE+xhqz/9z3NqcaJIR6Nigj5/xwmHzA5HPqdKV5EiTIwSykL3PKZgQlp9CF/BHGYMtl8c0gQuL8IObNe1YK+SSo4qv1kLu0uxVVmDvap0HVWECe9xAACrEXZQL9XVTE6o/U+9PsyIBMuqMavNfphJAcIbJygDAABHY2YHuEjB/nZYXQqmplCbAbFqFsvu87eYRQJCD2EHYcPuD0GrOKUfwXBq3518uNIK9XF8alNzbd/Pl/NQulPDumMOY82dO1ft2rVT48aNlZaWpo8//tjukgAAQAhwxMzOX/7yF2VmZmrevHlKS0vTiy++qMGDBysnJ0cJCQl2l4d6LpxmE4JpY+XjnKg+jEV9/Np9XV6n6kLPVR9mOy7n+NQHjpjZmTNnjiZOnKj77rtPV199tebNm6eoqCi98cYbdpcGAABsVu9ndsrKyrR161ZNnz7dv65BgwYaOHCgsrOza3yMz+eTz+fz3y8qKpIkeb1ey+ur8p2wfJ9OU2yq1PQ/t+E8XsZVdfpW4T0O39dm6hLb9ltTm50zBwfcr8vX6ey/R11n/G9Q+zm75trsp6a/hbV5XDCvV12O89n7Pnu/UvV+1dTm7OevzWdFTTWf/bja9ivY57uQ2rxewX4u1mZcrXCmPmPM+Ruaei4vL89IMh9++GHA+scff9xcf/31NT5mxowZRhILCwsLCwuLA5Zvv/32vFmh3s/sBGP69OnKzMz036+qqlJhYaFatGghl8tlyXN4vV6lpqbq22+/VXR0tCX7rC/Cue9SePefvodn36Xw7n84912yt//GGBUXFys5Ofm87ep92ImPj1dERIQOHToUsP7QoUNKTEys8TFut1tutztgXWxsbJ3UFx0dHZZvfim8+y6Fd//pe3j2XQrv/odz3yX7+h8TE3PBNvX+BOXIyEj16tVLa9eu9a+rqqrS2rVrlZ6ebmNlAAAgFNT7mR1JyszM1Lhx49S7d29df/31evHFF1VaWqr77rvP7tIAAIDNHBF27rrrLh05ckTPPPOMCgoK9IMf/ECrVq1Sq1atbKvJ7XZrxowZ1Q6XhYNw7rsU3v2n7+HZdym8+x/OfZfqR/9dxlzo+1oAAAD1V70/ZwcAAOB8CDsAAMDRCDsAAMDRCDsAAMDRCDt1YO7cuWrXrp0aN26stLQ0ffzxx3aXdMmeffZZuVyugKVLly7+7adOndLkyZPVokULNWvWTKNGjap2occDBw5o2LBhioqKUkJCgh5//HFVVFRc7q7UyqZNmzR8+HAlJyfL5XJp2bJlAduNMXrmmWeUlJSkJk2aaODAgdq9e3dAm8
LCQo0dO1bR0dGKjY3VhAkTVFJSEtBmx44duummm9S4cWOlpqbqhRdeqOuuXdCF+j5+/Phq74UhQ4YEtKmvfc/KytJ1110nj8ejhIQE3XHHHcrJyQloY9V7fcOGDerZs6fcbrc6duyoBQsW1HX3zqs2fe/Xr1+11/7BBx8MaFMf+y5Jr7zyirp37+6/MF56erpWrlzp3+7U1126cN8d8bpb8gNV8Fu8eLGJjIw0b7zxhvniiy/MxIkTTWxsrDl06JDdpV2SGTNmmGuuucbk5+f7lyNHjvi3P/jggyY1NdWsXbvWbNmyxdxwww3mhz/8oX97RUWF6dq1qxk4cKDZtm2b+cc//mHi4+PN9OnT7ejOBf3jH/8wP//5z83SpUuNJPPOO+8EbJ81a5aJiYkxy5YtM5999pm5/fbbTfv27c3Jkyf9bYYMGWJ69OhhPvroI/P++++bjh07mtGjR/u3FxUVmVatWpmxY8eanTt3mrfffts0adLEvPrqq5ermzW6UN/HjRtnhgwZEvBeKCwsDGhTX/s+ePBgM3/+fLNz506zfft2M3ToUNOmTRtTUlLib2PFe/3f//63iYqKMpmZmebLL780L730komIiDCrVq26rP39vtr0/eabbzYTJ04MeO2Lior82+tr340x5r333jMrVqwwX3/9tcnJyTFPPfWUadSokdm5c6cxxrmvuzEX7rsTXnfCjsWuv/56M3nyZP/9yspKk5ycbLKysmys6tLNmDHD9OjRo8Ztx48fN40aNTJLlizxr9u1a5eRZLKzs40xpz9AGzRoYAoKCvxtXnnlFRMdHW18Pl+d1n6pzv7Ar6qqMomJiWb27Nn+dcePHzdut9u8/fbbxhhjvvzySyPJfPLJJ/42K1euNC6Xy+Tl5RljjHn55ZdN8+bNA/o/bdo007lz5zruUe2dK+yMGDHinI9xSt+NMebw4cNGktm4caMxxrr3+hNPPGGuueaagOe66667zODBg+u6S7V2dt+NOf2h9/DDD5/zMU7p+xnNmzc3//3f/x1Wr/sZZ/pujDNedw5jWaisrExbt27VwIED/esaNGiggQMHKjs728bKrLF7924lJyfriiuu0NixY3XgwAFJ0tatW1VeXh7Q7y5duqhNmzb+fmdnZ6tbt24BF3ocPHiwvF6vvvjii8vbkUu0b98+FRQUBPQ3JiZGaWlpAf2NjY1V7969/W0GDhyoBg0aaPPmzf42ffv2VWRkpL/N4MGDlZOTo2PHjl2m3gRnw4YNSkhIUOfOnfXQQw/p6NGj/m1O6ntRUZEkKS4uTpJ17/Xs7OyAfZxpE0p/J87u+xkLFy5UfHy8unbtqunTp+vEiRP+bU7pe2VlpRYvXqzS0lKlp6eH1et+dt/PqO+vuyOuoBwqvvvuO1VWVla7cnOrVq301Vdf2VSVNdLS0rRgwQJ17txZ+fn5mjlzpm666Sbt3LlTBQUFioyMrPZjqq1atVJBQYEkqaCgoMZxObOtPjlTb039+X5/ExISArY3bNhQcXFxAW3at29fbR9ntjVv3rxO6r9UQ4YM0ciRI9W+fXvt3btXTz31lDIyMpSdna2IiAjH9L2qqkqPPPKIbrzxRnXt2lWSLHuvn6uN1+vVyZMn1aRJk7roUq3V1HdJGjNmjNq2bavk5GTt2LFD06ZNU05OjpYuXSqp/vf9888/V3p6uk6dOqVmzZrpnXfe0dVXX63t27c7/nU/V98lZ7zuhB3USkZGhv/f3bt3V1pamtq2bau//vWvtv9hxuV19913+//drVs3de/eXR06dNCGDRs0YMAAGyuz1uTJk7Vz50598MEHdpdy2Z2r75MmTfL/u1u3bkpKStKAAQO0d+9edejQ4XKXabnOnTtr+/btKioq0t/+9jeNGzdOGzdutLusy+Jcfb/66qsd8bpzGMtC8fHxioiIqHaG/qFDh5SYmGhTVXUjNjZWV155pfbs2aPExESVlZXp+PHjAW2+3+/ExMQax+XMtvrkTL
3ne50TExN1+PDhgO0VFRUqLCx03JhcccUVio+P1549eyQ5o+9TpkzR8uXLtX79erVu3dq/3qr3+rnaREdH2/6fh3P1vSZpaWmSFPDa1+e+R0ZGqmPHjurVq5eysrLUo0cP/f73vw+L1/1cfa9JfXzdCTsWioyMVK9evbR27Vr/uqqqKq1duzbg2KcTlJSUaO/evUpKSlKvXr3UqFGjgH7n5OTowIED/n6np6fr888/D/gQXL16taKjo/1TpfVF+/btlZiYGNBfr9erzZs3B/T3+PHj2rp1q7/NunXrVFVV5f9DkZ6erk2bNqm8vNzfZvXq1ercuXNIHMaprdzcXB09elRJSUmS6nffjTGaMmWK3nnnHa1bt67aoTar3uvp6ekB+zjTxs6/Exfqe022b98uSQGvfX3s+7lUVVXJ5/M5+nU/lzN9r0m9fN0vy2nQYWTx4sXG7XabBQsWmC+//NJMmjTJxMbGBpylXh89+uijZsOGDWbfvn3mX//6lxk4cKCJj483hw8fNsac/lpmmzZtzLp168yWLVtMenq6SU9P9z/+zFcTBw0aZLZv325WrVplWrZsGbJfPS8uLjbbtm0z27ZtM5LMnDlzzLZt28w333xjjDn91fPY2Fjz7rvvmh07dpgRI0bU+NXza6+91mzevNl88MEHplOnTgFfvz5+/Lhp1aqV+fGPf2x27txpFi9ebKKiomz/+vX5+l5cXGwee+wxk52dbfbt22fWrFljevbsaTp16mROnTrl30d97ftDDz1kYmJizIYNGwK+ZnvixAl/Gyve62e+hvv444+bXbt2mblz59r+FeQL9X3Pnj3mueeeM1u2bDH79u0z7777rrniiitM3759/fuor303xpgnn3zSbNy40ezbt8/s2LHDPPnkk8blcpl//vOfxhjnvu7GnL/vTnndCTt14KWXXjJt2rQxkZGR5vrrrzcfffSR3SVdsrvuusskJSWZyMhIk5KSYu666y6zZ88e//aTJ0+an/zkJ6Z58+YmKirK3HnnnSY/Pz9gH/v37zcZGRmmSZMmJj4+3jz66KOmvLz8cnelVtavX28kVVvGjRtnjDn99fNf/OIXplWrVsbtdpsBAwaYnJycgH0cPXrUjB492jRr1sxER0eb++67zxQXFwe0+eyzz0yfPn2M2+02KSkpZtasWZeri+d0vr6fOHHCDBo0yLRs2dI0atTItG3b1kycOLFamK+vfa+p35LM/Pnz/W2seq+vX7/e/OAHPzCRkZHmiiuuCHgOO1yo7wcOHDB9+/Y1cXFxxu12m44dO5rHH3884HorxtTPvhtjzP3332/atm1rIiMjTcuWLc2AAQP8QccY577uxpy/70553V3GGHN55pAAAAAuP87ZAQAAjkbYAQAAjkbYAQAAjkbYAQAAjkbYAQAAjkbYAQAAjkbYAQAAjkbYAQAAjkbYAQAAjkbYAQAAjkbYAQAAjkbYAQAAjvb/AeumRhJ66noPAAAAAElFTkSuQmCC",
|
| 535 |
+
"text/plain": [
|
| 536 |
+
"<Figure size 640x480 with 1 Axes>"
|
| 537 |
+
]
|
| 538 |
+
},
|
| 539 |
+
"metadata": {},
|
| 540 |
+
"output_type": "display_data"
|
| 541 |
+
}
|
| 542 |
+
],
|
| 543 |
+
"source": [
|
| 544 |
+
"df_len_explore['Map Data'].str.len().plot(kind='hist', bins=100)\n",
|
| 545 |
+
"plt.axvline(df_len_explore['Map Data'].str.len().mean(), color='red', linestyle='dashed', linewidth=2)\n",
|
| 546 |
+
"plt.axvline(df_len_explore['Map Data'].str.len().quantile(0.80), color='green', linestyle='dashed', linewidth=2)"
|
| 547 |
+
]
|
| 548 |
+
},
|
| 549 |
+
{
|
| 550 |
+
"cell_type": "code",
|
| 551 |
+
"execution_count": 59,
|
| 552 |
+
"metadata": {},
|
| 553 |
+
"outputs": [
|
| 554 |
+
{
|
| 555 |
+
"name": "stdout",
|
| 556 |
+
"output_type": "stream",
|
| 557 |
+
"text": [
|
| 558 |
+
"Original data length: 1225\n",
|
| 559 |
+
"Number of blanks: 407\n",
|
| 560 |
+
"Number of strings with length > 5000: 27\n",
|
| 561 |
+
"Number of useful rows: 791\n"
|
| 562 |
+
]
|
| 563 |
+
}
|
| 564 |
+
],
|
| 565 |
+
"source": [
|
| 566 |
+
"## Final Summary\n",
|
| 567 |
+
"\n",
|
| 568 |
+
"print('Original data length: ', len(df))\n",
|
| 569 |
+
"print('Number of blanks: ', len(df) - len(df[df['Map Data'].str.len() > 0]))\n",
|
| 570 |
+
"print('Number of strings with length > 5000: ', len(df[df['Map Data'].str.len() > 5000]))\n",
|
| 571 |
+
"print('Number of useful rows: ', len(df_len_explore))"
|
| 572 |
+
]
|
| 573 |
+
},
|
| 574 |
+
{
|
| 575 |
+
"cell_type": "code",
|
| 576 |
+
"execution_count": 62,
|
| 577 |
+
"metadata": {},
|
| 578 |
+
"outputs": [
|
| 579 |
+
{
|
| 580 |
+
"name": "stdout",
|
| 581 |
+
"output_type": "stream",
|
| 582 |
+
"text": [
|
| 583 |
+
"Post cleaning data length: 791\n",
|
| 584 |
+
"Avg string length: 596.3046776232617\n",
|
| 585 |
+
"Median string length: 192.0\n",
|
| 586 |
+
"25th percentile string length: 43.5\n",
|
| 587 |
+
"80th percentile string length: 1114.0\n"
|
| 588 |
+
]
|
| 589 |
+
}
|
| 590 |
+
],
|
| 591 |
+
"source": [
|
| 592 |
+
"print('Post cleaning data length: ', len(df_len_explore))\n",
|
| 593 |
+
"print('Avg string length: ', df_len_explore['Map Data'].str.len().mean())\n",
|
| 594 |
+
"print('Median string length: ', df_len_explore['Map Data'].str.len().median())\n",
|
| 595 |
+
"print('25th percentile string length: ', df_len_explore['Map Data'].str.len().quantile(0.25)) \n",
|
| 596 |
+
"print('80th percentile string length: ', df_len_explore['Map Data'].str.len().quantile(0.8))"
|
| 597 |
+
]
|
| 598 |
+
},
|
| 599 |
+
{
|
| 600 |
+
"cell_type": "markdown",
|
| 601 |
+
"metadata": {},
|
| 602 |
+
"source": [
|
| 603 |
+
"Highly uneven string length distribution.\n",
|
| 604 |
+
"- 33% of the total data is useless, i.e. blank rows\n",
|
| 605 |
+
"- 25% of the strings have length less than 45 characters\n",
|
| 606 |
+
"- 50% of the strings have length less than 200 characters\n",
|
| 607 |
+
"- 30% of the strings have length between 200 - 1100 characters (huge variation)\n",
|
| 608 |
+
"- 20% of the strings have length greater than 1100 characters\n",
|
| 609 |
+
"- about 2% of the total rows (27 of 1225) are longer than 5000 characters (won't be used in training)"
|
| 610 |
+
]
|
| 611 |
+
},
|
| 612 |
+
{
|
| 613 |
+
"cell_type": "code",
|
| 614 |
+
"execution_count": null,
|
| 615 |
+
"metadata": {},
|
| 616 |
+
"outputs": [],
|
| 617 |
+
"source": [
|
| 618 |
+
"## function to clean the given data frame\n",
|
| 619 |
+
"\n",
|
| 620 |
+
"def clean_text(text):\n",
|
| 621 |
+
" text = re.sub(r'[^\\w\\s]', '', text)\n",
|
| 622 |
+
" text = text.lower()\n",
|
| 623 |
+
" text = [word for word in text.split() if word not in stop_words]\n",
|
| 624 |
+
" text = [stemmer.stem(word) for word in text] \n",
|
| 625 |
+
" text = [lemmatizer.lemmatize(word) for word in text]\n",
|
| 626 |
+
" return ' '.join(text)\n",
|
| 627 |
+
"\n",
|
| 628 |
+
"def clean_data(df):\n",
|
| 629 |
+
" df['Map Data'] = df['Map Data'].fillna('')\n",
|
| 630 |
+
" df = df[df['Map Data'].str.len() > 0]\n",
|
| 631 |
+
" df = df[df['Map Data'].str.len() < 5000]\n",
|
| 632 |
+
" df['Map Data'] = df['Map Data'].apply(clean_text)\n",
|
| 633 |
+
" return df"
|
| 634 |
+
]
|
| 635 |
+
}
|
| 636 |
+
],
|
| 637 |
+
"metadata": {
|
| 638 |
+
"kernelspec": {
|
| 639 |
+
"display_name": "Python 3",
|
| 640 |
+
"language": "python",
|
| 641 |
+
"name": "python3"
|
| 642 |
+
},
|
| 643 |
+
"language_info": {
|
| 644 |
+
"codemirror_mode": {
|
| 645 |
+
"name": "ipython",
|
| 646 |
+
"version": 3
|
| 647 |
+
},
|
| 648 |
+
"file_extension": ".py",
|
| 649 |
+
"mimetype": "text/x-python",
|
| 650 |
+
"name": "python",
|
| 651 |
+
"nbconvert_exporter": "python",
|
| 652 |
+
"pygments_lexer": "ipython3",
|
| 653 |
+
"version": "3.12.0"
|
| 654 |
+
}
|
| 655 |
+
},
|
| 656 |
+
"nbformat": 4,
|
| 657 |
+
"nbformat_minor": 2
|
| 658 |
+
}
|
notebooks/TFIDF.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
notebooks/__init__.py
DELETED
|
File without changes
|
notebooks/data_loading.ipynb
ADDED
|
@@ -0,0 +1,996 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"## __Data Pipelines__ \n",
|
| 8 |
+
"Loading data from OpenStreetMap using the Overpass API"
|
| 9 |
+
]
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"cell_type": "code",
|
| 13 |
+
"execution_count": 60,
|
| 14 |
+
"metadata": {},
|
| 15 |
+
"outputs": [],
|
| 16 |
+
"source": [
|
| 17 |
+
"import requests\n",
|
| 18 |
+
"import pandas as pd\n",
|
| 19 |
+
"import re\n",
|
| 20 |
+
"import math\n",
|
| 21 |
+
"from typing import Tuple, List, Dict"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"cell_type": "code",
|
| 26 |
+
"execution_count": 97,
|
| 27 |
+
"metadata": {},
|
| 28 |
+
"outputs": [],
|
| 29 |
+
"source": [
|
| 30 |
+
"def fetch_osm_data(lat: float, lon: float, radius: int) -> List[Dict]:\n",
|
| 31 |
+
" overpass_url = \"http://overpass-api.de/api/interpreter\"\n",
|
| 32 |
+
" overpass_query = f\"\"\"\n",
|
| 33 |
+
" [out:json];\n",
|
| 34 |
+
" (\n",
|
| 35 |
+
" node[\"name\"](around:{radius},{lat},{lon});\n",
|
| 36 |
+
" way[\"name\"](around:{radius},{lat},{lon});\n",
|
| 37 |
+
" relation[\"name\"](around:{radius},{lat},{lon});\n",
|
| 38 |
+
" );\n",
|
| 39 |
+
" out center;\n",
|
| 40 |
+
" \"\"\"\n",
|
| 41 |
+
" \n",
|
| 42 |
+
" response = requests.get(overpass_url, params={'data': overpass_query})\n",
|
| 43 |
+
" data = response.json()\n",
|
| 44 |
+
" return data['elements']\n",
|
| 45 |
+
"\n",
|
| 46 |
+
"def determine_location_type(tags: Dict[str, str]) -> str:\n",
|
| 47 |
+
" # Residential\n",
|
| 48 |
+
" if 'building' in tags and tags['building'] in ['residential', 'house', 'apartments', 'detached', 'terrace', 'dormitory', 'bungalow']:\n",
|
| 49 |
+
" return 'Residential'\n",
|
| 50 |
+
" \n",
|
| 51 |
+
" # Commercial\n",
|
| 52 |
+
" if any(key in tags for key in ['shop', 'office', 'craft']):\n",
|
| 53 |
+
" return 'Commercial'\n",
|
| 54 |
+
" if 'building' in tags and tags['building'] in ['commercial', 'office', 'retail', 'supermarket', 'kiosk']:\n",
|
| 55 |
+
" return 'Commercial'\n",
|
| 56 |
+
" \n",
|
| 57 |
+
" # Industrial\n",
|
| 58 |
+
" if 'building' in tags and tags['building'] in ['industrial', 'warehouse', 'factory', 'manufacture']:\n",
|
| 59 |
+
" return 'Industrial'\n",
|
| 60 |
+
" if 'industrial' in tags or 'industry' in tags:\n",
|
| 61 |
+
" return 'Industrial'\n",
|
| 62 |
+
" \n",
|
| 63 |
+
" # Educational\n",
|
| 64 |
+
" if 'amenity' in tags and tags['amenity'] in ['school', 'university', 'college', 'library', 'kindergarten', 'language_school']:\n",
|
| 65 |
+
" return 'Educational'\n",
|
| 66 |
+
" \n",
|
| 67 |
+
" # Healthcare\n",
|
| 68 |
+
" if 'amenity' in tags and tags['amenity'] in ['hospital', 'clinic', 'doctors', 'dentist', 'pharmacy', 'veterinary']:\n",
|
| 69 |
+
" return 'Healthcare'\n",
|
| 70 |
+
" \n",
|
| 71 |
+
" # Food & Drink\n",
|
| 72 |
+
" if 'amenity' in tags and tags['amenity'] in ['restaurant', 'cafe', 'bar', 'fast_food', 'pub', 'food_court']:\n",
|
| 73 |
+
" return 'Food & Drink'\n",
|
| 74 |
+
" \n",
|
| 75 |
+
" # Leisure & Entertainment\n",
|
| 76 |
+
" if 'leisure' in tags or 'tourism' in tags:\n",
|
| 77 |
+
" return 'Leisure & Entertainment'\n",
|
| 78 |
+
" if 'amenity' in tags and tags['amenity'] in ['theatre', 'cinema', 'nightclub', 'arts_centre', 'community_centre']:\n",
|
| 79 |
+
" return 'Leisure & Entertainment'\n",
|
| 80 |
+
" \n",
|
| 81 |
+
" # Transportation\n",
|
| 82 |
+
" if 'amenity' in tags and tags['amenity'] in ['parking', 'bicycle_parking', 'bus_station', 'ferry_terminal']:\n",
|
| 83 |
+
" return 'Transportation'\n",
|
| 84 |
+
" if 'highway' in tags or 'railway' in tags or 'aeroway' in tags:\n",
|
| 85 |
+
" return 'Transportation'\n",
|
| 86 |
+
" \n",
|
| 87 |
+
" # Religious\n",
|
| 88 |
+
" if 'amenity' in tags and tags['amenity'] in ['place_of_worship', 'monastery']:\n",
|
| 89 |
+
" return 'Religious'\n",
|
| 90 |
+
" \n",
|
| 91 |
+
" # Government & Public Services\n",
|
| 92 |
+
" if 'amenity' in tags and tags['amenity'] in ['townhall', 'courthouse', 'police', 'fire_station', 'post_office']:\n",
|
| 93 |
+
" return 'Government & Public Services'\n",
|
| 94 |
+
" \n",
|
| 95 |
+
" # Parks & Recreation\n",
|
| 96 |
+
" if 'leisure' in tags and tags['leisure'] in ['park', 'playground', 'sports_centre', 'stadium', 'garden']:\n",
|
| 97 |
+
" return 'Parks & Recreation'\n",
|
| 98 |
+
" \n",
|
| 99 |
+
" # Natural\n",
|
| 100 |
+
" if 'natural' in tags:\n",
|
| 101 |
+
" return 'Natural'\n",
|
| 102 |
+
" \n",
|
| 103 |
+
" # Landuse\n",
|
| 104 |
+
" if 'landuse' in tags:\n",
|
| 105 |
+
" landuse = tags['landuse'].capitalize()\n",
|
| 106 |
+
" if landuse in ['Residential', 'Commercial', 'Industrial', 'Retail']:\n",
|
| 107 |
+
" return landuse\n",
|
| 108 |
+
" else:\n",
|
| 109 |
+
" return f'Landuse: {landuse}'\n",
|
| 110 |
+
" \n",
|
| 111 |
+
" # If no specific category is found, return 'Other'\n",
|
| 112 |
+
" return 'Other'\n",
|
| 113 |
+
"\n",
|
| 114 |
+
"def parse_osm_data(elements: List[Dict]) -> pd.DataFrame:\n",
|
| 115 |
+
" parsed_data = []\n",
|
| 116 |
+
" for element in elements:\n",
|
| 117 |
+
" tags = element.get('tags', {})\n",
|
| 118 |
+
" parsed_element = {\n",
|
| 119 |
+
" 'ID': f\"{element['type']}_{element['id']}\",\n",
|
| 120 |
+
" 'Location Name': tags.get('name', ''),\n",
|
| 121 |
+
" 'Location Type': determine_location_type(tags)\n",
|
| 122 |
+
" }\n",
|
| 123 |
+
" parsed_data.append(parsed_element)\n",
|
| 124 |
+
" if len(parsed_data) == 0:\n",
|
| 125 |
+
" return pd.DataFrame(columns=['ID', 'Location Name', 'Location Type'])\n",
|
| 126 |
+
" return pd.DataFrame(parsed_data)\n",
|
| 127 |
+
"\n",
|
| 128 |
+
"def get_osm_data(lat: float, lon: float, radius: int) -> pd.DataFrame:\n",
|
| 129 |
+
" raw_data = fetch_osm_data(lat, lon, radius)\n",
|
| 130 |
+
" return parse_osm_data(raw_data)\n",
|
| 131 |
+
"\n",
|
| 132 |
+
"def dms_to_decimal(coord_str):\n",
|
| 133 |
+
" # Regular expression to match the coordinate format\n",
|
| 134 |
+
" pattern = r'(\\d+)°(\\d+)\\'([\\d.]+)\"([NS])\\s*(\\d+)°(\\d+)\\'([\\d.]+)\"([EW])'\n",
|
| 135 |
+
" \n",
|
| 136 |
+
" match = re.match(pattern, coord_str)\n",
|
| 137 |
+
" if not match:\n",
|
| 138 |
+
" raise ValueError(\"Invalid coordinate format. Expected format: 19°03'08.6\\\"N 72°54'06.0\\\"E\")\n",
|
| 139 |
+
"\n",
|
| 140 |
+
" lat_deg, lat_min, lat_sec, lat_dir, lon_deg, lon_min, lon_sec, lon_dir = match.groups()\n",
|
| 141 |
+
"\n",
|
| 142 |
+
" # Convert to decimal degrees\n",
|
| 143 |
+
" lat = float(lat_deg) + float(lat_min)/60 + float(lat_sec)/3600\n",
|
| 144 |
+
" lon = float(lon_deg) + float(lon_min)/60 + float(lon_sec)/3600\n",
|
| 145 |
+
"\n",
|
| 146 |
+
" # Adjust sign based on direction\n",
|
| 147 |
+
" if lat_dir == 'S':\n",
|
| 148 |
+
" lat = -lat\n",
|
| 149 |
+
" if lon_dir == 'W':\n",
|
| 150 |
+
" lon = -lon\n",
|
| 151 |
+
"\n",
|
| 152 |
+
" return lat, lon"
|
| 153 |
+
]
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"cell_type": "code",
|
| 157 |
+
"execution_count": 91,
|
| 158 |
+
"metadata": {},
|
| 159 |
+
"outputs": [
|
| 160 |
+
{
|
| 161 |
+
"name": "stdout",
|
| 162 |
+
"output_type": "stream",
|
| 163 |
+
"text": [
|
| 164 |
+
"Latitude: 19.015805555555556\n",
|
| 165 |
+
"Longitude: 72.89944444444446\n"
|
| 166 |
+
]
|
| 167 |
+
}
|
| 168 |
+
],
|
| 169 |
+
"source": [
|
| 170 |
+
"coord_str = '19°00\\'56.9\"N 72°53\\'58.0\"E'\n",
|
| 171 |
+
"radius_meters = 1000\n",
|
| 172 |
+
"try:\n",
|
| 173 |
+
" latitude, longitude = dms_to_decimal(coord_str)\n",
|
| 174 |
+
" print(f\"Latitude: {latitude}\")\n",
|
| 175 |
+
" print(f\"Longitude: {longitude}\")\n",
|
| 176 |
+
"except ValueError as e:\n",
|
| 177 |
+
" print(f\"Error: {e}\")"
|
| 178 |
+
]
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"cell_type": "code",
|
| 182 |
+
"execution_count": 92,
|
| 183 |
+
"metadata": {},
|
| 184 |
+
"outputs": [],
|
| 185 |
+
"source": [
|
| 186 |
+
"result_df = get_osm_data(latitude, longitude, radius_meters)"
|
| 187 |
+
]
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"cell_type": "code",
|
| 191 |
+
"execution_count": 93,
|
| 192 |
+
"metadata": {},
|
| 193 |
+
"outputs": [
|
| 194 |
+
{
|
| 195 |
+
"data": {
|
| 196 |
+
"text/html": [
|
| 197 |
+
"<div>\n",
|
| 198 |
+
"<style scoped>\n",
|
| 199 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 200 |
+
" vertical-align: middle;\n",
|
| 201 |
+
" }\n",
|
| 202 |
+
"\n",
|
| 203 |
+
" .dataframe tbody tr th {\n",
|
| 204 |
+
" vertical-align: top;\n",
|
| 205 |
+
" }\n",
|
| 206 |
+
"\n",
|
| 207 |
+
" .dataframe thead th {\n",
|
| 208 |
+
" text-align: right;\n",
|
| 209 |
+
" }\n",
|
| 210 |
+
"</style>\n",
|
| 211 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 212 |
+
" <thead>\n",
|
| 213 |
+
" <tr style=\"text-align: right;\">\n",
|
| 214 |
+
" <th></th>\n",
|
| 215 |
+
" <th>ID</th>\n",
|
| 216 |
+
" <th>Location Name</th>\n",
|
| 217 |
+
" <th>Location Type</th>\n",
|
| 218 |
+
" </tr>\n",
|
| 219 |
+
" </thead>\n",
|
| 220 |
+
" <tbody>\n",
|
| 221 |
+
" <tr>\n",
|
| 222 |
+
" <th>0</th>\n",
|
| 223 |
+
" <td>node_622002639</td>\n",
|
| 224 |
+
" <td>Mahul</td>\n",
|
| 225 |
+
" <td>Other</td>\n",
|
| 226 |
+
" </tr>\n",
|
| 227 |
+
" <tr>\n",
|
| 228 |
+
" <th>1</th>\n",
|
| 229 |
+
" <td>node_622005407</td>\n",
|
| 230 |
+
" <td>Gowanpada</td>\n",
|
| 231 |
+
" <td>Other</td>\n",
|
| 232 |
+
" </tr>\n",
|
| 233 |
+
" <tr>\n",
|
| 234 |
+
" <th>2</th>\n",
|
| 235 |
+
" <td>node_1646222635</td>\n",
|
| 236 |
+
" <td>gadakary bus stop</td>\n",
|
| 237 |
+
" <td>Transportation</td>\n",
|
| 238 |
+
" </tr>\n",
|
| 239 |
+
" <tr>\n",
|
| 240 |
+
" <th>3</th>\n",
|
| 241 |
+
" <td>node_1646222681</td>\n",
|
| 242 |
+
" <td>vishnu nagar bus stop</td>\n",
|
| 243 |
+
" <td>Other</td>\n",
|
| 244 |
+
" </tr>\n",
|
| 245 |
+
" <tr>\n",
|
| 246 |
+
" <th>4</th>\n",
|
| 247 |
+
" <td>node_2932495033</td>\n",
|
| 248 |
+
" <td>Sree Dutta mandir</td>\n",
|
| 249 |
+
" <td>Religious</td>\n",
|
| 250 |
+
" </tr>\n",
|
| 251 |
+
" <tr>\n",
|
| 252 |
+
" <th>5</th>\n",
|
| 253 |
+
" <td>node_11954176622</td>\n",
|
| 254 |
+
" <td>Gavhanpada</td>\n",
|
| 255 |
+
" <td>Other</td>\n",
|
| 256 |
+
" </tr>\n",
|
| 257 |
+
" <tr>\n",
|
| 258 |
+
" <th>6</th>\n",
|
| 259 |
+
" <td>way_25587616</td>\n",
|
| 260 |
+
" <td>Bhikaji Damaji Patil Marg</td>\n",
|
| 261 |
+
" <td>Transportation</td>\n",
|
| 262 |
+
" </tr>\n",
|
| 263 |
+
" <tr>\n",
|
| 264 |
+
" <th>7</th>\n",
|
| 265 |
+
" <td>way_122289587</td>\n",
|
| 266 |
+
" <td>Mulund - Trombay 220 KV line</td>\n",
|
| 267 |
+
" <td>Other</td>\n",
|
| 268 |
+
" </tr>\n",
|
| 269 |
+
" <tr>\n",
|
| 270 |
+
" <th>8</th>\n",
|
| 271 |
+
" <td>way_151783563</td>\n",
|
| 272 |
+
" <td>Laxman Umaji Gadkari Marg</td>\n",
|
| 273 |
+
" <td>Transportation</td>\n",
|
| 274 |
+
" </tr>\n",
|
| 275 |
+
" <tr>\n",
|
| 276 |
+
" <th>9</th>\n",
|
| 277 |
+
" <td>way_151783570</td>\n",
|
| 278 |
+
" <td>Vishnu Nagar Road</td>\n",
|
| 279 |
+
" <td>Transportation</td>\n",
|
| 280 |
+
" </tr>\n",
|
| 281 |
+
" </tbody>\n",
|
| 282 |
+
"</table>\n",
|
| 283 |
+
"</div>"
|
| 284 |
+
],
|
| 285 |
+
"text/plain": [
|
| 286 |
+
" ID Location Name Location Type\n",
|
| 287 |
+
"0 node_622002639 Mahul Other\n",
|
| 288 |
+
"1 node_622005407 Gowanpada Other\n",
|
| 289 |
+
"2 node_1646222635 gadakary bus stop Transportation\n",
|
| 290 |
+
"3 node_1646222681 vishnu nagar bus stop Other\n",
|
| 291 |
+
"4 node_2932495033 Sree Dutta mandir Religious\n",
|
| 292 |
+
"5 node_11954176622 Gavhanpada Other\n",
|
| 293 |
+
"6 way_25587616 Bhikaji Damaji Patil Marg Transportation\n",
|
| 294 |
+
"7 way_122289587 Mulund - Trombay 220 KV line Other\n",
|
| 295 |
+
"8 way_151783563 Laxman Umaji Gadkari Marg Transportation\n",
|
| 296 |
+
"9 way_151783570 Vishnu Nagar Road Transportation"
|
| 297 |
+
]
|
| 298 |
+
},
|
| 299 |
+
"execution_count": 93,
|
| 300 |
+
"metadata": {},
|
| 301 |
+
"output_type": "execute_result"
|
| 302 |
+
}
|
| 303 |
+
],
|
| 304 |
+
"source": [
|
| 305 |
+
"result_df.head(10)"
|
| 306 |
+
]
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"cell_type": "code",
|
| 310 |
+
"execution_count": 94,
|
| 311 |
+
"metadata": {},
|
| 312 |
+
"outputs": [
|
| 313 |
+
{
|
| 314 |
+
"data": {
|
| 315 |
+
"text/html": [
|
| 316 |
+
"<div>\n",
|
| 317 |
+
"<style scoped>\n",
|
| 318 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 319 |
+
" vertical-align: middle;\n",
|
| 320 |
+
" }\n",
|
| 321 |
+
"\n",
|
| 322 |
+
" .dataframe tbody tr th {\n",
|
| 323 |
+
" vertical-align: top;\n",
|
| 324 |
+
" }\n",
|
| 325 |
+
"\n",
|
| 326 |
+
" .dataframe thead th {\n",
|
| 327 |
+
" text-align: right;\n",
|
| 328 |
+
" }\n",
|
| 329 |
+
"</style>\n",
|
| 330 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 331 |
+
" <thead>\n",
|
| 332 |
+
" <tr style=\"text-align: right;\">\n",
|
| 333 |
+
" <th></th>\n",
|
| 334 |
+
" <th>ID</th>\n",
|
| 335 |
+
" <th>Location Name</th>\n",
|
| 336 |
+
" <th>Location Type</th>\n",
|
| 337 |
+
" </tr>\n",
|
| 338 |
+
" </thead>\n",
|
| 339 |
+
" <tbody>\n",
|
| 340 |
+
" <tr>\n",
|
| 341 |
+
" <th>11</th>\n",
|
| 342 |
+
" <td>way_430012316</td>\n",
|
| 343 |
+
" <td>track</td>\n",
|
| 344 |
+
" <td>Residential</td>\n",
|
| 345 |
+
" </tr>\n",
|
| 346 |
+
" <tr>\n",
|
| 347 |
+
" <th>12</th>\n",
|
| 348 |
+
" <td>way_430012318</td>\n",
|
| 349 |
+
" <td>Mumbai Refinery Mahul</td>\n",
|
| 350 |
+
" <td>Industrial</td>\n",
|
| 351 |
+
" </tr>\n",
|
| 352 |
+
" <tr>\n",
|
| 353 |
+
" <th>13</th>\n",
|
| 354 |
+
" <td>way_430012320</td>\n",
|
| 355 |
+
" <td>Mumbai Refinery</td>\n",
|
| 356 |
+
" <td>Industrial</td>\n",
|
| 357 |
+
" </tr>\n",
|
| 358 |
+
" </tbody>\n",
|
| 359 |
+
"</table>\n",
|
| 360 |
+
"</div>"
|
| 361 |
+
],
|
| 362 |
+
"text/plain": [
|
| 363 |
+
" ID Location Name Location Type\n",
|
| 364 |
+
"11 way_430012316 track Residential\n",
|
| 365 |
+
"12 way_430012318 Mumbai Refinery Mahul Industrial\n",
|
| 366 |
+
"13 way_430012320 Mumbai Refinery Industrial"
|
| 367 |
+
]
|
| 368 |
+
},
|
| 369 |
+
"execution_count": 94,
|
| 370 |
+
"metadata": {},
|
| 371 |
+
"output_type": "execute_result"
|
| 372 |
+
}
|
| 373 |
+
],
|
| 374 |
+
"source": [
|
| 375 |
+
"labelled_df = result_df[result_df['Location Type'] != 'Other']\n",
|
| 376 |
+
"labelled_df = labelled_df[labelled_df['Location Type'] != 'Religious']\n",
|
| 377 |
+
"labelled_df = labelled_df[labelled_df['Location Type'] != 'Transportation']\n",
|
| 378 |
+
"labelled_df.head(10)"
|
| 379 |
+
]
|
| 380 |
+
},
|
| 381 |
+
{
|
| 382 |
+
"cell_type": "code",
|
| 383 |
+
"execution_count": 95,
|
| 384 |
+
"metadata": {},
|
| 385 |
+
"outputs": [
|
| 386 |
+
{
|
| 387 |
+
"data": {
|
| 388 |
+
"text/html": [
|
| 389 |
+
"<div>\n",
|
| 390 |
+
"<style scoped>\n",
|
| 391 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 392 |
+
" vertical-align: middle;\n",
|
| 393 |
+
" }\n",
|
| 394 |
+
"\n",
|
| 395 |
+
" .dataframe tbody tr th {\n",
|
| 396 |
+
" vertical-align: top;\n",
|
| 397 |
+
" }\n",
|
| 398 |
+
"\n",
|
| 399 |
+
" .dataframe thead th {\n",
|
| 400 |
+
" text-align: right;\n",
|
| 401 |
+
" }\n",
|
| 402 |
+
"</style>\n",
|
| 403 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 404 |
+
" <thead>\n",
|
| 405 |
+
" <tr style=\"text-align: right;\">\n",
|
| 406 |
+
" <th></th>\n",
|
| 407 |
+
" <th>Location Name</th>\n",
|
| 408 |
+
" <th>Location Type</th>\n",
|
| 409 |
+
" </tr>\n",
|
| 410 |
+
" </thead>\n",
|
| 411 |
+
" <tbody>\n",
|
| 412 |
+
" <tr>\n",
|
| 413 |
+
" <th>0</th>\n",
|
| 414 |
+
" <td>track</td>\n",
|
| 415 |
+
" <td>Residential</td>\n",
|
| 416 |
+
" </tr>\n",
|
| 417 |
+
" <tr>\n",
|
| 418 |
+
" <th>1</th>\n",
|
| 419 |
+
" <td>Mumbai Refinery Mahul</td>\n",
|
| 420 |
+
" <td>Industrial</td>\n",
|
| 421 |
+
" </tr>\n",
|
| 422 |
+
" <tr>\n",
|
| 423 |
+
" <th>2</th>\n",
|
| 424 |
+
" <td>Mumbai Refinery</td>\n",
|
| 425 |
+
" <td>Industrial</td>\n",
|
| 426 |
+
" </tr>\n",
|
| 427 |
+
" </tbody>\n",
|
| 428 |
+
"</table>\n",
|
| 429 |
+
"</div>"
|
| 430 |
+
],
|
| 431 |
+
"text/plain": [
|
| 432 |
+
" Location Name Location Type\n",
|
| 433 |
+
"0 track Residential\n",
|
| 434 |
+
"1 Mumbai Refinery Mahul Industrial\n",
|
| 435 |
+
"2 Mumbai Refinery Industrial"
|
| 436 |
+
]
|
| 437 |
+
},
|
| 438 |
+
"execution_count": 95,
|
| 439 |
+
"metadata": {},
|
| 440 |
+
"output_type": "execute_result"
|
| 441 |
+
}
|
| 442 |
+
],
|
| 443 |
+
"source": [
|
| 444 |
+
"## removing duplicates\n",
|
| 445 |
+
"\n",
|
| 446 |
+
"loc_types = []\n",
|
| 447 |
+
"for row in labelled_df.iterrows():\n",
|
| 448 |
+
" loc_type = (row[1]['Location Name'], row[1]['Location Type'])\n",
|
| 449 |
+
" if loc_type not in loc_types:\n",
|
| 450 |
+
" loc_types.append(loc_type)\n",
|
| 451 |
+
"\n",
|
| 452 |
+
"labelled_df = pd.DataFrame(loc_types, columns=['Location Name', 'Location Type'])\n",
|
| 453 |
+
"labelled_df.head(20)"
|
| 454 |
+
]
|
| 455 |
+
},
|
| 456 |
+
{
|
| 457 |
+
"cell_type": "code",
|
| 458 |
+
"execution_count": 58,
|
| 459 |
+
"metadata": {},
|
| 460 |
+
"outputs": [],
|
| 461 |
+
"source": [
|
| 462 |
+
"row_of_dataset = ''\n",
|
| 463 |
+
"\n",
|
| 464 |
+
"for row in labelled_df.iterrows():\n",
|
| 465 |
+
" row_text = row[1]['Location Name'] + ' is a ' + row[1]['Location Type']\n",
|
| 466 |
+
" row_of_dataset += row_text + ', '"
|
| 467 |
+
]
|
| 468 |
+
},
|
| 469 |
+
{
|
| 470 |
+
"cell_type": "code",
|
| 471 |
+
"execution_count": 59,
|
| 472 |
+
"metadata": {},
|
| 473 |
+
"outputs": [
|
| 474 |
+
{
|
| 475 |
+
"data": {
|
| 476 |
+
"text/plain": [
|
| 477 |
+
"'Oswal Company Trees is a Natural, Newspaper stall is a Commercial, Shiv Polyclinic and Nursing Home is a Healthcare, राजपूत मेडिकल is a Healthcare, Bhabha Atomic Research Centre - BARC is a Industrial, BPCL Sports Club is a Leisure & Entertainment, New Bharat Nagar, Banjara tanda, Hasina Nagar is a Residential, Old Bharat Nagar is a Residential, Rashtriya Chemicals & Fertilizers is a Industrial, Koyna Colony is a Residential, D is a Residential, A-2 is a Residential, flip card is a Commercial, track is a Residential, Mumbai Refinery Mahul is a Industrial, Mumbai Refinery is a Industrial, Trombay Thermal Power Station is a Industrial, Vitta Sanchay Society is a Residential, E is a Residential, Acharya Sharad Narayan Udyan is a Leisure & Entertainment, bmc park is a Leisure & Entertainment, Mysore Colony Central Garden is a Leisure & Entertainment, BMC owned trees is a Natural, BMC PARK is a Leisure & Entertainment, Mysore colony eastern park is a Leisure & Entertainment, Trees owned by RCF is a Natural, Mysore Colony trees is a Natural, NAVAL KG School, TS MAHUL is a Educational, '"
|
| 478 |
+
]
|
| 479 |
+
},
|
| 480 |
+
"execution_count": 59,
|
| 481 |
+
"metadata": {},
|
| 482 |
+
"output_type": "execute_result"
|
| 483 |
+
}
|
| 484 |
+
],
|
| 485 |
+
"source": [
|
| 486 |
+
"row_of_dataset"
|
| 487 |
+
]
|
| 488 |
+
},
|
| 489 |
+
{
|
| 490 |
+
"cell_type": "markdown",
|
| 491 |
+
"metadata": {},
|
| 492 |
+
"source": [
|
| 493 |
+
"This is one row of the dataset. Next, we write functions to extract such rows from a given large map area."
|
| 494 |
+
]
|
| 495 |
+
},
|
| 496 |
+
{
|
| 497 |
+
"cell_type": "code",
|
| 498 |
+
"execution_count": 61,
|
| 499 |
+
"metadata": {},
|
| 500 |
+
"outputs": [],
|
| 501 |
+
"source": [
|
| 502 |
+
"## input point is at the bottom left of the map\n",
|
| 503 |
+
"\n",
|
| 504 |
+
"def calculate_distant_points(lat: float, lon: float, distance: float) -> tuple:\n",
|
| 505 |
+
" # Earth's radius in meters\n",
|
| 506 |
+
" R = 6371000\n",
|
| 507 |
+
"\n",
|
| 508 |
+
" # Convert latitude and longitude to radians\n",
|
| 509 |
+
" lat_rad = math.radians(lat)\n",
|
| 510 |
+
" lon_rad = math.radians(lon)\n",
|
| 511 |
+
"\n",
|
| 512 |
+
" # Calculate the point with the same latitude (moving east-west)\n",
|
| 513 |
+
" delta_lon = distance / (R * math.cos(lat_rad))\n",
|
| 514 |
+
" lon1 = lon + math.degrees(delta_lon)\n",
|
| 515 |
+
" \n",
|
| 516 |
+
" # Calculate the point with the same longitude (moving north-south)\n",
|
| 517 |
+
" delta_lat = distance / R\n",
|
| 518 |
+
" lat2 = lat + math.degrees(delta_lat)\n",
|
| 519 |
+
"\n",
|
| 520 |
+
" return ((lat, lon1), (lat2, lon))"
|
| 521 |
+
]
|
| 522 |
+
},
|
| 523 |
+
{
|
| 524 |
+
"cell_type": "code",
|
| 525 |
+
"execution_count": 66,
|
| 526 |
+
"metadata": {},
|
| 527 |
+
"outputs": [
|
| 528 |
+
{
|
| 529 |
+
"name": "stdout",
|
| 530 |
+
"output_type": "stream",
|
| 531 |
+
"text": [
|
| 532 |
+
"Original point: (40.7128, -74.006)\n",
|
| 533 |
+
"Point 1000m east: (40.712800, -73.709386)\n",
|
| 534 |
+
"Point 1000m north: (40.937630, -74.006000)\n"
|
| 535 |
+
]
|
| 536 |
+
}
|
| 537 |
+
],
|
| 538 |
+
"source": [
|
| 539 |
+
"if __name__ == \"__main__\":\n",
|
| 540 |
+
" latitude = 40.7128 # New York City latitude\n",
|
| 541 |
+
" longitude = -74.0060 # New York City longitude\n",
|
| 542 |
+
" distance = 1000*25 # 25 km (25 x 1000 meters)\n",
|
| 543 |
+
"\n",
|
| 544 |
+
" result = calculate_distant_points(latitude, longitude, distance)\n",
|
| 545 |
+
" print(f\"Original point: ({latitude}, {longitude})\")\n",
|
| 546 |
+
" print(f\"Point 1000m east: ({result[0][0]:.6f}, {result[0][1]:.6f})\")\n",
|
| 547 |
+
" print(f\"Point 1000m north: ({result[1][0]:.6f}, {result[1][1]:.6f})\")"
|
| 548 |
+
]
|
| 549 |
+
},
|
| 550 |
+
{
|
| 551 |
+
"cell_type": "code",
|
| 552 |
+
"execution_count": 69,
|
| 553 |
+
"metadata": {},
|
| 554 |
+
"outputs": [
|
| 555 |
+
{
|
| 556 |
+
"name": "stdout",
|
| 557 |
+
"output_type": "stream",
|
| 558 |
+
"text": [
|
| 559 |
+
"Bottom Left: (40.7128, -74.006)\n",
|
| 560 |
+
"Top Left: (40.93763040147969, -74.006)\n",
|
| 561 |
+
"Bottom Right: (40.7128, -73.7093855252233)\n",
|
| 562 |
+
"Top Right: (40.93763040147969, -73.7093855252233)\n"
|
| 563 |
+
]
|
| 564 |
+
}
|
| 565 |
+
],
|
| 566 |
+
"source": [
|
| 567 |
+
"bottom_left_latitude = 40.7128\n",
|
| 568 |
+
"bottom_left_longitude = -74.0060\n",
|
| 569 |
+
"\n",
|
| 570 |
+
"result = calculate_distant_points(bottom_left_latitude, bottom_left_longitude, 1000*25)\n",
|
| 571 |
+
"\n",
|
| 572 |
+
"top_left_latitude = result[1][0]\n",
|
| 573 |
+
"top_left_longitude = result[1][1]\n",
|
| 574 |
+
"\n",
|
| 575 |
+
"bottom_right_latitude = result[0][0]\n",
|
| 576 |
+
"bottom_right_longitude = result[0][1]\n",
|
| 577 |
+
"\n",
|
| 578 |
+
"top_right_latitude = top_left_latitude\n",
|
| 579 |
+
"top_right_longitude = bottom_right_longitude\n",
|
| 580 |
+
"\n",
|
| 581 |
+
"print(f\"Bottom Left: ({bottom_left_latitude}, {bottom_left_longitude})\")\n",
|
| 582 |
+
"print(f\"Top Left: ({top_left_latitude}, {top_left_longitude})\")\n",
|
| 583 |
+
"print(f\"Bottom Right: ({bottom_right_latitude}, {bottom_right_longitude})\")\n",
|
| 584 |
+
"print(f\"Top Right: ({top_right_latitude}, {top_right_longitude})\")"
|
| 585 |
+
]
|
| 586 |
+
},
|
| 587 |
+
{
|
| 588 |
+
"cell_type": "code",
|
| 589 |
+
"execution_count": 71,
|
| 590 |
+
"metadata": {},
|
| 591 |
+
"outputs": [
|
| 592 |
+
{
|
| 593 |
+
"data": {
|
| 594 |
+
"text/plain": [
|
| 595 |
+
"(0.008993216059187433, 0.01186457899106813)"
|
| 596 |
+
]
|
| 597 |
+
},
|
| 598 |
+
"execution_count": 71,
|
| 599 |
+
"metadata": {},
|
| 600 |
+
"output_type": "execute_result"
|
| 601 |
+
}
|
| 602 |
+
],
|
| 603 |
+
"source": [
|
| 604 |
+
"latitude_shift = top_left_latitude - bottom_left_latitude\n",
|
| 605 |
+
"longitude_shift = bottom_right_longitude - bottom_left_longitude\n",
|
| 606 |
+
"\n",
|
| 607 |
+
"latitude_unit = latitude_shift / 25\n",
|
| 608 |
+
"longitude_unit = longitude_shift / 25\n",
|
| 609 |
+
"\n",
|
| 610 |
+
"latitude_unit, longitude_unit"
|
| 611 |
+
]
|
| 612 |
+
},
|
| 613 |
+
{
|
| 614 |
+
"cell_type": "code",
|
| 615 |
+
"execution_count": 73,
|
| 616 |
+
"metadata": {},
|
| 617 |
+
"outputs": [],
|
| 618 |
+
"source": [
|
| 619 |
+
"## 2d map grid (0,0) --> bottom left\n",
|
| 620 |
+
"\n",
|
| 621 |
+
"def create_map_grid(bottom_left: Tuple[float, float], top_right: Tuple[float, float], rows: int, cols: int) -> List[List[Tuple[float, float]]]:\n",
|
| 622 |
+
" grid = []\n",
|
| 623 |
+
" lat_unit = (top_right[0] - bottom_left[0]) / rows\n",
|
| 624 |
+
" lon_unit = (top_right[1] - bottom_left[1]) / cols\n",
|
| 625 |
+
" \n",
|
| 626 |
+
" for i in range(rows):\n",
|
| 627 |
+
" row = []\n",
|
| 628 |
+
" for j in range(cols):\n",
|
| 629 |
+
" lat = bottom_left[0] + i * lat_unit\n",
|
| 630 |
+
" lon = bottom_left[1] + j * lon_unit\n",
|
| 631 |
+
" lat = lat + lat_unit / 2\n",
|
| 632 |
+
" lon = lon + lon_unit / 2\n",
|
| 633 |
+
" row.append((lat, lon))\n",
|
| 634 |
+
" grid.append(row)\n",
|
| 635 |
+
" \n",
|
| 636 |
+
" return grid"
|
| 637 |
+
]
|
| 638 |
+
},
|
| 639 |
+
{
|
| 640 |
+
"cell_type": "code",
|
| 641 |
+
"execution_count": 79,
|
| 642 |
+
"metadata": {},
|
| 643 |
+
"outputs": [],
|
| 644 |
+
"source": [
|
| 645 |
+
"grid = create_map_grid((bottom_left_latitude, bottom_left_longitude), (top_right_latitude, top_right_longitude), 25, 25)"
|
| 646 |
+
]
|
| 647 |
+
},
|
| 648 |
+
{
|
| 649 |
+
"cell_type": "code",
|
| 650 |
+
"execution_count": 108,
|
| 651 |
+
"metadata": {},
|
| 652 |
+
"outputs": [],
|
| 653 |
+
"source": [
|
| 654 |
+
"grid_dataset = []\n",
|
| 655 |
+
"for i, row in enumerate(grid):\n",
|
| 656 |
+
" for j, point in enumerate(row):\n",
|
| 657 |
+
" \n",
|
| 658 |
+
" grid_row = {\"row\": i, \"col\": j, \"latitude\": point[0], \"longitude\": point[1]}\n",
|
| 659 |
+
" grid_dataset.append(grid_row)\n",
|
| 660 |
+
"\n",
|
| 661 |
+
"grid_df = pd.DataFrame(grid_dataset)"
|
| 662 |
+
]
|
| 663 |
+
},
|
| 664 |
+
{
|
| 665 |
+
"cell_type": "code",
|
| 666 |
+
"execution_count": 83,
|
| 667 |
+
"metadata": {},
|
| 668 |
+
"outputs": [],
|
| 669 |
+
"source": [
|
| 670 |
+
"left_lat = 18.889833\n",
|
| 671 |
+
"left_lon = 72.779844"
|
| 672 |
+
]
|
| 673 |
+
},
|
| 674 |
+
{
|
| 675 |
+
"cell_type": "code",
|
| 676 |
+
"execution_count": 84,
|
| 677 |
+
"metadata": {},
|
| 678 |
+
"outputs": [],
|
| 679 |
+
"source": [
|
| 680 |
+
"res1 = calculate_distant_points(left_lat, left_lon, 1000*35)\n",
|
| 681 |
+
"\n",
|
| 682 |
+
"right_lat = res1[1][0]\n",
|
| 683 |
+
"right_lon = res1[0][1]"
|
| 684 |
+
]
|
| 685 |
+
},
|
| 686 |
+
{
|
| 687 |
+
"cell_type": "code",
|
| 688 |
+
"execution_count": 85,
|
| 689 |
+
"metadata": {},
|
| 690 |
+
"outputs": [],
|
| 691 |
+
"source": [
|
| 692 |
+
"grid = create_map_grid((left_lat, left_lon), (right_lat, right_lon), 35, 35)"
|
| 693 |
+
]
|
| 694 |
+
},
|
| 695 |
+
{
|
| 696 |
+
"cell_type": "code",
|
| 697 |
+
"execution_count": null,
|
| 698 |
+
"metadata": {},
|
| 699 |
+
"outputs": [],
|
| 700 |
+
"source": [
|
| 701 |
+
"grid_dataset = []\n",
|
| 702 |
+
"for i, row in enumerate(grid):\n",
|
| 703 |
+
" for j, point in enumerate(row):\n",
|
| 704 |
+
" grid_row = {\"row\": i, \"col\": j, \"latitude\": point[0], \"longitude\": point[1]}\n",
|
| 705 |
+
" grid_dataset.append(grid_row)\n",
|
| 706 |
+
"\n",
|
| 707 |
+
"grid_df = pd.DataFrame(grid_dataset)\n",
|
| 708 |
+
"grid_df.head(25)"
|
| 709 |
+
]
|
| 710 |
+
},
|
| 711 |
+
{
|
| 712 |
+
"cell_type": "code",
|
| 713 |
+
"execution_count": 106,
|
| 714 |
+
"metadata": {},
|
| 715 |
+
"outputs": [],
|
| 716 |
+
"source": [
|
| 717 |
+
"## entire pipeline\n",
|
| 718 |
+
"\n",
|
| 719 |
+
"left_lat = 18.889833\n",
|
| 720 |
+
"left_lon = 72.779844\n",
|
| 721 |
+
"dist = 35\n",
|
| 722 |
+
"\n",
|
| 723 |
+
"res1 = calculate_distant_points(left_lat, left_lon, 1000*dist)\n",
|
| 724 |
+
"\n",
|
| 725 |
+
"right_lat = res1[1][0]\n",
|
| 726 |
+
"right_lon = res1[0][1]\n",
|
| 727 |
+
"grid = create_map_grid((left_lat, left_lon), (right_lat, right_lon), dist, dist)\n",
|
| 728 |
+
"\n",
|
| 729 |
+
"grid_dataset = []\n",
|
| 730 |
+
"for i, row in enumerate(grid):\n",
|
| 731 |
+
" for j, point in enumerate(row):\n",
|
| 732 |
+
" result_df = get_osm_data(point[0], point[1], 710)\n",
|
| 733 |
+
" # print(result_df.head(3))\n",
|
| 734 |
+
" labelled_df = result_df[result_df['Location Type'] != 'Other']\n",
|
| 735 |
+
" labelled_df = labelled_df[labelled_df['Location Type'] != 'Religious']\n",
|
| 736 |
+
" labelled_df = labelled_df[labelled_df['Location Type'] != 'Transportation']\n",
|
| 737 |
+
" loc_types = []\n",
|
| 738 |
+
" for row in labelled_df.iterrows():\n",
|
| 739 |
+
" loc_type = (row[1]['Location Name'], row[1]['Location Type'])\n",
|
| 740 |
+
" if loc_type not in loc_types:\n",
|
| 741 |
+
" loc_types.append(loc_type)\n",
|
| 742 |
+
"\n",
|
| 743 |
+
" labelled_df = pd.DataFrame(loc_types, columns=['Location Name', 'Location Type'])\n",
|
| 744 |
+
"\n",
|
| 745 |
+
" row_of_dataset = ''\n",
|
| 746 |
+
"\n",
|
| 747 |
+
" for row in labelled_df.iterrows():\n",
|
| 748 |
+
" row_text = row[1]['Location Name'] + ' is a ' + row[1]['Location Type']\n",
|
| 749 |
+
" row_of_dataset += row_text + '; '\n",
|
| 750 |
+
" ## replacing any comma in the text with a blank space\n",
|
| 751 |
+
"\n",
|
| 752 |
+
" row_of_dataset = row_of_dataset.replace(',', ' ')\n",
|
| 753 |
+
" \n",
|
| 754 |
+
" grid_row = {\"row\": i, \"col\": j, \"latitude\": point[0], \"longitude\": point[1], \"Map Data\": row_of_dataset}\n",
|
| 755 |
+
" grid_dataset.append(grid_row)\n",
|
| 756 |
+
"\n",
|
| 757 |
+
"grid_df = pd.DataFrame(grid_dataset)\n",
|
| 758 |
+
"grid_df.to_csv('MMR_DATASET.csv', index=False)"
|
| 759 |
+
]
|
| 760 |
+
},
|
| 761 |
+
{
|
| 762 |
+
"cell_type": "code",
|
| 763 |
+
"execution_count": 107,
|
| 764 |
+
"metadata": {},
|
| 765 |
+
"outputs": [
|
| 766 |
+
{
|
| 767 |
+
"data": {
|
| 768 |
+
"text/html": [
|
| 769 |
+
"<div>\n",
|
| 770 |
+
"<style scoped>\n",
|
| 771 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 772 |
+
" vertical-align: middle;\n",
|
| 773 |
+
" }\n",
|
| 774 |
+
"\n",
|
| 775 |
+
" .dataframe tbody tr th {\n",
|
| 776 |
+
" vertical-align: top;\n",
|
| 777 |
+
" }\n",
|
| 778 |
+
"\n",
|
| 779 |
+
" .dataframe thead th {\n",
|
| 780 |
+
" text-align: right;\n",
|
| 781 |
+
" }\n",
|
| 782 |
+
"</style>\n",
|
| 783 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 784 |
+
" <thead>\n",
|
| 785 |
+
" <tr style=\"text-align: right;\">\n",
|
| 786 |
+
" <th></th>\n",
|
| 787 |
+
" <th>row</th>\n",
|
| 788 |
+
" <th>col</th>\n",
|
| 789 |
+
" <th>latitude</th>\n",
|
| 790 |
+
" <th>longitude</th>\n",
|
| 791 |
+
" <th>Map Data</th>\n",
|
| 792 |
+
" </tr>\n",
|
| 793 |
+
" </thead>\n",
|
| 794 |
+
" <tbody>\n",
|
| 795 |
+
" <tr>\n",
|
| 796 |
+
" <th>0</th>\n",
|
| 797 |
+
" <td>0</td>\n",
|
| 798 |
+
" <td>0</td>\n",
|
| 799 |
+
" <td>18.894330</td>\n",
|
| 800 |
+
" <td>72.784597</td>\n",
|
| 801 |
+
" <td></td>\n",
|
| 802 |
+
" </tr>\n",
|
| 803 |
+
" <tr>\n",
|
| 804 |
+
" <th>1</th>\n",
|
| 805 |
+
" <td>0</td>\n",
|
| 806 |
+
" <td>1</td>\n",
|
| 807 |
+
" <td>18.894330</td>\n",
|
| 808 |
+
" <td>72.794102</td>\n",
|
| 809 |
+
" <td>Prongs Reef is a Natural,</td>\n",
|
| 810 |
+
" </tr>\n",
|
| 811 |
+
" <tr>\n",
|
| 812 |
+
" <th>2</th>\n",
|
| 813 |
+
" <td>0</td>\n",
|
| 814 |
+
" <td>2</td>\n",
|
| 815 |
+
" <td>18.894330</td>\n",
|
| 816 |
+
" <td>72.803607</td>\n",
|
| 817 |
+
" <td>United Services Club Golf Course is a Leisure ...</td>\n",
|
| 818 |
+
" </tr>\n",
|
| 819 |
+
" <tr>\n",
|
| 820 |
+
" <th>3</th>\n",
|
| 821 |
+
" <td>0</td>\n",
|
| 822 |
+
" <td>3</td>\n",
|
| 823 |
+
" <td>18.894330</td>\n",
|
| 824 |
+
" <td>72.813112</td>\n",
|
| 825 |
+
" <td>Indian Meterological Department is a Commercia...</td>\n",
|
| 826 |
+
" </tr>\n",
|
| 827 |
+
" <tr>\n",
|
| 828 |
+
" <th>4</th>\n",
|
| 829 |
+
" <td>1</td>\n",
|
| 830 |
+
" <td>0</td>\n",
|
| 831 |
+
" <td>18.903323</td>\n",
|
| 832 |
+
" <td>72.784597</td>\n",
|
| 833 |
+
" <td></td>\n",
|
| 834 |
+
" </tr>\n",
|
| 835 |
+
" <tr>\n",
|
| 836 |
+
" <th>5</th>\n",
|
| 837 |
+
" <td>1</td>\n",
|
| 838 |
+
" <td>1</td>\n",
|
| 839 |
+
" <td>18.903323</td>\n",
|
| 840 |
+
" <td>72.794102</td>\n",
|
| 841 |
+
" <td></td>\n",
|
| 842 |
+
" </tr>\n",
|
| 843 |
+
" <tr>\n",
|
| 844 |
+
" <th>6</th>\n",
|
| 845 |
+
" <td>1</td>\n",
|
| 846 |
+
" <td>2</td>\n",
|
| 847 |
+
" <td>18.903323</td>\n",
|
| 848 |
+
" <td>72.803607</td>\n",
|
| 849 |
+
" <td>Jagadish Canteen is a Food & Drink, Maratha St...</td>\n",
|
| 850 |
+
" </tr>\n",
|
| 851 |
+
" <tr>\n",
|
| 852 |
+
" <th>7</th>\n",
|
| 853 |
+
" <td>1</td>\n",
|
| 854 |
+
" <td>3</td>\n",
|
| 855 |
+
" <td>18.903323</td>\n",
|
| 856 |
+
" <td>72.813112</td>\n",
|
| 857 |
+
" <td>Indian Meterological Department is a Commercia...</td>\n",
|
| 858 |
+
" </tr>\n",
|
| 859 |
+
" <tr>\n",
|
| 860 |
+
" <th>8</th>\n",
|
| 861 |
+
" <td>2</td>\n",
|
| 862 |
+
" <td>0</td>\n",
|
| 863 |
+
" <td>18.912316</td>\n",
|
| 864 |
+
" <td>72.784597</td>\n",
|
| 865 |
+
" <td></td>\n",
|
| 866 |
+
" </tr>\n",
|
| 867 |
+
" <tr>\n",
|
| 868 |
+
" <th>9</th>\n",
|
| 869 |
+
" <td>2</td>\n",
|
| 870 |
+
" <td>1</td>\n",
|
| 871 |
+
" <td>18.912316</td>\n",
|
| 872 |
+
" <td>72.794102</td>\n",
|
| 873 |
+
" <td></td>\n",
|
| 874 |
+
" </tr>\n",
|
| 875 |
+
" <tr>\n",
|
| 876 |
+
" <th>10</th>\n",
|
| 877 |
+
" <td>2</td>\n",
|
| 878 |
+
" <td>2</td>\n",
|
| 879 |
+
" <td>18.912316</td>\n",
|
| 880 |
+
" <td>72.803607</td>\n",
|
| 881 |
+
" <td>Jagadish Canteen is a Food & Drink, Maratha St...</td>\n",
|
| 882 |
+
" </tr>\n",
|
| 883 |
+
" <tr>\n",
|
| 884 |
+
" <th>11</th>\n",
|
| 885 |
+
" <td>2</td>\n",
|
| 886 |
+
" <td>3</td>\n",
|
| 887 |
+
" <td>18.912316</td>\n",
|
| 888 |
+
" <td>72.813112</td>\n",
|
| 889 |
+
" <td>Cafe Coffee Day is a Food & Drink, King Plaza ...</td>\n",
|
| 890 |
+
" </tr>\n",
|
| 891 |
+
" <tr>\n",
|
| 892 |
+
" <th>12</th>\n",
|
| 893 |
+
" <td>3</td>\n",
|
| 894 |
+
" <td>0</td>\n",
|
| 895 |
+
" <td>18.921309</td>\n",
|
| 896 |
+
" <td>72.784597</td>\n",
|
| 897 |
+
" <td></td>\n",
|
| 898 |
+
" </tr>\n",
|
| 899 |
+
" <tr>\n",
|
| 900 |
+
" <th>13</th>\n",
|
| 901 |
+
" <td>3</td>\n",
|
| 902 |
+
" <td>1</td>\n",
|
| 903 |
+
" <td>18.921309</td>\n",
|
| 904 |
+
" <td>72.794102</td>\n",
|
| 905 |
+
" <td></td>\n",
|
| 906 |
+
" </tr>\n",
|
| 907 |
+
" <tr>\n",
|
| 908 |
+
" <th>14</th>\n",
|
| 909 |
+
" <td>3</td>\n",
|
| 910 |
+
" <td>2</td>\n",
|
| 911 |
+
" <td>18.921309</td>\n",
|
| 912 |
+
" <td>72.803607</td>\n",
|
| 913 |
+
" <td></td>\n",
|
| 914 |
+
" </tr>\n",
|
| 915 |
+
" <tr>\n",
|
| 916 |
+
" <th>15</th>\n",
|
| 917 |
+
" <td>3</td>\n",
|
| 918 |
+
" <td>3</td>\n",
|
| 919 |
+
" <td>18.921309</td>\n",
|
| 920 |
+
" <td>72.813112</td>\n",
|
| 921 |
+
" <td>Cafe Coffee Day is a Food & Drink, King Plaza ...</td>\n",
|
| 922 |
+
" </tr>\n",
|
| 923 |
+
" </tbody>\n",
|
| 924 |
+
"</table>\n",
|
| 925 |
+
"</div>"
|
| 926 |
+
],
|
| 927 |
+
"text/plain": [
|
| 928 |
+
" row col latitude longitude \\\n",
|
| 929 |
+
"0 0 0 18.894330 72.784597 \n",
|
| 930 |
+
"1 0 1 18.894330 72.794102 \n",
|
| 931 |
+
"2 0 2 18.894330 72.803607 \n",
|
| 932 |
+
"3 0 3 18.894330 72.813112 \n",
|
| 933 |
+
"4 1 0 18.903323 72.784597 \n",
|
| 934 |
+
"5 1 1 18.903323 72.794102 \n",
|
| 935 |
+
"6 1 2 18.903323 72.803607 \n",
|
| 936 |
+
"7 1 3 18.903323 72.813112 \n",
|
| 937 |
+
"8 2 0 18.912316 72.784597 \n",
|
| 938 |
+
"9 2 1 18.912316 72.794102 \n",
|
| 939 |
+
"10 2 2 18.912316 72.803607 \n",
|
| 940 |
+
"11 2 3 18.912316 72.813112 \n",
|
| 941 |
+
"12 3 0 18.921309 72.784597 \n",
|
| 942 |
+
"13 3 1 18.921309 72.794102 \n",
|
| 943 |
+
"14 3 2 18.921309 72.803607 \n",
|
| 944 |
+
"15 3 3 18.921309 72.813112 \n",
|
| 945 |
+
"\n",
|
| 946 |
+
" Map Data \n",
|
| 947 |
+
"0 \n",
|
| 948 |
+
"1 Prongs Reef is a Natural, \n",
|
| 949 |
+
"2 United Services Club Golf Course is a Leisure ... \n",
|
| 950 |
+
"3 Indian Meterological Department is a Commercia... \n",
|
| 951 |
+
"4 \n",
|
| 952 |
+
"5 \n",
|
| 953 |
+
"6 Jagadish Canteen is a Food & Drink, Maratha St... \n",
|
| 954 |
+
"7 Indian Meterological Department is a Commercia... \n",
|
| 955 |
+
"8 \n",
|
| 956 |
+
"9 \n",
|
| 957 |
+
"10 Jagadish Canteen is a Food & Drink, Maratha St... \n",
|
| 958 |
+
"11 Cafe Coffee Day is a Food & Drink, King Plaza ... \n",
|
| 959 |
+
"12 \n",
|
| 960 |
+
"13 \n",
|
| 961 |
+
"14 \n",
|
| 962 |
+
"15 Cafe Coffee Day is a Food & Drink, King Plaza ... "
|
| 963 |
+
]
|
| 964 |
+
},
|
| 965 |
+
"execution_count": 107,
|
| 966 |
+
"metadata": {},
|
| 967 |
+
"output_type": "execute_result"
|
| 968 |
+
}
|
| 969 |
+
],
|
| 970 |
+
"source": [
|
| 971 |
+
"grid_df.head(20)"
|
| 972 |
+
]
|
| 973 |
+
}
|
| 974 |
+
],
|
| 975 |
+
"metadata": {
|
| 976 |
+
"kernelspec": {
|
| 977 |
+
"display_name": "Python 3",
|
| 978 |
+
"language": "python",
|
| 979 |
+
"name": "python3"
|
| 980 |
+
},
|
| 981 |
+
"language_info": {
|
| 982 |
+
"codemirror_mode": {
|
| 983 |
+
"name": "ipython",
|
| 984 |
+
"version": 3
|
| 985 |
+
},
|
| 986 |
+
"file_extension": ".py",
|
| 987 |
+
"mimetype": "text/x-python",
|
| 988 |
+
"name": "python",
|
| 989 |
+
"nbconvert_exporter": "python",
|
| 990 |
+
"pygments_lexer": "ipython3",
|
| 991 |
+
"version": "3.12.0"
|
| 992 |
+
}
|
| 993 |
+
},
|
| 994 |
+
"nbformat": 4,
|
| 995 |
+
"nbformat_minor": 2
|
| 996 |
+
}
|
src/main.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################################## IMPORTING REQUIRED LIBRARIES ####################################
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 5 |
+
data_folder = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data')
|
| 6 |
+
from utilities import get_data, input_filter, clean_data
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
################################################## INPUTS ################################################
|
| 10 |
+
|
| 11 |
+
left_lat = 18.889833
|
| 12 |
+
left_lon = 72.779844
|
| 13 |
+
dist = 35
|
| 14 |
+
|
| 15 |
+
def data_sourcing():
    """Fetch the grid dataset for the configured area and save it as CSV.

    The module-level ``left_lat``/``left_lon`` are resolved through
    ``input_filter``; the resulting coordinates and ``dist`` are handed to
    ``get_data``. The frame it returns is written to ``MMR_DATA.csv`` inside
    ``data_folder``.

    Returns:
        The DataFrame produced by ``get_data``.
    """
    origin = input_filter(lat=left_lat, lon=left_lon)
    raw_frame = get_data(*origin, dist)
    raw_frame.to_csv(f'{data_folder}/MMR_DATA.csv', index=False)
    return raw_frame
|
| 20 |
+
|
| 21 |
+
def data_clean_for_training(df):
    """Clean *df* with ``clean_data``, save the result as CSV and return it.

    The cleaned frame is written to ``MMR_DATA_CLEAN.csv`` inside
    ``data_folder``.
    """
    cleaned_frame = clean_data(df)
    cleaned_frame.to_csv(f'{data_folder}/MMR_DATA_CLEAN.csv', index=False)
    return cleaned_frame
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
if __name__ == '__main__':

    df = data_sourcing()  ## testing the data sourcing endpoint
    # A DataFrame has no single truth value (``if df:`` raises ValueError),
    # so test explicitly that something was actually loaded.
    if df is not None and not df.empty:
        print("Data loaded successfully !!")

    clean_df = data_clean_for_training(df)
|
utilities/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .data_loader import get_data, input_filter
|
| 2 |
+
from .data_cleaner import clean_data
|
utilities/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (277 Bytes). View file
|
|
|
utilities/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (251 Bytes). View file
|
|
|
utilities/__pycache__/data_loader.cpython-311.pyc
ADDED
|
Binary file (10.1 kB). View file
|
|
|
utilities/__pycache__/data_loader.cpython-312.pyc
ADDED
|
Binary file (8.92 kB). View file
|
|
|
utilities/data_cleaner.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import nltk
|
| 3 |
+
from nltk.corpus import stopwords
|
| 4 |
+
from nltk.stem import PorterStemmer
|
| 5 |
+
from nltk.stem import WordNetLemmatizer
|
| 6 |
+
|
| 7 |
+
# NLTK helpers shared by every clean_text call; filled on first use.
_TEXT_TOOLS = {}


def _get_text_tools():
    """Download the NLTK corpora once and return (stop_words, stemmer, lemmatizer)."""
    if not _TEXT_TOOLS:
        nltk.download('stopwords')
        nltk.download('wordnet')
        _TEXT_TOOLS['stop_words'] = set(stopwords.words('english'))
        _TEXT_TOOLS['stemmer'] = PorterStemmer()
        _TEXT_TOOLS['lemmatizer'] = WordNetLemmatizer()
    return (
        _TEXT_TOOLS['stop_words'],
        _TEXT_TOOLS['stemmer'],
        _TEXT_TOOLS['lemmatizer'],
    )


def clean_text(text):
    """Normalise a piece of text for vectorisation.

    Steps, in order: strip every character that is neither a word character
    nor whitespace, lower-case, drop English stop words, Porter-stem each
    remaining word and then lemmatize it.

    Args:
        text: The string to clean.

    Returns:
        The processed words joined by single spaces.
    """
    # The corpora download and tool construction are hoisted out of the
    # per-call path so applying this to a whole column does not repeat them.
    stop_words, stemmer, lemmatizer = _get_text_tools()

    text = re.sub(r'[^\w\s]', '', text).lower()
    words = [word for word in text.split() if word not in stop_words]
    words = [lemmatizer.lemmatize(stemmer.stem(word)) for word in words]
    return ' '.join(words)
|
| 21 |
+
|
| 22 |
+
def clean_data(df):
    """Return the rows of *df* whose 'Map Data' text is usable.

    Missing 'Map Data' values are treated as empty strings. Rows are kept
    only when the text length is greater than 0 and less than 5000
    characters. The input frame is left untouched; a filtered copy (with the
    original index labels) is returned.

    Args:
        df: DataFrame containing a 'Map Data' column.

    Returns:
        A new DataFrame with the filtered rows.
    """
    # Work on a copy so the caller's frame is not modified as a side effect.
    cleaned = df.copy()
    cleaned['Map Data'] = cleaned['Map Data'].fillna('')
    text_length = cleaned['Map Data'].str.len()
    # NOTE: text normalisation via clean_text is intentionally not applied here.
    return cleaned[(text_length > 0) & (text_length < 5000)]
|
utilities/data_loader.py
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import re
|
| 4 |
+
import math
|
| 5 |
+
from typing import Tuple, List, Dict
|
| 6 |
+
|
| 7 |
+
def fetch_osm_data(lat: float, lon: float, radius: int, timeout: float = 180.0) -> List[Dict]:
    """Query the Overpass API for named OSM elements around a point.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        radius: Value passed to the Overpass ``around`` filter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``elements`` list from the Overpass JSON response (nodes, ways
        and relations that carry a ``name`` tag).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the request exceeds *timeout*.
    """
    overpass_url = "http://overpass-api.de/api/interpreter"
    overpass_query = f"""
    [out:json];
    (
      node["name"](around:{radius},{lat},{lon});
      way["name"](around:{radius},{lat},{lon});
      relation["name"](around:{radius},{lat},{lon});
    );
    out center;
    """

    response = requests.get(
        overpass_url,
        params={'data': overpass_query},
        timeout=timeout,
    )
    # Fail loudly on rate limiting / server errors instead of trying to
    # parse an error page as JSON.
    response.raise_for_status()
    return response.json()['elements']
|
| 22 |
+
|
| 23 |
+
def determine_location_type(tags: Dict[str, str]) -> str:
    """Map an OSM element's tags to a coarse location category.

    Rules are evaluated top to bottom and the first match wins, so the order
    of the checks below is significant.

    Args:
        tags: The element's OSM tag dictionary.

    Returns:
        One of the category labels used below, ``'Landuse: <Value>'`` for an
        unrecognised ``landuse`` value, or ``'Other'`` when nothing matches.
    """
    building = tags.get('building')
    amenity = tags.get('amenity')

    # Residential
    if building in ('residential', 'house', 'apartments', 'detached', 'terrace', 'dormitory', 'bungalow'):
        return 'Residential'

    # Commercial
    if any(key in tags for key in ('shop', 'office', 'craft')):
        return 'Commercial'
    if building in ('commercial', 'office', 'retail', 'supermarket', 'kiosk'):
        return 'Commercial'

    # Industrial
    if building in ('industrial', 'warehouse', 'factory', 'manufacture'):
        return 'Industrial'
    if 'industrial' in tags or 'industry' in tags:
        return 'Industrial'

    # Educational
    if amenity in ('school', 'university', 'college', 'library', 'kindergarten', 'language_school'):
        return 'Educational'

    # Healthcare
    if amenity in ('hospital', 'clinic', 'doctors', 'dentist', 'pharmacy', 'veterinary'):
        return 'Healthcare'

    # Food & Drink
    if amenity in ('restaurant', 'cafe', 'bar', 'fast_food', 'pub', 'food_court'):
        return 'Food & Drink'

    # Parks & Recreation
    # Must run before the generic leisure/tourism rule below; otherwise any
    # element with a 'leisure' tag is already classified and this rule can
    # never match.
    if tags.get('leisure') in ('park', 'playground', 'sports_centre', 'stadium', 'garden'):
        return 'Parks & Recreation'

    # Leisure & Entertainment
    if 'leisure' in tags or 'tourism' in tags:
        return 'Leisure & Entertainment'
    if amenity in ('theatre', 'cinema', 'nightclub', 'arts_centre', 'community_centre'):
        return 'Leisure & Entertainment'

    # Transportation
    if amenity in ('parking', 'bicycle_parking', 'bus_station', 'ferry_terminal'):
        return 'Transportation'
    if 'highway' in tags or 'railway' in tags or 'aeroway' in tags:
        return 'Transportation'

    # Religious
    if amenity in ('place_of_worship', 'monastery'):
        return 'Religious'

    # Government & Public Services
    if amenity in ('townhall', 'courthouse', 'police', 'fire_station', 'post_office'):
        return 'Government & Public Services'

    # Natural
    if 'natural' in tags:
        return 'Natural'

    # Landuse
    if 'landuse' in tags:
        landuse = tags['landuse'].capitalize()
        if landuse in ('Residential', 'Commercial', 'Industrial', 'Retail'):
            return landuse
        return f'Landuse: {landuse}'

    # If no specific category is found, return 'Other'
    return 'Other'
|
| 90 |
+
|
| 91 |
+
def parse_osm_data(elements: List[Dict]) -> pd.DataFrame:
    """Turn raw Overpass elements into a table of ID, name and category.

    Each element becomes one row: ``ID`` is ``"<type>_<id>"``, ``Location
    Name`` is the ``name`` tag (empty string when absent) and ``Location
    Type`` comes from ``determine_location_type``. With no elements, an empty
    frame carrying the same three columns is returned.
    """

    def to_record(element: Dict) -> Dict:
        element_tags = element.get('tags', {})
        return {
            'ID': f"{element['type']}_{element['id']}",
            'Location Name': element_tags.get('name', ''),
            'Location Type': determine_location_type(element_tags),
        }

    records = list(map(to_record, elements))
    if records:
        return pd.DataFrame(records)
    return pd.DataFrame(columns=['ID', 'Location Name', 'Location Type'])
|
| 104 |
+
|
| 105 |
+
def get_osm_data(lat: float, lon: float, radius: int) -> pd.DataFrame:
    """Fetch the OSM elements around a point and return them as a parsed table.

    Chains ``fetch_osm_data`` and ``parse_osm_data``.
    """
    return parse_osm_data(fetch_osm_data(lat, lon, radius))
|
| 108 |
+
|
| 109 |
+
def dms_to_decimal(coord_str):
    """Convert a degrees/minutes/seconds coordinate pair to decimal degrees.

    Args:
        coord_str: Text such as ``19°03'08.6"N 72°54'06.0"E`` (latitude
            first, then longitude).

    Returns:
        ``(lat, lon)`` as floats; southern latitudes and western longitudes
        are negative.

    Raises:
        ValueError: If *coord_str* does not start with the expected format.
    """
    dms_pair = re.match(
        r'(\d+)°(\d+)\'([\d.]+)"([NS])\s*(\d+)°(\d+)\'([\d.]+)"([EW])',
        coord_str,
    )
    if dms_pair is None:
        raise ValueError("Invalid coordinate format. Expected format: 19°03'08.6\"N 72°54'06.0\"E")

    def to_degrees(degrees, minutes, seconds):
        # 60 minutes per degree, 3600 seconds per degree
        return float(degrees) + float(minutes)/60 + float(seconds)/3600

    parts = dms_pair.groups()
    latitude = to_degrees(*parts[0:3])
    longitude = to_degrees(*parts[4:7])

    # Hemisphere letters decide the sign.
    if parts[3] == 'S':
        latitude = -latitude
    if parts[7] == 'W':
        longitude = -longitude

    return latitude, longitude
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def calculate_distant_points(lat: float, lon: float, distance: float) -> tuple:
    """Return the points reached by moving *distance* metres along each axis.

    Uses a spherical Earth of radius 6 371 000 m. The longitude offset is
    scaled by the cosine of the starting latitude.

    Returns:
        ``((lat, shifted_lon), (shifted_lat, lon))`` — first the point at the
        same latitude with the longitude increased, then the point at the
        same longitude with the latitude increased.
    """
    earth_radius_m = 6371000

    # Angular offset (radians -> degrees) along the parallel through `lat`.
    parallel_radius = earth_radius_m * math.cos(math.radians(lat))
    lon_offset_deg = math.degrees(distance / parallel_radius)

    # Angular offset along the meridian.
    lat_offset_deg = math.degrees(distance / earth_radius_m)

    same_latitude_point = (lat, lon + lon_offset_deg)
    same_longitude_point = (lat + lat_offset_deg, lon)
    return (same_latitude_point, same_longitude_point)
|
| 149 |
+
|
| 150 |
+
## 2d map grid (0,0) --> bottom left
|
| 151 |
+
|
| 152 |
+
def create_map_grid(bottom_left: Tuple[float, float], top_right: Tuple[float, float], rows: int, cols: int) -> List[List[Tuple[float, float]]]:
    """Split a lat/lon rectangle into ``rows`` x ``cols`` cells and return their centres.

    Args:
        bottom_left: ``(lat, lon)`` of the rectangle's bottom-left corner.
        top_right: ``(lat, lon)`` of the rectangle's top-right corner.
        rows: Number of cells along the latitude axis.
        cols: Number of cells along the longitude axis.

    Returns:
        ``grid[i][j]`` is the ``(lat, lon)`` centre of the cell in row ``i``
        (counted from the bottom) and column ``j`` (counted from the left).
    """
    cell_height = (top_right[0] - bottom_left[0]) / rows
    cell_width = (top_right[1] - bottom_left[1]) / cols

    # Cell origin plus half a cell gives the cell centre.
    centre_lats = [(bottom_left[0] + r * cell_height) + cell_height / 2 for r in range(rows)]
    centre_lons = [(bottom_left[1] + c * cell_width) + cell_width / 2 for c in range(cols)]

    return [[(centre_lat, centre_lon) for centre_lon in centre_lons] for centre_lat in centre_lats]
|
| 168 |
+
|
| 169 |
+
## entire pipeline
|
| 170 |
+
|
| 171 |
+
left_lat = 18.889833
|
| 172 |
+
left_lon = 72.779844
|
| 173 |
+
dist = 35
|
| 174 |
+
|
| 175 |
+
def input_filter(lat=None, lon=None, string=None):
    """Normalise a location given either as numbers or as a DMS string.

    Args:
        lat: Latitude in decimal degrees. When given, it takes precedence
            and ``(lat, lon)`` is returned as-is.
        lon: Longitude in decimal degrees, paired with *lat*.
        string: Degrees/minutes/seconds text parsed with ``dms_to_decimal``;
            only consulted when *lat* is not given.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when neither *lat*
        nor *string* is supplied.
    """
    # Identity checks: `!= None` would invoke element-wise comparison on
    # array-like arguments and also treats None as an ordinary value.
    if lat is not None:
        return (lat, lon)
    if string is not None:
        latitude, longitude = dms_to_decimal(string)
        return (latitude, longitude)
    return None
|
| 183 |
+
|
| 184 |
+
def get_data(bottom_left_lat, bottom_left_lon, dist):
    """Build a per-cell text description of named OSM places over a square area.

    The area starts at the given bottom-left corner and extends ``dist``
    kilometres to the north and to the east (``1000 * dist`` metres are
    passed to ``calculate_distant_points``). It is split into
    ``dist`` x ``dist`` cells; for each cell centre the surrounding OSM
    elements are fetched and summarised.

    Args:
        bottom_left_lat: Latitude of the area's bottom-left corner.
        bottom_left_lon: Longitude of the area's bottom-left corner.
        dist: Side length of the area in kilometres; also used as the number
            of grid rows and columns.

    Returns:
        A DataFrame with one row per grid cell and the columns ``row``,
        ``col``, ``latitude``, ``longitude`` and ``Map Data``. ``Map Data``
        is a concatenation of ``"<name> is a <type>; "`` entries with commas
        replaced by spaces.
    """
    # Categories that are dropped from the description.
    excluded_types = ('Other', 'Religious', 'Transportation')

    same_lat_point, same_lon_point = calculate_distant_points(
        bottom_left_lat, bottom_left_lon, 1000 * dist
    )
    top_right = (same_lon_point[0], same_lat_point[1])
    # Use the caller's corner, not the module-level defaults, so the grid
    # actually covers the requested area.
    grid = create_map_grid((bottom_left_lat, bottom_left_lon), top_right, dist, dist)

    grid_dataset = []
    for i, grid_row in enumerate(grid):
        for j, (lat, lon) in enumerate(grid_row):
            # NOTE(review): 710 is presumably metres, chosen to cover a
            # ~1 km cell out to its corners (half-diagonal ~707 m) - confirm.
            places = get_osm_data(lat, lon, 710)
            labelled = places[~places['Location Type'].isin(excluded_types)]

            # Keep the first occurrence of each (name, type) pair, in order.
            unique_places = labelled[['Location Name', 'Location Type']].drop_duplicates()

            map_data = ''.join(
                name + ' is a ' + kind + '; '
                for name, kind in zip(
                    unique_places['Location Name'], unique_places['Location Type']
                )
            )
            # Replace any comma in the text with a blank space.
            map_data = map_data.replace(',', ' ')

            grid_dataset.append({
                "row": i,
                "col": j,
                "latitude": lat,
                "longitude": lon,
                "Map Data": map_data,
            })

    return pd.DataFrame(grid_dataset)
|
| 222 |
+
# grid_df.to_csv('MMR_DATASET.csv', index=False)
|
utils/__init__.py
DELETED
|
File without changes
|