Spaces:
Sleeping
Sleeping
Commit
·
1662e26
1
Parent(s):
28aa6d6
feat: streamlit-app
Browse files
- src/app.py +40 -0
- src/main.py +4 -4
- utilities/__pycache__/data_loader.cpython-312.pyc +0 -0
- utilities/data_loader.py +4 -4
src/app.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
######################################## IMPORTING REQUIRED LIBRARIES ####################################
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import streamlit as st
|
| 6 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 7 |
+
data_folder = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data')
|
| 8 |
+
from utilities import get_data, input_filter, clean_data, autogenerate_labels
|
| 9 |
+
|
| 10 |
+
def data_sourcing(left_lat, left_lon, dist, loc_name):
    """Fetch the raw data for a location and save it as a CSV file.

    Args:
        left_lat: Latitude value handed to ``input_filter``.
        left_lon: Longitude value handed to ``input_filter``.
        dist: Distance forwarded unchanged to ``get_data``.
        loc_name: Location label embedded in the output file name.

    Returns:
        The object returned by ``get_data`` (it must support ``to_csv``).
    """
    lat, lon = input_filter(lat=left_lat, lon=left_lon)
    df = get_data(lat, lon, dist)

    # DataFrame.to_csv does not create missing parent directories, so make
    # sure the target folder exists before writing.
    os.makedirs(data_folder, exist_ok=True)
    df.to_csv(f'{data_folder}/LOCATION_{loc_name}_DATA.csv', index=False)
    return df
|
| 15 |
+
|
| 16 |
+
def data_clean_for_training(df):
    """Clean ``df`` with ``clean_data`` and save the result as a CSV file.

    Args:
        df: Data passed to ``clean_data``.

    Returns:
        The object returned by ``clean_data`` (it must support ``to_csv``).
    """
    df = clean_data(df)

    # DataFrame.to_csv does not create missing parent directories, so make
    # sure the target folder exists before writing.
    os.makedirs(data_folder, exist_ok=True)
    df.to_csv(f'{data_folder}/MMR_DATA_CLEAN.csv', index=False)
    return df
|
| 20 |
+
|
| 21 |
+
# Streamlit page: collect the pipeline inputs, then run source -> clean ->
# label -> save when the button is pressed.
st.title("Map Data Analysis - ETL Pipeline")

left_lat = st.number_input("Enter the left latitude", value=18.889833)
left_lon = st.number_input("Enter the left longitude", value=72.779844)
print(left_lat, left_lon)
loc_name = st.text_input("Enter the location name", value="Mumbai")
dist = st.number_input("Enter the distance", value=35)

if st.button("Run ETL Pipeline"):
    # data_sourcing requires loc_name as its fourth argument; omitting it
    # raises TypeError before any data is fetched.
    df = data_sourcing(left_lat, left_lon, dist, loc_name)

    # A DataFrame has no single truth value (`if df:` raises ValueError),
    # so check explicitly that something was loaded.
    if df is not None and not df.empty:
        st.write("Data loaded successfully !!")

    df = clean_data(df)
    labelled_df, embeddings_df = autogenerate_labels(df)

    labelled_df.to_csv(f'{data_folder}/DATA_{loc_name}_CLEAN_LABELLED.csv', index=False)
    embeddings_df.to_csv(f'{data_folder}/DATA_{loc_name}_CLEAN_EMBEDDINGS.csv', index=False)

    st.write("ETL Pipeline executed successfully !!")
|
src/main.py
CHANGED
|
@@ -9,11 +9,11 @@ from utilities import get_data, input_filter, clean_data, autogenerate_labels
|
|
| 9 |
|
| 10 |
################################################## INPUTS ################################################
|
| 11 |
|
| 12 |
-
left_lat = 18.889833
|
| 13 |
-
left_lon = 72.779844
|
| 14 |
-
dist = 35
|
| 15 |
|
| 16 |
-
def data_sourcing():
|
| 17 |
lat, lon = input_filter(lat = left_lat, lon=left_lon)
|
| 18 |
df = get_data(lat, lon, dist)
|
| 19 |
df.to_csv(f'{data_folder}/MMR_DATA.csv', index=False)
|
|
|
|
| 9 |
|
| 10 |
################################################## INPUTS ################################################
|
| 11 |
|
| 12 |
+
# left_lat = 18.889833
|
| 13 |
+
# left_lon = 72.779844
|
| 14 |
+
# dist = 35
|
| 15 |
|
| 16 |
+
def data_sourcing(left_lat, left_lon, dist):
|
| 17 |
lat, lon = input_filter(lat = left_lat, lon=left_lon)
|
| 18 |
df = get_data(lat, lon, dist)
|
| 19 |
df.to_csv(f'{data_folder}/MMR_DATA.csv', index=False)
|
utilities/__pycache__/data_loader.cpython-312.pyc
CHANGED
|
Binary files a/utilities/__pycache__/data_loader.cpython-312.pyc and b/utilities/__pycache__/data_loader.cpython-312.pyc differ
|
|
|
utilities/data_loader.py
CHANGED
|
@@ -168,9 +168,9 @@ def create_map_grid(bottom_left: Tuple[float, float], top_right: Tuple[float, fl
|
|
| 168 |
|
| 169 |
## entire pipeline
|
| 170 |
|
| 171 |
-
left_lat = 18.889833
|
| 172 |
-
left_lon = 72.779844
|
| 173 |
-
dist = 35
|
| 174 |
|
| 175 |
def input_filter(lat=None, lon=None, string=None):
|
| 176 |
if lat != None:
|
|
@@ -187,7 +187,7 @@ def get_data(bottom_left_lat, bottom_left_lon, dist):
|
|
| 187 |
|
| 188 |
top_right_lat = result[1][0]
|
| 189 |
top_right_lon = result[0][1]
|
| 190 |
-
grid = create_map_grid((
|
| 191 |
|
| 192 |
grid_dataset = []
|
| 193 |
for i, row in enumerate(grid):
|
|
|
|
| 168 |
|
| 169 |
## entire pipeline
|
| 170 |
|
| 171 |
+
# left_lat = 18.889833
|
| 172 |
+
# left_lon = 72.779844
|
| 173 |
+
# dist = 35
|
| 174 |
|
| 175 |
def input_filter(lat=None, lon=None, string=None):
|
| 176 |
if lat != None:
|
|
|
|
| 187 |
|
| 188 |
top_right_lat = result[1][0]
|
| 189 |
top_right_lon = result[0][1]
|
| 190 |
+
grid = create_map_grid((bottom_left_lat, bottom_left_lon), (top_right_lat, top_right_lon), dist, dist)
|
| 191 |
|
| 192 |
grid_dataset = []
|
| 193 |
for i, row in enumerate(grid):
|