# Provenance: contents of streamlit/basketball_app.py as added by commit
# c860a556c8b4bc6ba483dc06734f5553ce540cca ("Add files via upload"),
# authored by Chanin Nantasenamat on Tue, 7 Jul 2020 16:45:55 +0700.
"""Streamlit app: NBA Player Stats Explorer.

Scrapes a season's per-game player table from basketball-reference.com,
lets the user filter it by team and position from the sidebar, shows the
filtered table, offers it as a CSV download and, on request, draws a
heatmap of the correlations between its numeric columns.
"""

import base64
import io

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st

st.title('NBA Player Stats Explorer')

st.markdown("""
This app performs simple webscraping of NBA player stats data!
* **Python libraries:** base64, pandas, streamlit
* **Data source:** [Basketball-reference.com](https://www.basketball-reference.com/).
""")

st.sidebar.header('User Input Features')
# Seasons are offered newest first (2019 down to 1950).
selected_year = st.sidebar.selectbox('Year', list(reversed(range(1950, 2020))))


# Web scraping of NBA player stats
@st.cache
def load_data(year):
    """Return the per-game player stats table for the given season.

    Reads the first HTML table found on the season's "per game" page,
    removes the header rows that are repeated inside the table body,
    replaces missing values with 0 and drops the 'Rk' column.

    Args:
        year: Season year, interpolated into the page URL.

    Returns:
        A pandas DataFrame with one row per table entry.
    """
    url = (
        "https://www.basketball-reference.com/leagues/NBA_"
        + str(year)
        + "_per_game.html"
    )
    tables = pd.read_html(url, header=0)
    df = tables[0]
    # Repeated header rows show up as data rows whose Age cell is the
    # literal text 'Age'; drop them.
    raw = df.drop(df[df.Age == 'Age'].index)
    raw = raw.fillna(0)
    return raw.drop(['Rk'], axis=1)


playerstats = load_data(selected_year)

# Sidebar - Team selection (every team is selected by default)
sorted_unique_team = sorted(playerstats.Tm.unique())
selected_team = st.sidebar.multiselect(
    'Team', sorted_unique_team, sorted_unique_team
)

# Sidebar - Position selection (every position is selected by default)
unique_pos = ['C', 'PF', 'SF', 'PG', 'SG']
selected_pos = st.sidebar.multiselect('Position', unique_pos, unique_pos)

# Filtering data: keep rows matching both the team and position choices.
df_selected_team = playerstats[
    (playerstats.Tm.isin(selected_team))
    & (playerstats.Pos.isin(selected_pos))
]

st.header('Display Player Stats of Selected Team(s)')
st.write(
    'Data Dimension: '
    + str(df_selected_team.shape[0])
    + ' rows and '
    + str(df_selected_team.shape[1])
    + ' columns.'
)
st.dataframe(df_selected_team)


# Download NBA player stats data
# https://discuss.streamlit.io/t/how-to-download-file-in-streamlit/1806
def filedownload(df):
    """Return an HTML anchor that downloads ``df`` as a CSV file.

    The CSV text is base64-encoded and embedded in a ``data:`` URI, so the
    download needs no file on the server.

    Args:
        df: DataFrame to serialise (written without its index).

    Returns:
        An ``<a>`` element as a string; render it with
        ``st.markdown(..., unsafe_allow_html=True)``.
    """
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()  # str -> bytes -> str
    return (
        f'<a href="data:text/csv;base64,{b64}" download="playerstats.csv">'
        'Download CSV File</a>'
    )


st.markdown(filedownload(df_selected_team), unsafe_allow_html=True)

# Heatmap
if st.button('Intercorrelation Heatmap'):
    st.header('Intercorrelation Matrix Heatmap')

    # Round-trip the table through CSV so pandas re-infers column dtypes
    # (presumably the scraped columns arrive as text because header rows
    # were mixed into the body). Done in memory so that no output.csv is
    # left on disk or shared between sessions.
    csv_buffer = io.StringIO(df_selected_team.to_csv(index=False))
    df = pd.read_csv(csv_buffer)

    # Correlate numeric columns only; text columns such as Tm and Pos
    # cannot be correlated.
    corr = df.select_dtypes(include='number').corr()

    if corr.empty:
        st.warning('No numeric data available for the current selection.')
    else:
        # Hide the diagonal and upper triangle, which mirror the lower one.
        mask = np.triu(np.ones_like(corr, dtype=bool))
        with sns.axes_style("white"):
            f, ax = plt.subplots(figsize=(7, 5))
            sns.heatmap(corr, mask=mask, vmax=1, square=True, ax=ax)
        # Pass the figure explicitly instead of relying on pyplot's
        # implicit "current figure".
        st.pyplot(f)