Load data from CSV and plot it using Seaborn's barplot method
For my PDS class!
%matplotlib inline
import mysql.connector
from datetime import date, datetime
import sys
import pandas as pd
import seaborn as sns
# Query every row of the `cea_salespersons` table, count the rows per
# registration year, and draw the counts as a Seaborn bar plot.
#
# NOTE(review): the credentials below are hard-coded in the source; consider
# reading them from environment variables or a config file kept out of
# version control.
user, pw, host, db = 'root', 'it8701', '127.0.0.1', 'anotherdatabase2'
cnx = mysql.connector.connect(user=user, password=pw, host=host, database=db)
cursor = cnx.cursor()
select_stmt = "SELECT * FROM cea_salespersons"
try:
    cursor.execute(select_stmt)
    df = pd.DataFrame(
        cursor.fetchall(),
        columns=[
            'cea_salesperson_id', 'salesperson_name', 'registration_no',
            'registration_start_date', 'registration_end_date',
            'estate_agent_name', 'estate_agent_license_no',
        ],
    )
    # Values that cannot be parsed as dates become NaT instead of raising.
    df["registration_start_date"] = pd.to_datetime(df["registration_start_date"], errors="coerce")
    # Create a new DataFrameGroupBy object that groups the data by registration year
    group_by_years = df.groupby(df["registration_start_date"].dt.year)
    # Count the rows in each year, keep only one column of counts, rename it
    # so it reads well on the bar plot, and turn the year index back into a
    # regular column so it can be passed as the x value.
    # The steps are chained (no inplace=True) so each one returns a new frame
    # rather than mutating a column selection of another frame.
    group_by_years_count = (
        group_by_years.count()[["cea_salesperson_id"]]
        .rename(columns={'cea_salesperson_id': 'count'})
        .reset_index()
    )
    # https://seaborn.pydata.org/generated/seaborn.barplot.html#seaborn.barplot
    sns.set(style="whitegrid")
    ax = sns.barplot(x="registration_start_date", y="count", data=group_by_years_count)
except Exception:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt
    # and SystemExit are not intercepted here.
    print("Unexpected error:", sys.exc_info()[0])
    print("Unexpected error:", sys.exc_info()[1])
    # Exit with a non-zero status so the failure is visible to the caller.
    sys.exit(1)
finally:
    # Runs on both success and failure, so the cursor and connection are
    # always released.
    cursor.close()
    cnx.close()
import mysql.connector
from datetime import date, datetime
import sys
import pandas as pd
import seaborn as sns
# Query every row of the `cea_salespersons` table, count the rows per
# registration year, and draw the counts as a Seaborn bar plot.
#
# NOTE(review): the credentials below are hard-coded in the source; consider
# reading them from environment variables or a config file kept out of
# version control.
user, pw, host, db = 'root', 'it8701', '127.0.0.1', 'anotherdatabase2'
cnx = mysql.connector.connect(user=user, password=pw, host=host, database=db)
cursor = cnx.cursor()
select_stmt = "SELECT * FROM cea_salespersons"
try:
    cursor.execute(select_stmt)
    df = pd.DataFrame(
        cursor.fetchall(),
        columns=[
            'cea_salesperson_id', 'salesperson_name', 'registration_no',
            'registration_start_date', 'registration_end_date',
            'estate_agent_name', 'estate_agent_license_no',
        ],
    )
    # Values that cannot be parsed as dates become NaT instead of raising.
    df["registration_start_date"] = pd.to_datetime(df["registration_start_date"], errors="coerce")
    # Create a new DataFrameGroupBy object that groups the data by registration year
    group_by_years = df.groupby(df["registration_start_date"].dt.year)
    # Count the rows in each year, keep only one column of counts, rename it
    # so it reads well on the bar plot, and turn the year index back into a
    # regular column so it can be passed as the x value.
    # The steps are chained (no inplace=True) so each one returns a new frame
    # rather than mutating a column selection of another frame.
    group_by_years_count = (
        group_by_years.count()[["cea_salesperson_id"]]
        .rename(columns={'cea_salesperson_id': 'count'})
        .reset_index()
    )
    # https://seaborn.pydata.org/generated/seaborn.barplot.html#seaborn.barplot
    sns.set(style="whitegrid")
    ax = sns.barplot(x="registration_start_date", y="count", data=group_by_years_count)
except Exception:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt
    # and SystemExit are not intercepted here.
    print("Unexpected error:", sys.exc_info()[0])
    print("Unexpected error:", sys.exc_info()[1])
    # Exit with a non-zero status so the failure is visible to the caller.
    sys.exit(1)
finally:
    # Runs on both success and failure, so the cursor and connection are
    # always released.
    cursor.close()
    cnx.close()
This is the graph produced
Comments
Post a Comment