import pandas as pd
import numpy as np
# Folder holding the monitor CSV files, named 001.csv through 332.csv.
directory = "~/Desktop/specdata/"

# Read each file exactly once; "%03d" zero-pads the monitor number to three
# digits, replacing the manual "00"/"0" prefix branches.
monitor_frames = [pd.read_csv(directory + "%03d.csv" % i) for i in range(1, 333)]

# Stack all files in a single pass instead of outer-merging them one by one,
# which re-copied the accumulated frame on every iteration.
# NOTE(review): assumes every file has the same columns and that no row is
# shared between files -- under that assumption the outer merge on all common
# columns only ever appended rows. Confirm against the data set.
pollut_data = pd.concat(monitor_frames, ignore_index=True)

# pd.merge(how='outer') is documented to sort the join keys lexicographically;
# sorting on every column keeps that row order for the order-sensitive steps
# (head(), line plot) that follow.
pollut_data = pollut_data.sort_values(list(pollut_data.columns)).reset_index(drop=True)
# Complete-case view of the data: drop every row containing a missing value,
# then use the 'Date' column as the index.
df = pollut_data.dropna().set_index('Date')
df.head()
import matplotlib.pyplot as plt
%matplotlib notebook
df_new = df[['sulfate','nitrate']]
df.head()
plt.style.available
plt.style.use('seaborn-dark')
df_new.plot()
df.plot.scatter("sulfate","nitrate",c='ID', colormap='viridis')
ax = df_new.plot.box();
df_new.plot.kde(alpha=0.7);
import seaborn as sns
# IPython-only syntax: the trailing '?' displays the help for sns.jointplot.
sns.jointplot?
# Work on an explicit copy so that adding columns below modifies df2 itself
# and does not raise pandas' SettingWithCopyWarning.
df2 = pollut_data.dropna().copy()
df2.head()
# Vectorised string slicing instead of a per-row lambda.
# NOTE(review): assumes 'Date' holds strings such as 'YYYY-MM-DD', so the
# first four characters are the year and the last five are 'MM-DD' -- confirm.
df2['Year'] = df2['Date'].str[:4]
df2['Date2'] = df2['Date'].str[-5:]
df2.head()
# Mean of each measurement per 'Date2' value. The string 'mean' replaces
# np.mean, which recent pandas versions warn about when passed to agg().
# Each result stays a one-column DataFrame indexed by 'Date2'.
by_day = df2.groupby('Date2')
sulfate_mean = by_day.agg({'sulfate': 'mean'})
nitrate_mean = by_day.agg({'nitrate': 'mean'})
%matplotlib inline
plt.figure(figsize=(16,10))
color_gray = 'lightslategrey'
months = [d.strftime('%b') for d in pd.date_range('2017-01-01','2017-12-31' , freq='1M')]
plt.xticks(np.arange(15,365,30), months, color = color_gray)
plt.yticks(np.arange(0,8.5, 0.5), np.arange(0,8.5, 0.5), color = color_gray)
plt.plot(sulfate_mean, c = 'red', label ='Sulfate Mean Value',linewidth=1)
plt.plot(nitrate_mean, c = 'blue', label ='Nitrate Mean Value',linewidth=1)
plt.title('Mean Value of Sulfate/Nitrate in United States',fontsize=30)
plt.xlabel('Date', fontsize=25)
plt.ylabel('Value', fontsize=25)
plt.tick_params(top='off', bottom='off', left='on', right='off', labelleft='on', labelbottom='on', color = color_gray)
plt.legend(loc = 1, fontsize=15, frameon = False)
import mpl_toolkits.axes_grid1.inset_locator as mpl_il
# Inset axes placed inside the current axes (loc=2), 40% wide and 30% tall,
# showing a 1000-bin histogram of the sulfate column.
host_axes = plt.gca()
ax2 = mpl_il.inset_axes(host_axes, width='40%', height='30%', loc=2);
sulfate_values = df['sulfate']
ax2.hist(sulfate_values, bins=1000);
# Pad the x-axis of the inset.
ax2.margins(x=0.5);