In [1]:
import pandas as pd
import numpy as np
In [152]:
directory = "~/Desktop/specdata/"
pollut_data = pd.read_csv("~/Desktop/specdata/001.csv")
for i in range(2,333):
    if i <10:
        filename = "00"+"%i.csv"%i
    elif (10 <= i) & (i <100):
        filename = "0"+"%i.csv"%i
    else:
        filename = "%i.csv"%i
    path = directory + filename
    data = pd.read_csv(path)
    pollut_data = pd.merge(pollut_data,data, how = 'outer')
In [281]:
df = pollut_data.dropna()
In [282]:
df = df.set_index('Date')
df.head()
Out[282]:
sulfate nitrate ID
Date
2003-10-06 7.21 0.651 1
2003-10-12 5.99 0.428 1
2003-10-18 4.68 1.040 1
2003-10-24 3.47 0.363 1
2003-10-30 2.42 0.507 1
In [72]:
import matplotlib.pyplot as plt
%matplotlib notebook
In [91]:
df_new = df[['sulfate','nitrate']]
In [89]:
df.head()
Out[89]:
sulfate nitrate ID
Date
2003-10-06 7.21 0.651 1
2003-10-12 5.99 0.428 1
2003-10-18 4.68 1.040 1
2003-10-24 3.47 0.363 1
2003-10-30 2.42 0.507 1
In [74]:
plt.style.available
Out[74]:
['Solarize_Light2',
 'seaborn-pastel',
 'ggplot',
 'classic',
 'grayscale',
 'seaborn-poster',
 'seaborn-whitegrid',
 'fivethirtyeight',
 'bmh',
 'seaborn-dark',
 'seaborn-colorblind',
 'seaborn-talk',
 'seaborn-white',
 'seaborn-paper',
 'seaborn-deep',
 'seaborn-muted',
 'fast',
 'seaborn-darkgrid',
 'seaborn-notebook',
 '_classic_test',
 'seaborn-ticks',
 'seaborn-dark-palette',
 'dark_background',
 'seaborn',
 'seaborn-bright']
In [110]:
plt.style.use('seaborn-dark')
In [111]:
df_new.plot()
Out[111]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f8123960cf8>
In [117]:
df.plot.scatter("sulfate","nitrate",c='ID', colormap='viridis')
Out[117]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f806e4460b8>
In [122]:
ax = df_new.plot.box();
In [124]:
df_new.plot.kde(alpha=0.7);
In [125]:
import seaborn as sns
In [139]:
sns.jointplot?
In [154]:
df2 = pollut_data.dropna()
df2.head()
Out[154]:
Date sulfate nitrate ID
278 2003-10-06 7.21 0.651 1
284 2003-10-12 5.99 0.428 1
290 2003-10-18 4.68 1.040 1
296 2003-10-24 3.47 0.363 1
302 2003-10-30 2.42 0.507 1
In [155]:
df2['Year'] = df2['Date'].apply(lambda x: x[:4])
df2['Date2'] = df2['Date'].apply(lambda x: x[-5:])
/home/sabodhapati/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
/home/sabodhapati/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
In [156]:
df2.head()
Out[156]:
Date sulfate nitrate ID Year Date2
278 2003-10-06 7.21 0.651 1 2003 10-06
284 2003-10-12 5.99 0.428 1 2003 10-12
290 2003-10-18 4.68 1.040 1 2003 10-18
296 2003-10-24 3.47 0.363 1 2003 10-24
302 2003-10-30 2.42 0.507 1 2003 10-30
In [157]:
sulfate_mean = df2.groupby('Date2').agg({'sulfate':np.mean})
In [158]:
nitrate_mean = df2.groupby('Date2').agg({'nitrate':np.mean})
In [280]:
%matplotlib inline
plt.figure(figsize=(16,10))
color_gray = 'lightslategrey'
months = [d.strftime('%b') for d in pd.date_range('2017-01-01','2017-12-31' , freq='1M')]
plt.xticks(np.arange(15,365,30), months, color = color_gray)
plt.yticks(np.arange(0,8.5, 0.5), np.arange(0,8.5, 0.5), color = color_gray)
plt.plot(sulfate_mean, c = 'red', label ='Sulfate Mean Value',linewidth=1)
plt.plot(nitrate_mean, c = 'blue', label ='Nitrate Mean Value',linewidth=1)

plt.title('Mean Value of Sulfate/Nitrate in United States',fontsize=30)
plt.xlabel('Date',  fontsize=25)
plt.ylabel('Value',  fontsize=25)
plt.tick_params(top='off', bottom='off', left='on', right='off', labelleft='on', labelbottom='on', color = color_gray)

plt.legend(loc = 1, fontsize=15, frameon = False)


import mpl_toolkits.axes_grid1.inset_locator as mpl_il
ax2 = mpl_il.inset_axes(plt.gca(), width='40%', height='30%', loc=2);
ax2.hist(df['sulfate'], bins=1000);
ax2.margins(x=0.5);