import pandas as pd
import numpy as np

# Directory holding the input CSV files, which are named 001.csv ... 332.csv
# (file number zero-padded to three digits).
directory = "~/Desktop/specdata/"

# Read each file exactly once and stack the rows with a single concat.
# The earlier approach outer-merged every new file into the accumulated
# frame, which re-joins the ever-growing frame on each iteration just to
# append rows. ignore_index=True gives the combined frame a fresh 0..n-1
# index, as the merge did.
monitor_frames = [
    pd.read_csv(directory + "%03d.csv" % file_number)
    for file_number in range(1, 333)
]
pollut_data = pd.concat(monitor_frames, ignore_index=True)

# Discard every row that has a missing value in any column, then use the
# 'Date' column as the index of the remaining rows.
df = pollut_data.dropna().set_index('Date')
df.head()

import matplotlib.pyplot as plt
%matplotlib notebook

# Narrow the frame to the two pollutant columns that the later plots use.
pollutant_columns = ['sulfate', 'nitrate']
df_new = df[pollutant_columns]

df.head()

# Show which named style sheets this matplotlib install offers.
plt.style.available

['Solarize_Light2',
 'seaborn-pastel',
 'ggplot',
 'classic',
 'grayscale',
 'seaborn-poster',
 'seaborn-whitegrid',
 'fivethirtyeight',
 'bmh',
 'seaborn-dark',
 'seaborn-colorblind',
 'seaborn-talk',
 'seaborn-white',
 'seaborn-paper',
 'seaborn-deep',
 'seaborn-muted',
 'fast',
 'seaborn-darkgrid',
 'seaborn-notebook',
 '_classic_test',
 'seaborn-ticks',
 'seaborn-dark-palette',
 'dark_background',
 'seaborn',
 'seaborn-bright']

# Apply the seaborn "dark" style sheet. matplotlib 3.6 renamed the bundled
# seaborn styles to 'seaborn-v0_8-*' and later dropped the old names, so use
# whichever spelling this install actually provides.
dark_style = 'seaborn-dark'
if dark_style not in plt.style.available:
    dark_style = 'seaborn-v0_8-dark'
plt.style.use(dark_style)

# Line plot of both pollutant columns against the Date index.
df_new.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x7f8123960cf8>

# Scatter nitrate against sulfate, colouring each point by its 'ID' value.
df.plot.scatter(x="sulfate", y="nitrate", c='ID', colormap='viridis')

<matplotlib.axes._subplots.AxesSubplot at 0x7f806e4460b8>

# Box plot of the two pollutant columns; the trailing ';' keeps the notebook
# from echoing the returned object.
ax = df_new.plot(kind='box');

# Kernel density estimate of the same columns, drawn semi-transparent.
df_new.plot(kind='kde', alpha=0.7);

import seaborn as sns

sns.jointplot?

# Work on an explicit copy of the complete rows. Adding columns directly to
# the dropna() result is what raised SettingWithCopyWarning; copy() makes
# df2 an independent frame so the assignments below are unambiguous.
df2 = pollut_data.dropna().copy()
df2.head()

# 'Date' holds strings such as '2003-10-06': the first four characters are
# the year and the last five are the month-day part. The vectorised .str
# slices replace the per-row lambda.
df2['Year'] = df2['Date'].str[:4]
df2['Date2'] = df2['Date'].str[-5:]

/home/sabodhapati/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
/home/sabodhapati/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

df2.head()

# Group once by the month-day key and reuse the grouping for both pollutants;
# each result is a one-column frame of means indexed by 'Date2'.
by_month_day = df2.groupby('Date2')
sulfate_mean = by_month_day.agg({'sulfate': np.mean})

nitrate_mean = by_month_day.agg({'nitrate': np.mean})

%matplotlib inline
# Figure comparing the mean sulfate and nitrate value for each month-day key.
plt.figure(figsize=(16, 10))
color_gray = 'lightslategrey'

# Twelve month abbreviations for the x-axis labels. Twelve month-start
# periods give the same labels as the month-end range did, without relying
# on the 'M' frequency alias that recent pandas versions deprecate.
months = [d.strftime('%b') for d in pd.date_range('2017-01-01', periods=12, freq='MS')]

# Place one label every 30 positions starting at 15 (roughly mid-month when
# the x axis has one position per calendar day).
plt.xticks(np.arange(15, 365, 30), months, color=color_gray)
y_ticks = np.arange(0, 8.5, 0.5)
plt.yticks(y_ticks, y_ticks, color=color_gray)
plt.plot(sulfate_mean, c='red', label='Sulfate Mean Value', linewidth=1)
plt.plot(nitrate_mean, c='blue', label='Nitrate Mean Value', linewidth=1)

plt.title('Mean Value of Sulfate/Nitrate in United States', fontsize=30)
plt.xlabel('Date', fontsize=25)
plt.ylabel('Value', fontsize=25)
# Use real booleans: the strings 'on'/'off' are both truthy for current
# matplotlib, which would draw the ticks that were meant to be hidden.
plt.tick_params(top=False, bottom=False, left=True, right=False,
                labelleft=True, labelbottom=True, color=color_gray)

plt.legend(loc=1, fontsize=15, frameon=False)


import mpl_toolkits.axes_grid1.inset_locator as mpl_il
# Add a small inset axes (40% wide, 30% tall, location code 2) inside the
# current axes and fill it with a 1000-bin histogram of the sulfate column.
host_axes = plt.gca()
ax2 = mpl_il.inset_axes(host_axes, width='40%', height='30%', loc=2)
sulfate_values = df['sulfate']
ax2.hist(sulfate_values, bins=1000)
# Pad the inset's x range on both sides of the data.
ax2.margins(x=0.5);

	sulfate	nitrate	ID
Date
2003-10-06	7.21	0.651	1
2003-10-12	5.99	0.428	1
2003-10-18	4.68	1.040	1
2003-10-24	3.47	0.363	1
2003-10-30	2.42	0.507	1

	sulfate	nitrate	ID
Date
2003-10-06	7.21	0.651	1
2003-10-12	5.99	0.428	1
2003-10-18	4.68	1.040	1
2003-10-24	3.47	0.363	1
2003-10-30	2.42	0.507	1

	Date	sulfate	nitrate	ID
278	2003-10-06	7.21	0.651	1
284	2003-10-12	5.99	0.428	1
290	2003-10-18	4.68	1.040	1
296	2003-10-24	3.47	0.363	1
302	2003-10-30	2.42	0.507	1

	Date	sulfate	nitrate	ID	Year	Date2
278	2003-10-06	7.21	0.651	1	2003	10-06
284	2003-10-12	5.99	0.428	1	2003	10-12
290	2003-10-18	4.68	1.040	1	2003	10-18
296	2003-10-24	3.47	0.363	1	2003	10-24
302	2003-10-30	2.42	0.507	1	2003	10-30