import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
In [6]:
# Load the daily weather observations from a CSV file in the working directory.
data = pd.read_csv('./daily_weather.csv')
In [8]:
# Summary statistics for the columns of the loaded frame.
data.describe()
Out[8]:
In [9]:
# Per-column flag: True if the column contains at least one missing value.
data.isnull().any()
Out[9]:
In [24]:
# Keep only the rows that have a missing value in any column, then count the
# non-null entries per column within those rows.
data[data.isnull().any(axis = 1)].count()
Out[24]:
In [25]:
# Remove the 'number' column; it is never used as a model feature below.
# Rebinding through DataFrame.drop (instead of `del`) keeps this cell
# idempotent: with errors='ignore' it can be re-run after the column is
# already gone without raising KeyError.
data = data.drop(columns=['number'], errors='ignore')
In [26]:
# Remember how many rows exist before any rows are dropped.
before_rows = len(data)
before_rows
Out[26]:
In [28]:
# Drop every row that contains at least one missing value.
data = data.dropna()
In [29]:
# Number of rows removed by dropna().
before_rows - data.shape[0]
Out[29]:
In [30]:
# Continue on an independent copy so columns added later do not touch `data`.
clean_data = data.copy()
In [31]:
# Binary target: 1 when the 3pm relative humidity exceeds 24.99, otherwise 0.
# The boolean comparison result is cast to integers explicitly.
clean_data['high_humidity_label'] = (
    clean_data['relative_humidity_3pm'].gt(24.99).astype(int)
)
clean_data['high_humidity_label']
Out[31]:
In [36]:
# Selecting one column name yields a Series; selecting a list of column names
# yields a DataFrame. The target is kept as a one-column DataFrame.
y = clean_data.loc[:, ['high_humidity_label']].copy()
type(y)
Out[36]:
In [37]:
# First rows of the target frame.
y.head()
Out[37]:
In [38]:
# First rows of the 3pm humidity column the label was derived from.
clean_data['relative_humidity_3pm'].head()
Out[38]:
In [40]:
# Column names of `data`.
data.columns
Out[40]:
In [41]:
# Names of the 9am measurement columns used as model inputs.
morning_features = [
    'air_pressure_9am',
    'air_temp_9am',
    'avg_wind_direction_9am',
    'avg_wind_speed_9am',
    'max_wind_direction_9am',
    'max_wind_speed_9am',
    'rain_accumulation_9am',
    'rain_duration_9am',
]
In [42]:
# Columns of the cleaned frame, which now include 'high_humidity_label'.
clean_data.columns
Out[42]:
In [43]:
# Feature matrix: only the columns listed in morning_features, copied so it is
# independent of clean_data.
x = clean_data[morning_features].copy()
In [44]:
# Column names of the feature matrix.
x.columns
Out[44]:
In [45]:
# Column names of the target frame.
y.columns
Out[45]:
In [46]:
# Split the feature and target frames into train and test pieces, holding out
# 33% of the rows for testing. The fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=324,
)
In [47]:
# Check the container type of each of the four pieces returned by the split.
type(x_train)
Out[47]:
In [48]:
type(x_test)
Out[48]:
In [49]:
type(y_train)
Out[49]:
In [50]:
type(y_test)
Out[50]:
In [51]:
# First rows of the training features.
x_train.head()
Out[51]:
In [52]:
# Summary statistics of the training labels.
y_train.describe()
Out[52]:
In [53]:
# Decision tree capped at 10 leaf nodes; random_state is fixed so repeated
# runs build the same tree.
humidity_classifier = DecisionTreeClassifier(
    max_leaf_nodes=10,
    random_state=0,
)
# Train on the training split.
humidity_classifier.fit(x_train, y_train)
Out[53]:
In [54]:
# Check the type of the classifier object.
type(humidity_classifier)
Out[54]:
In [55]:
# Predict labels for the held-out test features.
predictions = humidity_classifier.predict(x_test)
In [56]:
# First ten predicted labels.
predictions[ : 10]
Out[56]:
In [57]:
# First ten true labels, for comparison with the predictions above.
y_test[ : 10]
Out[57]:
In [58]:
# Score the predictions against the true test labels.
accuracy_score(y_test, predictions)
Out[58]:
In [ ]:
'Python Library > Pandas' 카테고리의 다른 글
Day 7. Machine Learning [ Linear Regression ] ( European Soccer Data ) (0) | 2019.06.16 |
---|---|
Day 7. Machine Learning [ K - Means ] ( Local Clustering ) (0) | 2019.06.16 |
Day 6. Handling Timestamps with Pandas (0) | 2019.06.16 |
Day 6. String Operations with Pandas (0) | 2019.06.16 |
Day 6. Frequent operations with pandas -Summary (0) | 2019.06.16 |