```python
# -*- coding: utf-8 -*-
"""Day 20_house_price.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/13AQ4xoIUMEpJA1zQHEV95Ebhh48pHccm
"""

from google.colab import drive
drive.mount('/gdrive')
PATH = "/gdrive/My Drive/Colab Notebooks/resources/"

import matplotlib.pyplot as plt
from tensorflow import keras
import tensorflow as tf
import numpy as np
import pandas as pd

print(tf.__version__)

df = pd.read_csv(PATH + "house/train.csv")
df[:2]

df = df[['sqft_living', 'bedrooms', 'bathrooms', 'sqft_lot', 'floors', 'zipcode', 'price']]

# count missing values per column
np.sum(df.isna())

# check which columns have few enough unique values to treat as categorical
df.apply(lambda x: x.value_counts().size, axis=0)

df[['bedrooms', 'bathrooms', 'floors', 'zipcode']] = \
    df[['bedrooms', 'bathrooms', 'floors', 'zipcode']].astype('category')
df2 = pd.get_dummies(df, prefix=['bedrooms', 'bathrooms', 'floors', 'zipcode'])
df2[:2]

# saved so the scaled price can be restored to dollars later
target_median = np.percentile(df2['price'], 50)
target_Q1 = np.percentile(df2['price'], 25)
target_Q3 = np.percentile(df2['price'], 75)
target_IQR = target_Q3 - target_Q1

from sklearn.preprocessing import RobustScaler
df3 = df2.copy()
df3[['sqft_living', 'sqft_lot', 'price']] = RobustScaler().fit_transform(
    df2[['sqft_living', 'sqft_lot', 'price']])
df3[:2]

ydata = df3['price']
xdata = df3.drop(['price'], axis=1)

from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(xdata, ydata)
xtrain.shape, xtest.shape, ytrain.shape, ytest.shape

ytrain = np.array(ytrain).reshape(-1, 1)
ytest = np.array(ytest).reshape(-1, 1)

# TensorFlow 1.x graph-mode API; on TF 2.x, replace the tensorflow import with
#   import tensorflow.compat.v1 as tf
#   tf.disable_v2_behavior()
x = tf.placeholder(tf.float32, shape=(None, 118))   # 118 columns after one-hot encoding
y = tf.placeholder(tf.float32, shape=(None, 1))
w = tf.Variable(tf.random.normal([118, 1]), name='weight')
b = tf.Variable(tf.random.normal([1]), name='bias')

hf = x @ w + b                               # linear hypothesis
cost = tf.reduce_mean(tf.square(hf - y))     # mean squared error
train = tf.train.GradientDescentOptimizer(1e-3).minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
for step in range(50000):
    cv, hv, _ = sess.run([cost, hf, train], feed_dict={x: xtrain, y: ytrain})
    if step % 500 == 0:
        print(step, "cost : ", cv, "prediction : ", hv)
    if cv < 0.001:
        break

import seaborn as sns

# training-set predictions vs. targets (still on the scaled axis), with a y = x line
plt.scatter(x=hv, y=ytrain)
xy = np.linspace(-30, 10, 41)
sns.lineplot(x=xy, y=xy)
plt.xlim(-5, 10)

predict = sess.run([hf], feed_dict={x: xtest})  # predict on the test set

# undo the RobustScaler transform by hand: scaled * IQR + median
predict2 = predict[0] * target_IQR + target_median
ytest2 = ytest * target_IQR + target_median

plt.scatter(x=predict2, y=ytest2)
xy = np.linspace(0, ytest2.max(), 2)  # y = x reference line over the restored price range
sns.lineplot(x=xy, y=xy)

predict2.shape
ytest.shape
```
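The manual restore works because `RobustScaler` (with its defaults) centers each column on its median and scales by its IQR, so the inverse is simply `scaled * IQR + median`; `inverse_transform` would need all three scaled columns at once, which is why the notebook saves the price median and IQR separately. As a minimal sketch, assuming the `predict`, `ytest`, `target_IQR`, and `target_median` values from the notebook above, the model can also be scored in original price units:

```python
# Hedged sketch: evaluate the fitted model in original price units.
# Assumes predict, ytest, target_IQR and target_median from the notebook above.
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

pred_price = predict[0] * target_IQR + target_median  # scaled -> dollars
true_price = ytest * target_IQR + target_median

rmse = np.sqrt(mean_squared_error(true_price, pred_price))
mae = mean_absolute_error(true_price, pred_price)
print(f"RMSE: {rmse:,.0f}   MAE: {mae:,.0f}")
```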
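The graph-mode API used above (`tf.placeholder`, `tf.Session`) is removed in TensorFlow 2.x. As a modernization note, here is a minimal `tf.keras` sketch of the same single-unit linear regression; the epoch and batch-size values are illustrative assumptions, not settings from the notebook:

```python
# Minimal tf.keras equivalent of the x @ w + b model above (a sketch, not the
# notebook's code). Assumes the xtrain/ytrain/xtest arrays built earlier
# (118 features after one-hot encoding).
import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(1, input_shape=(118,))   # one linear unit: x @ w + b
])
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3), loss='mse')

# epochs/batch_size are illustrative, not tuned
model.fit(np.asarray(xtrain, dtype='float32'), ytrain,
          epochs=100, batch_size=256, verbose=0)
pred = model.predict(np.asarray(xtest, dtype='float32'))
```

The graph-mode loop above does full-batch gradient descent, which would correspond to `batch_size=len(xtrain)` here; mini-batches are the more common choice with `tf.keras` and usually converge in far fewer passes over the data.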