from pandas import *
In [2]:
# Load the tag records from the local "ml" data directory.
tags = read_csv(filepath_or_buffer="./ml/tags.csv")
In [3]:
# Interpret the raw 'timestamp' values as seconds since the Unix epoch
# (unit='s') and store the converted datetimes in a new column.
tags['parsed_tim'] = to_datetime(
    tags['timestamp'],
    unit='s',
)
In [4]:
# Preview the first five rows, including the freshly added datetime column.
tags.head(n=5)
Out[4]:
In [5]:
# Boolean mask: True for rows whose parsed datetime is after 2015-02-01.
greater_than_t = tags['parsed_tim'].gt('2015-02-01')
In [6]:
# Keep only the rows flagged by the boolean mask.
selected_rows = tags.loc[greater_than_t]
In [7]:
# Preview the first five rows of the filtered selection.
selected_rows.head(n=5)
Out[7]:
In [9]:
# Order the rows by the parsed datetime column (ascending is the default)
# and show the ten earliest entries.
tags.sort_values('parsed_tim').head(10)
Out[9]:
In [11]:
# Re-read the tags file from disk; this rebinds `tags` to a fresh frame,
# so columns added to the previous frame are no longer present.
tags = read_csv(filepath_or_buffer="./ml/tags.csv")

# Show the dtype pandas assigned to each column.
tags.dtypes
Out[11]:
In [14]:
# Convert the 'timestamp' values, read as seconds (unit='s'), to datetimes
# and keep them in a 'parsed_time' column.
tags['parsed_time'] = to_datetime(
    tags['timestamp'],
    unit='s',
)
In [17]:
# Inspect the dtype of the converted column. 'M8' is NumPy's type code for
# datetime64, i.e. the dtype used to represent dates/times.
tags.dtypes['parsed_time']
Out[17]:
In [18]:
# Ten earliest rows by parsed datetime (sort_values sorts ascending by default).
tags.sort_values('parsed_time').head(10)
Out[18]:
In [33]:
# Load the ratings file from the local "ml" data directory.
ratings = read_csv(filepath_or_buffer="./ml/ratings.csv")

# Mean rating per movie. as_index=False keeps 'movieId' as a regular column,
# so the result has exactly the columns 'movieId' and 'rating'.
average_rating = ratings.groupby('movieId', as_index=False)['rating'].mean()
In [34]:
# Preview the last five rows of the per-movie averages.
average_rating.tail(n=5)
Out[34]:
In [35]:
# Load the movies file from the local "ml" data directory.
movies = read_csv(filepath_or_buffer="./ml/movies.csv")

# Inner join on 'movieId': only movies that have an average rating are kept.
joined = merge(
    left=movies,
    right=average_rating,
    how='inner',
    on='movieId',
)

# Preview the first five rows of the merged frame.
joined.head(n=5)
Out[35]:
In [36]:
# Pairwise correlation of the numeric columns only. `joined` also carries
# text columns from movies.csv (e.g. 'title'); pandas >= 2.0 no longer drops
# non-numeric columns silently in corr() and raises on them instead, so the
# numeric subset is selected explicitly. Older pandas gives the same result.
joined.select_dtypes(include="number").corr()
Out[36]:
In [37]:
# Show the first five rows of the merged frame again before deriving new columns.
joined.head(n=5)
Out[37]:
In [38]:
# Derive a 'year' column from the title text.
# - Raw string: "\(" inside a normal literal is an invalid escape sequence
#   and triggers a warning on recent Python versions.
# - Only a four-digit group in parentheses at the end of the title is
#   accepted (trailing whitespace tolerated), so other parenthesised text is
#   not mistaken for a year; titles without such a suffix get NaN.
#   NOTE(review): assumes titles end in "(YYYY)" — confirm against movies.csv.
# - expand=False returns a Series rather than a one-column DataFrame.
# The extracted values stay strings, as before.
joined['year'] = joined['title'].str.extract(r"\((\d{4})\)\s*$", expand=False)
In [39]:
# Display the merged frame, now including the derived 'year' column.
joined
Out[39]:
In [40]:
# Mean rating per extracted year. as_index=False keeps 'year' as a regular
# column, so the result has exactly the columns 'year' and 'rating'.
yearly_average = joined.groupby('year', as_index=False)['rating'].mean()
In [41]:
# Preview the first five rows of the per-year averages.
yearly_average.head(n=5)
Out[41]:
In [43]:
# Plot rating against year for the last 20 rows of the per-year averages
# (groupby sorts its keys, so these are the 20 highest 'year' values).
yearly_average.iloc[-20:].plot(
    x='year',
    y='rating',
    grid=True,
    figsize=(15, 10),
)
Out[43]:
'Python Library > Pandas' 카테고리의 다른 글
Day 7. Machine Learning [ K - Means ] ( Local Clustering ) (0) | 2019.06.16 |
---|---|
Day 7. Machine Learning [ Decision Trees ] ( Weather Classification ) (0) | 2019.06.16 |
Day 6. String Operations with Pandas (0) | 2019.06.16 |
Day 6. Frequent operations with pandas -Summary (0) | 2019.06.16 |
Day 6. Frequent operations with pandas - merging (0) | 2019.06.15 |