from pandas import *
ratings = read_csv("./ml/ratings.csv")
movies = read_csv("./ml/movies.csv")
tags = read_csv("./ml/tags.csv")
# Average rating per movie.
# Selecting 'rating' before aggregating avoids computing (meaningless) means of
# userId and timestamp only to delete those columns afterwards.
# as_index=False keeps movieId as an ordinary column and gives the result a
# fresh sequential index instead of using movieId as the index.
# NOTE(review): assumes ratings.csv holds only userId, movieId, rating and
# timestamp; any other column is no longer carried into avg_ratings.
avg_ratings = ratings.groupby('movieId', as_index=False)['rating'].mean()
avg_ratings.head()
In [7]:
# Preview the first five rows of the per-movie averages.
avg_ratings.head(n=5)
Out[7]:
In [8]:
# Attach the average rating to each movie row. The inner join on movieId keeps
# only movies that also appear in avg_ratings.
box_office = movies.merge(
    right=avg_ratings,
    how='inner',
    on='movieId',
)
# Preview the last five rows of the merged table.
box_office.tail(n=5)
Out[8]:
In [10]:
# Boolean mask: True for rows whose average rating is at least 4.0.
is_highly_rated = box_office['rating'].ge(4.0)
# Preview the first five mask values.
is_highly_rated.head(n=5)
Out[10]:
In [13]:
# First ten rows among the highly rated movies.
box_office.loc[is_highly_rated].head(10)
Out[13]:
In [15]:
# Boolean mask: True where the genres string contains the substring 'Comedy'.
# regex=False treats 'Comedy' as a literal substring rather than a pattern.
# na=False maps missing genres to False so the mask stays strictly boolean and
# can be combined with `&` and used for row selection.
is_comedy = box_office['genres'].str.contains('Comedy', regex=False, na=False)
# Last ten rows that are both comedies and highly rated.
box_office[is_comedy & is_highly_rated].tail(10)
Out[15]:
'Python Library > Pandas' 카테고리의 다른 글
Day 6. Handling Timestamps with Pandas (0) | 2019.06.16 |
---|---|
Day 6. String Operations with Pandas (0) | 2019.06.16 |
Day 6. Frequent operations with pandas - merging (0) | 2019.06.15 |
Day 6. Frequent operations with pandas - aggregation (0) | 2019.06.15 |
Day 6. Frequent operations with pandas - subsetting, filtering, delegation (0) | 2019.06.15 |