# Load the tags table from the local ./ml data folder.
# No separator is passed: a comma is the default delimiter
# (assuming read_csv is pandas' read_csv, imported in an earlier cell).
tags = read_csv("./ml/tags.csv")
tags.head()  # preview the first rows
Out[26]:
In [27]:
# Summary statistics (describe) for the tags table.
tags.describe()
Out[27]:
In [28]:
# Dimensions of the tags table as (rows, columns).
tags.shape
Out[28]:
In [29]:
# Load the movies table from the local ./ml data folder.
# No separator is passed: a comma is the default delimiter
# (assuming read_csv is pandas' read_csv, imported in an earlier cell).
movies = read_csv("./ml/movies.csv")
movies.head()  # preview the first rows
Out[29]:
In [30]:
# Dimensions of the movies table as (rows, columns).
movies.shape
Out[30]:
In [31]:
# Load the ratings table from the local ./ml data folder; no separator is
# passed, so read_csv's default delimiter applies.
ratings = read_csv("./ml/ratings.csv")
ratings.head()  # preview the first rows
Out[31]:
In [32]:
# Dimensions of the ratings table as (rows, columns).
ratings.shape
Out[32]:
In [38]:
# Peek at the final ten rows of the ratings table.
# (A positional slice such as ratings[1000:1010] would show an arbitrary
# window of rows instead.)
ratings.tail(10)
Out[38]:
In [39]:
# Frequency of each distinct value in the tag column. value_counts() orders
# the result from most to least frequent, so the head of the Series holds
# the most common tags.
tag_counts = tags["tag"].value_counts()
tag_counts.head(10)  # ten most frequent tags
Out[39]:
In [40]:
# tag_counts is sorted by descending frequency, so its tail holds the ten
# least frequent tags.
tag_counts.tail(10)
Out[40]:
In [42]:
# tag_counts is a Series indexed by tag text, so the count for a single tag
# is a label lookup on that index.
tag_counts.loc["sci-fi"]
Out[42]:
In [44]:
# Bar chart of the ten most frequent tags, drawn with a 15 x 10 figure size.
tag_counts.head(10).plot.bar(figsize=(15, 10))
Out[44]:
In [45]:
# Boolean mask over the ratings rows: True where the rating is 4.0 or above.
is_highly_rated = ratings["rating"].ge(4.0)
is_highly_rated.head()
Out[45]:
In [46]:
# Keep only the rows flagged by the mask, then show the last five of them.
ratings.loc[is_highly_rated].tail(5)
Out[46]:
In [49]:
# Boolean mask marking movies whose genres string mentions "Animation".
# na=False maps a missing genres value to False instead of NaN: a mask that
# contains NaN cannot be used for boolean indexing (movies[is_animation]
# would raise). regex=False treats the word as a plain substring rather than
# a regular-expression pattern.
is_animation = movies["genres"].str.contains("Animation", na=False, regex=False)
is_animation.head()
Out[49]:
In [50]:
# Filter to the rows flagged by is_animation, then take positions 5 through
# 14 (end-exclusive) of that subset.
movies.loc[is_animation].iloc[5:15]
Out[50]:
In [51]:
# Number of rows recorded at each distinct rating value. The index is the
# rating and the counts appear under the movieId column.
ratings_count = ratings.groupby("rating")[["movieId"]].count()
ratings_count.head()
Out[51]:
In [54]:
# Mean rating per movie: one row per movieId, with the average in the
# rating column.
average_rating = ratings.groupby("movieId")[["rating"]].mean()
average_rating.tail()
Out[54]:
In [55]:
# Number of ratings each movie received: one row per movieId, with the count
# in the rating column. Despite the variable name, this is not the number of
# movies in movies.csv.
movie_count = ratings.groupby("movieId")[["rating"]].count()
movie_count.head()
Out[55]:
In [ ]:
'Python Library > Pandas' 카테고리의 다른 글
Day 6. Frequent operations with pandas -Summary (0) | 2019.06.16 |
---|---|
Day 6. Frequent operations with pandas - merging (0) | 2019.06.15 |
Day 6. Frequent operations with pandas - subsetting, filtering, delegation (0) | 2019.06.15 |
Day 6. Simple visualization with pandas (0) | 2019.06.15 |
Day 6.Movie Data Analysis Part.2 (0) | 2019.06.15 |