from pandas import *
In [42]:
ratings.describe()
Out[42]:
              userId        movieId         rating
count  100836.000000  100836.000000  100836.000000
mean      326.127564   19435.295718       3.501557
std       182.618491   35530.987199       1.042529
min         1.000000       1.000000       0.500000
25%       177.000000    1199.000000       3.000000
50%       325.000000    2991.000000       3.500000
75%       477.000000    8122.000000       4.000000
max       610.000000  193609.000000       5.000000
In [44]:
ratings.corr()
Out[44]:
           userId   movieId    rating
userId   1.000000  0.006773 -0.049348
movieId  0.006773  1.000000 -0.004061
rating  -0.049348 -0.004061  1.000000
In [48]:
ratings['rating'].describe()
Out[48]:
count    100836.000000
mean          3.501557
std           1.042529
min           0.500000
25%           3.000000
50%           3.500000
75%           4.000000
max           5.000000
Name: rating, dtype: float64
In [49]:
ratings['rating'].mean()
Out[49]:
3.501556983616962
In [50]:
ratings.mean()
Out[50]:
userId       326.127564
movieId    19435.295718
rating         3.501557
dtype: float64
In [51]:
ratings['rating'].min()
Out[51]:
0.5
In [52]:
ratings['rating'].std()
Out[52]:
1.0425292390605359
In [53]:
ratings['rating'].mode()  # the value(s) that occur most frequently
Out[53]:
0    4.0
dtype: float64
In [54]:
ratings.corr()
Out[54]:
           userId   movieId    rating
userId   1.000000  0.006773 -0.049348
movieId  0.006773  1.000000 -0.004061
rating  -0.049348 -0.004061  1.000000
In [56]:
filter_l = ratings['rating'] > 5  # create boolean series
filter_l.any()                    #check if any true in series
Out[56]:
False
In [62]:
movies.shape
Out[62]:
(9742, 3)
In [63]:
movies.head()
Out[63]:
   movieId  title                               genres
0        1  Toy Story (1995)                    Adventure|Animation|Children|Comedy|Fantasy
1        2  Jumanji (1995)                      Adventure|Children|Fantasy
2        3  Grumpier Old Men (1995)             Comedy|Romance
3        4  Waiting to Exhale (1995)            Comedy|Drama|Romance
4        5  Father of the Bride Part II (1995)  Comedy
In [65]:
movies.isnull().any()
Out[65]:
movieId    False
title      False
genres     False
dtype: bool
In [66]:
ratings.shape
Out[66]:
(100836, 3)
In [67]:
ratings.isnull().any()
Out[67]:
userId     False
movieId    False
rating     False
dtype: bool
In [68]:
tags.shape
Out[68]:
(3683, 3)
In [69]:
tags.isnull().any()
Out[69]:
userId     False
movieId    False
tag        False
dtype: bool
In [70]:
tags = tags.dropna()
tags.head()
Out[70]:
   userId  movieId  tag
0       2    60756  funny
1       2    60756  Highly quotable
2       2    60756  will ferrell
3       2    89774  Boxing story
4       2    89774  MMA
In [ ]:
 


+ Recent posts