from pandas import *
In [2]:
# Load the movies table from the local CSV file (comma-separated) into `df`.
df = read_csv("./ml/movies.csv", sep=",")
In [4]:
# Preview the first five rows of the movies frame.
df.head(n=5)
Out[4]:
   movieId  title                               genres
0  1        Toy Story (1995)                    Adventure|Animation|Children|Comedy|Fantasy
1  2        Jumanji (1995)                      Adventure|Children|Fantasy
2  3        Grumpier Old Men (1995)             Comedy|Romance
3  4        Waiting to Exhale (1995)            Comedy|Drama|Romance
4  5        Father of the Bride Part II (1995)  Comedy
In [6]:
# Bar chart of `df`, drawn through the generic plot(kind=...) entry point.
df.plot(kind="bar")
Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x7ff7b2355e48>
In [8]:
# Box plot of `df`, drawn through the generic plot(kind=...) entry point.
df.plot(kind="box")
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x7ff7a3f3bef0>
In [9]:
# Histogram of `df`, drawn through the generic plot(kind=...) entry point.
df.plot(kind="hist")
Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x7ff7a3feb0f0>
In [10]:
# Line plot of `df`; "line" is the kind that a bare df.plot() call uses.
df.plot.line()
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x7ff7a4357f60>
In [12]:
%matplotlib inline # % means it is a magic function
In [13]:
# Load the ratings table from the local CSV file (comma-separated) and
# preview its first five rows.
ratings = read_csv("./ml/ratings.csv", sep=",")
ratings.head(n=5)
Out[13]:
   userId  movieId  rating  timestamp
0  1       1        4.0     964982703
1  1       3        4.0     964981247
2  1       6        4.0     964982224
3  1       47       5.0     964983815
4  1       50       5.0     964982931
In [14]:
# Summary statistics for the ratings frame, with the quartiles that
# describe() reports by default written out explicitly.
ratings.describe(percentiles=[0.25, 0.5, 0.75])
Out[14]:
       userId         movieId        rating         timestamp
count  100836.000000  100836.000000  100836.000000  1.008360e+05
mean   326.127564     19435.295718   3.501557       1.205946e+09
std    182.618491     35530.987199   1.042529       2.162610e+08
min    1.000000       1.000000       0.500000       8.281246e+08
25%    177.000000     1199.000000    3.000000       1.019124e+09
50%    325.000000     2991.000000    3.500000       1.186087e+09
75%    477.000000     8122.000000    4.000000       1.435994e+09
max    610.000000     193609.000000  5.000000       1.537799e+09
In [15]:
# Histogram of the `rating` column only, on a 15x10 figure. Selecting the
# column up front is what hist(column=...) does internally.
ratings[["rating"]].hist(figsize=(15, 10))
Out[15]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7ff7a3dc8358>]],
      dtype=object)
In [16]:
# Box plot of the `rating` column only, on a 15x20 figure.
ratings.boxplot(column=["rating"], figsize=(15, 20))
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x7ff793e789b0>
In [ ]:


+ Recent posts