from pandas import *
In [2]:
# Load the tag records from the local CSV file into a DataFrame.
tags = read_csv(filepath_or_buffer="./ml/tags.csv")
In [3]:
# 'timestamp' holds Unix epoch values in seconds, hence unit='s'.
tags['parsed_tim'] = to_datetime(arg=tags['timestamp'], unit='s')
In [4]:
# Preview the first five rows, including the new datetime column.
tags.head(5)
Out[4]:
userIdmovieIdtagtimestampparsed_tim
0260756funny14457149942015-10-24 19:29:54
1260756Highly quotable14457149962015-10-24 19:29:56
2260756will ferrell14457149922015-10-24 19:29:52
3289774Boxing story14457152072015-10-24 19:33:27
4289774MMA14457152002015-10-24 19:33:20
In [5]:
# Boolean mask: True for tags created after midnight on 2015-02-01.
greater_than_t = tags['parsed_tim'] > Timestamp('2015-02-01')
In [6]:
# Keep only the rows for which the mask is True.
selected_rows = tags.loc[greater_than_t]
In [7]:
# Preview the first five rows that passed the date filter.
selected_rows.head(5)
Out[7]:
userIdmovieIdtagtimestampparsed_tim
0260756funny14457149942015-10-24 19:29:54
1260756Highly quotable14457149962015-10-24 19:29:56
2260756will ferrell14457149922015-10-24 19:29:52
3289774Boxing story14457152072015-10-24 19:33:27
4289774MMA14457152002015-10-24 19:33:20
In [9]:
# Order the tags chronologically by 'parsed_tim' and show the ten earliest.
tags.sort_values('parsed_tim').head(10)
Out[9]:
userIdmovieIdtagtimestampparsed_tim
17564743181Shakespeare11371793522006-01-13 19:09:12
22124746912Rita Hayworth can dance!11371793712006-01-13 19:09:31
16364742494Hungary11371794262006-01-13 19:10:26
16354742494Holocaust11371794262006-01-13 19:10:26
14974741836No DVD at Netflix11371794442006-01-13 19:10:44
19614744969In Netflix queue11371795632006-01-13 19:12:43
240947426242In Netflix queue11371795702006-01-13 19:12:50
241347427741In Netflix queue11371795872006-01-13 19:13:07
22314747025In Netflix queue11371795932006-01-13 19:13:13
248547441997In Netflix queue11371796032006-01-13 19:13:23
In [11]:
# Re-read the raw file so the frame contains only the columns stored in the CSV,
# then list the dtype pandas inferred for each of them.
tags = read_csv(filepath_or_buffer="./ml/tags.csv")
tags.dtypes
Out[11]:
userId        int64
movieId       int64
tag          object
timestamp     int64
dtype: object
In [14]:
# Convert the epoch-second 'timestamp' values into datetimes.
tags['parsed_time'] = to_datetime(arg=tags['timestamp'], unit='s')
In [17]:
# 'M8' is NumPy's type code for datetime64, so '<M8[ns]' marks a datetime column.
tags.dtypes['parsed_time']
Out[17]:
dtype('<M8[ns]')
In [18]:
# Order the tags chronologically by 'parsed_time' and show the ten earliest.
tags.sort_values('parsed_time').head(10)
Out[18]:
userIdmovieIdtagtimestampparsed_time
17564743181Shakespeare11371793522006-01-13 19:09:12
22124746912Rita Hayworth can dance!11371793712006-01-13 19:09:31
16364742494Hungary11371794262006-01-13 19:10:26
16354742494Holocaust11371794262006-01-13 19:10:26
14974741836No DVD at Netflix11371794442006-01-13 19:10:44
19614744969In Netflix queue11371795632006-01-13 19:12:43
240947426242In Netflix queue11371795702006-01-13 19:12:50
241347427741In Netflix queue11371795872006-01-13 19:13:07
22314747025In Netflix queue11371795932006-01-13 19:13:13
248547441997In Netflix queue11371796032006-01-13 19:13:23
In [33]:
# Load every individual rating, then collapse them to one mean rating per movie.
ratings = read_csv(filepath_or_buffer="./ml/ratings.csv")
average_rating = (
    ratings.loc[:, ['movieId', 'rating']]
    .groupby(by='movieId', as_index=False)  # keep movieId as a regular column
    .mean()
)
In [34]:
# Inspect the last five per-movie averages.
average_rating.tail(5)
Out[34]:
movieIdrating
97191935814.0
97201935833.5
97211935853.5
97221935873.5
97231936094.0
In [35]:
# Load the movie metadata and attach each movie's mean rating.
movies = read_csv(filepath_or_buffer="./ml/movies.csv")
# Inner join on movieId: only movies present in both frames are kept.
joined = merge(movies, average_rating, on='movieId', how='inner')
joined.head(5)
Out[35]:
movieIdtitlegenresrating
01Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy3.920930
12Jumanji (1995)Adventure|Children|Fantasy3.431818
23Grumpier Old Men (1995)Comedy|Romance3.259615
34Waiting to Exhale (1995)Comedy|Drama|Romance2.357143
45Father of the Bride Part II (1995)Comedy3.071429
In [36]:
# Pairwise correlation of the numeric columns only.
# 'title' and 'genres' are strings; recent pandas versions raise instead of
# silently dropping them, so the numeric columns are selected explicitly.
joined[['movieId', 'rating']].corr()
Out[36]:
movieIdrating
movieId1.0000000.027841
rating0.0278411.000000
In [37]:
# Preview the first five rows of the merged frame.
joined.head(5)
Out[37]:
movieIdtitlegenresrating
01Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy3.920930
12Jumanji (1995)Adventure|Children|Fantasy3.431818
23Grumpier Old Men (1995)Comedy|Romance3.259615
34Waiting to Exhale (1995)Comedy|Drama|Romance2.357143
45Father of the Bride Part II (1995)Comedy3.071429
In [38]:
# Pull the release year out of the title, e.g. "Toy Story (1995)" -> "1995".
# The pattern is a raw string so that \( and \) reach the regex engine as
# escaped parentheses rather than invalid Python string escapes.
# The leading greedy .* pushes the match to the last '(' that still has a ')'
# after it, so titles with an extra parenthesised part still yield the final one.
# expand=False returns a Series (one capture group) for the column assignment.
# The extracted values are strings, not integers.
joined['year'] = joined['title'].str.extract(r".*\((.*)\).*", expand=False)
In [39]:
# Show the merged frame with its new 'year' column; the rendered output below
# is truncated to the leading and trailing rows.
joined
Out[39]:
movieIdtitlegenresratingyear
01Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy3.9209301995
12Jumanji (1995)Adventure|Children|Fantasy3.4318181995
23Grumpier Old Men (1995)Comedy|Romance3.2596151995
34Waiting to Exhale (1995)Comedy|Drama|Romance2.3571431995
45Father of the Bride Part II (1995)Comedy3.0714291995
56Heat (1995)Action|Crime|Thriller3.9460781995
67Sabrina (1995)Comedy|Romance3.1851851995
78Tom and Huck (1995)Adventure|Children2.8750001995
89Sudden Death (1995)Action3.1250001995
910GoldenEye (1995)Action|Adventure|Thriller3.4962121995
1011American President, The (1995)Comedy|Drama|Romance3.6714291995
1112Dracula: Dead and Loving It (1995)Comedy|Horror2.4210531995
1213Balto (1995)Adventure|Animation|Children3.1250001995
1314Nixon (1995)Drama3.8333331995
1415Cutthroat Island (1995)Action|Adventure|Romance3.0000001995
1516Casino (1995)Crime|Drama3.9268291995
1617Sense and Sensibility (1995)Drama|Romance3.7761191995
1718Four Rooms (1995)Comedy3.7000001995
1819Ace Ventura: When Nature Calls (1995)Comedy2.7272731995
1920Money Train (1995)Action|Comedy|Crime|Drama|Thriller2.5000001995
2021Get Shorty (1995)Comedy|Crime|Thriller3.4943821995
2122Copycat (1995)Crime|Drama|Horror|Mystery|Thriller3.2222221995
2223Assassins (1995)Action|Crime|Thriller3.1250001995
2324Powder (1995)Drama|Sci-Fi3.1250001995
2425Leaving Las Vegas (1995)Drama|Romance3.6250001995
2526Othello (1995)Drama3.5000001995
2627Now and Then (1995)Children|Drama3.3333331995
2728Persuasion (1995)Drama|Romance4.2272731995
2829City of Lost Children, The (Cité des enfants p...Adventure|Drama|Fantasy|Mystery|Sci-Fi4.0131581995
2930Shanghai Triad (Yao a yao yao dao waipo qiao) ...Crime|Drama3.0000001995
..................
9694188189Sorry to Bother You (2018)Comedy|Fantasy|Sci-Fi4.5000002018
9695188301Ant-Man and the Wasp (2018)Action|Adventure|Comedy|Fantasy|Sci-Fi3.6666672018
9696188675Dogman (2018)Crime|Drama3.5000002018
9697188751Mamma Mia: Here We Go Again! (2018)Comedy|Romance4.5000002018
9698188797Tag (2018)Comedy4.0000002018
9699188833The Man Who Killed Don Quixote (2018)Adventure|Comedy|Fantasy4.5000002018
9700189043Boundaries (2018)Comedy|Drama2.5000002018
9701189111Spiral (2018)Documentary3.0000002018
9702189333Mission: Impossible - Fallout (2018)Action|Adventure|Thriller3.7500002018
9703189381SuperFly (2018)Action|Crime|Thriller2.5000002018
9704189547Iron Soldier (2010)Action|Sci-Fi1.0000002010
9705189713BlacKkKlansman (2018)Comedy|Crime|Drama2.5000002018
9706190183The Darkest Minds (2018)Sci-Fi|Thriller3.5000002018
9707190207Tilt (2011)Drama|Romance1.5000002011
9708190209Jeff Ross Roasts the Border (2017)Comedy4.0000002017
9709190213John From (2015)Drama1.0000002015
9710190215Liquid Truth (2017)Drama1.5000002017
9711190219Bunny (1998)Animation1.0000001998
9712190221Hommage à Zgougou (et salut à Sabine Mamou) (2...Documentary1.0000002002
9713191005Gintama (2017)Action|Adventure|Comedy|Sci-Fi4.5000002017
9714193565Gintama: The Movie (2010)Action|Animation|Comedy|Sci-Fi3.5000002010
9715193567anohana: The Flower We Saw That Day - The Movi...Animation|Drama3.0000002013
9716193571Silver Spoon (2014)Comedy|Drama4.0000002014
9717193573Love Live! The School Idol Movie (2015)Animation4.0000002015
9718193579Jon Stewart Has Left the Building (2015)Documentary3.5000002015
9719193581Black Butler: Book of the Atlantic (2017)Action|Animation|Comedy|Fantasy4.0000002017
9720193583No Game No Life: Zero (2017)Animation|Comedy|Fantasy3.5000002017
9721193585Flint (2017)Drama3.5000002017
9722193587Bungo Stray Dogs: Dead Apple (2018)Action|Animation3.5000002018
9723193609Andrew Dice Clay: Dice Rules (1991)Comedy4.0000001991

9724 rows × 5 columns

In [40]:
# Average the per-movie mean ratings within each extracted year.
yearly_average = (
    joined.loc[:, ['year', 'rating']]
    .groupby(by='year', as_index=False)  # keep year as a regular column
    .mean()
)
In [41]:
# Preview the first five yearly averages.
yearly_average.head(5)
Out[41]:
yearrating
019023.5000
119032.5000
219084.0000
319152.0000
419163.5625
In [43]:
# Plot mean rating against year for the final 20 rows of yearly_average.
yearly_average.tail(20).plot(
    x='year',
    y='rating',
    figsize=(15, 10),
    grid=True,
)
Out[43]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f2b533f77b8>


+ Recent posts