# List the contents of the current working directory.
!ls
'Day 1.introduction.ipynb'		     'Day 3.Pandas.ipynb'
'Day 2. numpy.ipynb'			      ml
'Day 2. satellite image data analyis.ipynb'   wifire
'Day 3.Movie Data Analysis.ipynb'
In [2]:
# List the files inside the ./ml data folder.
!ls ./ml
links.csv  movies.csv  ratings.csv  README.txt	tags.csv
In [4]:
!cat ./ml/movies.csv | wc -l
9743
In [6]:
!head -5 ./ml/movies.csv
In [7]:
!tail -5 ./ml/movies.csv
In [8]:
!head -5 ./ml/ratings.csv
In [10]:
from pandas import *
In [11]:
# Read the comma-separated movies file into a DataFrame.
movies = read_csv(filepath_or_buffer='./ml/movies.csv', sep=',')
In [12]:
# Check what kind of object read_csv handed back.
type(movies)
Out[12]:
pandas.core.frame.DataFrame
In [14]:
# Show the first 15 rows of the movies frame (positional slice).
movies.iloc[:15]
Out[14]:
movieIdtitlegenres
01Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
12Jumanji (1995)Adventure|Children|Fantasy
23Grumpier Old Men (1995)Comedy|Romance
34Waiting to Exhale (1995)Comedy|Drama|Romance
45Father of the Bride Part II (1995)Comedy
56Heat (1995)Action|Crime|Thriller
67Sabrina (1995)Comedy|Romance
78Tom and Huck (1995)Adventure|Children
89Sudden Death (1995)Action
910GoldenEye (1995)Action|Adventure|Thriller
1011American President, The (1995)Comedy|Drama|Romance
1112Dracula: Dead and Loving It (1995)Comedy|Horror
1213Balto (1995)Adventure|Animation|Children
1314Nixon (1995)Drama
1415Cutthroat Island (1995)Action|Adventure|Romance
In [20]:
# Read the comma-separated tags file and preview its first five rows.
tags = read_csv(filepath_or_buffer='./ml/tags.csv', sep=',')
tags.head(5)
Out[20]:
userIdmovieIdtagtimestamp
0260756funny1445714994
1260756Highly quotable1445714996
2260756will ferrell1445714992
3289774Boxing story1445715207
4289774MMA1445715200
In [18]:
# Read the comma-separated ratings file and preview its first five rows.
#
# The timestamp column holds plain integers (e.g. 964982703), which
# read_csv's parse_dates option cannot interpret as dates: the column came
# back unparsed, so the argument was a misleading no-op and is removed here.
# NOTE(review): the values look like Unix epoch seconds; if real datetimes
# are wanted, convert explicitly, e.g.
#     to_datetime(ratings['timestamp'], unit='s')
ratings = read_csv("./ml/ratings.csv", sep=",")
ratings.head()
Out[18]:
userIdmovieIdratingtimestamp
0114.0964982703
1134.0964981247
2164.0964982224
31475.0964983815
41505.0964982931
In [24]:
# Remove the timestamp column from both frames.
# drop(..., errors='ignore') builds a new frame and tolerates the column
# already being absent, so this cell can be re-run safely; the previous
# `del frame['timestamp']` raised KeyError on a second execution.
ratings = ratings.drop(columns=['timestamp'], errors='ignore')
tags = tags.drop(columns=['timestamp'], errors='ignore')
In [25]:
# Select the first row by position (all columns) and inspect the type
# pandas uses to represent a single row.
row_0 = tags.iloc[0, :]
type(row_0)
Out[25]:
pandas.core.series.Series
In [26]:
# Display the extracted row; its entries are labelled by the frame's column names.
row_0
Out[26]:
userId         2
movieId    60756
tag        funny
Name: 0, dtype: object
In [28]:
# The index of a row Series holds the column labels of the source frame.
row_0.index
Out[28]:
Index(['userId', 'movieId', 'tag'], dtype='object')
In [29]:
# Look up a single field of the row by its label.
row_0.loc['userId']
Out[29]:
2
In [30]:
# Membership on a Series tests its labels, not its values; check the index directly.
'rating' in row_0.index
Out[30]:
False
In [32]:
# Preview the first five rows of tags again, after the timestamp column was removed.
tags.head(5)
Out[32]:
userIdmovieIdtag
0260756funny
1260756Highly quotable
2260756will ferrell
3289774Boxing story
4289774MMA
In [33]:
# Row labels of the tags frame (a default integer RangeIndex here).
tags.index
Out[33]:
RangeIndex(start=0, stop=3683, step=1)
In [34]:
# Column labels of the tags frame.
tags.columns
Out[34]:
Index(['userId', 'movieId', 'tag'], dtype='object')
In [37]:
# Pick several non-adjacent rows by their integer positions.
tags.take([0, 11, 2000])
Out[37]:
userIdmovieIdtag
0260756funny
1118431gangster
20004745450women
In [ ]:


+ Recent posts