# subsetting, filtering, insertion, deletion, aggregation
In [2]:
from pandas import *
In [3]:
# Load the movie catalogue from a comma-separated file and preview the first rows.
movies_path = "./ml/movies.csv"
df = read_csv(movies_path, sep=",")
df.head()
Out[3]:
movieIdtitlegenres
01Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
12Jumanji (1995)Adventure|Children|Fantasy
23Grumpier Old Men (1995)Comedy|Romance
34Waiting to Exhale (1995)Comedy|Drama|Romance
45Father of the Bride Part II (1995)Comedy
In [6]:
# Extract specific columns: every row, but only the listed column labels.
df.loc[:, ['title', 'genres']].head()
Out[6]:
titlegenres
0Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
1Jumanji (1995)Adventure|Children|Fantasy
2Grumpier Old Men (1995)Comedy|Romance
3Waiting to Exhale (1995)Comedy|Drama|Romance
4Father of the Bride Part II (1995)Comedy
In [9]:
# Filter rows on a condition: keep only the rows whose movieId is greater than 20.
df.loc[df['movieId'].gt(20)].head()
Out[9]:
movieIdtitlegenres
2021Get Shorty (1995)Comedy|Crime|Thriller
2122Copycat (1995)Crime|Drama|Horror|Mystery|Thriller
2223Assassins (1995)Action|Crime|Thriller
2324Powder (1995)Drama|Sci-Fi
2425Leaving Las Vegas (1995)Drama|Romance
In [10]:
# Add a new column: assigning to a key that does not exist yet appends a
# derived column (here movieId shifted up by one).
df['movieId2'] = df['movieId'].add(1)
df.head()
Out[10]:
movieIdtitlegenresmovieId2
01Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy2
12Jumanji (1995)Adventure|Children|Fantasy3
23Grumpier Old Men (1995)Comedy|Romance4
34Waiting to Exhale (1995)Comedy|Drama|Romance5
45Father of the Bride Part II (1995)Comedy6
In [14]:
# Replace the contents of the row labelled 0 with new values, one per column
# (movieId, title, genres, movieId2).
# NOTE: .loc is label-based; label 0 is the first row here only because the
# frame still has its default 0..n-1 index.
# NOTE: the None given for movieId2 is stored as NaN, which turns the whole
# movieId2 column from integers into floats (see 3.0, 4.0, ... in the output).
df.loc[0] = [1, "newRow", "newGenres",None]  # overwrite row 0 with new contents
df.head()
Out[14]:
movieIdtitlegenresmovieId2
01newRownewGenresNaN
12Jumanji (1995)Adventure|Children|Fantasy3.0
23Grumpier Old Men (1995)Comedy|Romance4.0
34Waiting to Exhale (1995)Comedy|Drama|Romance5.0
45Father of the Bride Part II (1995)Comedy6.0
In [19]:
# Drop rows: look up the index label of the first row by position and remove
# that label from the row axis. The remaining rows keep their original labels.
df = df.drop(index=df.index[[0]])
df.head()
Out[19]:
movieIdtitlegenresmovieId2
12Jumanji (1995)Adventure|Children|Fantasy3.0
23Grumpier Old Men (1995)Comedy|Romance4.0
34Waiting to Exhale (1995)Comedy|Drama|Romance5.0
45Father of the Bride Part II (1995)Comedy6.0
56Heat (1995)Action|Crime|Thriller7.0
In [20]:
# Delete a column: remove the helper column movieId2 from the frame.
df = df.drop(columns=['movieId2'])
df.head()
Out[20]:
movieIdtitlegenres
12Jumanji (1995)Adventure|Children|Fantasy
23Grumpier Old Men (1995)Comedy|Romance
34Waiting to Exhale (1995)Comedy|Drama|Romance
45Father of the Bride Part II (1995)Comedy
56Heat (1995)Action|Crime|Thriller
In [24]:
# Build a grouping key for the aggregation demo: the last digit of movieId,
# which sorts the movies into ten buckets (0-9).
df['groupName'] = df['movieId'].mod(10)
df.head()
Out[24]:
movieIdtitlegenresgroupName
12Jumanji (1995)Adventure|Children|Fantasy2
23Grumpier Old Men (1995)Comedy|Romance3
34Waiting to Exhale (1995)Comedy|Drama|Romance4
45Father of the Bride Part II (1995)Comedy5
56Heat (1995)Action|Crime|Thriller6
In [25]:
# Aggregation: mean movieId within each groupName bucket.
# The numeric column is selected explicitly because the frame also holds the
# string columns title and genres. Older pandas silently dropped such columns
# from mean(); pandas >= 2.0 raises TypeError instead. Selecting [['movieId']]
# gives the same one-column result on every version.
df.groupby('groupName')[['movieId']].mean()
Out[25]:
movieId
groupName
042215.188199
144646.119171
240424.210953
342186.694530
440383.010989
545821.195762
639748.518363
744495.280773
840300.453027
941742.014706
In [ ]:


+ Recent posts