import pandas as pd
from pandas import *
In [2]:
d = { 'one' : Series(['city_0', 'city_1']), 'two' : Series(['user_0', 'user_1'])}

df = DataFrame(d)
df
Out[2]:
   one     two
0  city_0  user_0
1  city_1  user_1
In [3]:
# Break every value of column 'one' into its pieces around the underscore.
df['one'].str.split(pat='_')
Out[3]:
0    [city, 0]
1    [city, 1]
Name: one, dtype: object
In [4]:
# The split result is itself a Series: the original index paired with a list
# of string pieces for each row.
type(df['one'].str.split(pat='_'))
Out[4]:
pandas.core.series.Series
In [6]:
# Flag, row by row, whether the value contains the character '1'.
df['one'].str.contains(pat='1')
Out[6]:
0    False
1     True
Name: one, dtype: bool
In [8]:
# Swap every underscore for '##'. regex=False pins literal matching, so the
# result (and the absence of a FutureWarning for the single-character pattern)
# does not depend on the installed pandas version's default for `regex`.
df['one'].str.replace('_', '##', regex=False)
Out[8]:
0    city##0
1    city##1
Name: one, dtype: object
In [9]:
# Pull out the underscore-plus-digit suffix of each value; the single capture
# group is returned as column 0 of a DataFrame.
df['one'].str.extract(r'(_[0-9])', expand=True)
Out[9]:
   0
0  _0
1  _1
In [11]:
# Load the movies table from a CSV path relative to the working directory.
# `pd.read_csv` is used explicitly rather than a bare name from a star import.
movies = pd.read_csv("./ml/movies.csv")
In [12]:
# Preview the first five rows of the loaded table.
movies.head(n=5)
Out[12]:
   movieId  title                               genres
0  1        Toy Story (1995)                    Adventure|Animation|Children|Comedy|Fantasy
1  2        Jumanji (1995)                      Adventure|Children|Fantasy
2  3        Grumpier Old Men (1995)             Comedy|Romance
3  4        Waiting to Exhale (1995)            Comedy|Drama|Romance
4  5        Father of the Bride Part II (1995)  Comedy
In [18]:
# Expand the pipe-delimited genre string into one column per genre slot and
# preview the first rows; slots a movie does not use are filled with None.
(
    movies['genres']
    .str.split(pat="|", expand=True)
    .head()
)
Out[18]:
   0          1          2         3       4        5     6     7     8     9
0  Adventure  Animation  Children  Comedy  Fantasy  None  None  None  None  None
1  Adventure  Children   Fantasy   None    None     None  None  None  None  None
2  Comedy     Romance    None      None    None     None  None  None  None  None
3  Comedy     Drama      Romance   None    None     None  None  None  None  None
4  Comedy     None       None      None    None     None  None  None  None  None
In [20]:
# One column per genre slot, obtained by splitting on the "|" delimiter.
movie_genres = movies['genres'].str.split("|", expand=True)
# Flag comedies. regex=False treats 'Comedy' as plain text, and na=False maps
# a missing genre string to False, so the flag column never contains NaN.
movie_genres['isComedy'] = movies['genres'].str.contains('Comedy', regex=False, na=False)
In [21]:
# Show the first ten rows of the genre table, including the isComedy flag.
movie_genres.head(10)
Out[21]:
   0          1          2         3       4        5     6     7     8     9     isComedy
0  Adventure  Animation  Children  Comedy  Fantasy  None  None  None  None  None  True
1  Adventure  Children   Fantasy   None    None     None  None  None  None  None  False
2  Comedy     Romance    None      None    None     None  None  None  None  None  True
3  Comedy     Drama      Romance   None    None     None  None  None  None  None  True
4  Comedy     None       None      None    None     None  None  None  None  None  True
5  Action     Crime      Thriller  None    None     None  None  None  None  None  False
6  Comedy     Romance    None      None    None     None  None  None  None  None  True
7  Adventure  Children   None      None    None     None  None  None  None  None  False
8  Action     None       None      None    None     None  None  None  None  None  False
9  Action     Adventure  Thriller  None    None     None  None  None  None  None  False
In [22]:
# Show the first five rows of the movies table again for reference.
movies.head(5)
Out[22]:
   movieId  title                               genres
0  1        Toy Story (1995)                    Adventure|Animation|Children|Comedy|Fantasy
1  2        Jumanji (1995)                      Adventure|Children|Fantasy
2  3        Grumpier Old Men (1995)             Comedy|Romance
3  4        Waiting to Exhale (1995)            Comedy|Drama|Romance
4  5        Father of the Bride Part II (1995)  Comedy
In [28]:
# Preview the text captured between the last '(' and the final ')' of each
# title (the release year in the rows shown). The pattern is a raw string so
# that `\(` and `\)` reach the regex engine as escapes instead of being
# invalid Python string escape sequences.
movies['title'].str.extract(r'.*\((.*)\).*', expand=True).head()
Out[28]:
   0
0  1995
1  1995
2  1995
3  1995
4  1995
In [29]:
# Store the parenthesised text captured from each title as a new 'year'
# column. The pattern is a raw string so that `\(` and `\)` are regex escapes
# rather than invalid Python string escape sequences; the regex is unchanged.
movies['year'] = movies['title'].str.extract(r'.*\((.*)\).*', expand=True)
In [30]:
# Inspect the last five rows to confirm the new 'year' column is populated.
movies.tail(n=5)
Out[30]:
      movieId  title                                      genres                           year
9737  193581   Black Butler: Book of the Atlantic (2017)  Action|Animation|Comedy|Fantasy  2017
9738  193583   No Game No Life: Zero (2017)               Animation|Comedy|Fantasy         2017
9739  193585   Flint (2017)                               Drama                            2017
9740  193587   Bungo Stray Dogs: Dead Apple (2018)        Action|Animation                 2018
9741  193609   Andrew Dice Clay: Dice Rules (1991)        Comedy                           1991
In [31]:


+ Recent posts