tapply를 활용한 단어 빈도 조사

hellobird 2019. 5. 22. 14:55

# Word-frequency counting with tapply().
#
# tapply(X, INDEX, FUN) splits the vector X into groups defined by INDEX
# (one group per distinct value of INDEX) and applies FUN to each group.

# ---- Frequency tables with tapply ----
wl <- c("the", "is", "a", "the")
doc1freq <- c(3, 4, 2, 4)
doc2freq <- rep(1, 4)

# Each distinct value of wl becomes a group, and each group's values are
# passed to length(), i.e. this counts how many entries each word has.
tapply(doc1freq, wl, length)
tapply(doc2freq, wl, length)

# Same grouping, but the per-word values are summed instead of counted.
tapply(doc1freq, wl, sum)
tapply(doc2freq, wl, sum)

# A constant index puts every element in one group.
tapply(1:10, rep(1, 10), sum)
# A logical index yields two groups (FALSE and TRUE); sum is applied to each.
tapply(1:10, 1:10 %% 2 == 1, sum)

# ---- Applying it to strings ----
sent1 <- c("earth", "to", "earth")
sent2 <- c("ashes", "to", "ashes")
sent3 <- c("dust", "to", "dust")

# Count how often each word occurs across the three sentences.
rep(1, length(sent1))  # one 1 per word in sent1 (i.e. the number of words)
# Concatenating the three vectors of ones gives nine 1s in total.
c(rep(1, length(sent1)), rep(1, length(sent2)), rep(1, length(sent3)))

myfreq <- c(
  rep(1, length(sent1)),
  rep(1, length(sent2)),
  rep(1, length(sent3))
)
# Group the nine 1s by the words of the three sentences and sum per word.
tapply(myfreq, c(sent1, sent2, sent3), sum)

# ---- Same result via a data frame ----
# Build the frame with data.frame() rather than as.data.frame(cbind(...)):
# cbind() of a character and a numeric vector creates a character matrix, which
# would turn the frequency column into strings. Column names V1/V2 are kept
# to match what the cbind() version produced.
df <- data.frame(V1 = c(sent1, sent2, sent3), V2 = myfreq)

# R indexes from 1; a row index of 0 selects nothing and returns numeric(0).
df[1, 2]

# Base R: total frequency per word.
aggregate(V2 ~ V1, data = df, FUN = sum)

# dplyr equivalent. dplyr is optional, so only run it when it is installed.
# sum(V2) is used instead of n() so the column named `sum` really is a sum,
# not a row count that merely coincides with it when every frequency is 1.
if (requireNamespace("dplyr", quietly = TRUE)) {
  suppressPackageStartupMessages(library(dplyr))
  df %>%
    group_by(V1) %>%
    summarise(sum = sum(V2))
}

저작자표시 (새창열림)