rbind, apply, sample, split, subset, names, merge

2019. 5. 16. 09:56

# Row-bind two numeric vectors into a 2 x 3 matrix.
rbind(c(1, 2, 3), c(5, 6, 7))

# Small data frame; keep `name` as character rather than factor.
x <- data.frame(id = c(1, 2), name = c("a", "b"), stringsAsFactors = FALSE)
str(x)

# Append a row as a one-row data frame so every column keeps its type.
# Binding the atomic vector c(3, "c") instead would first coerce 3 to "3"
# and then turn the whole `id` column into character.
y <- rbind(x, data.frame(id = 3, name = "c", stringsAsFactors = FALSE))

# Column-bind two numeric vectors into a 3 x 2 matrix.
cbind(c(1, 2, 3), c(5, 6, 7))

# 3 x 3 matrix filled column-wise, with its row sums appended as a 4th column.
a <- matrix(1:9, ncol = 3)
cbind(a, apply(a, 1, sum))

# Peek at the built-in iris data set and its structure.
head(iris)
str(iris)

# Work on a copy so the built-in data set itself is left alone.
temp_iris <- iris

library(dplyr)

# apply() takes the rows (MARGIN = 1) or columns (MARGIN = 2) of its input,
# passes each one through the function, and returns the results as a vector.

# Column sums of columns 1-4, selected by dropping Species.
temp_iris %>%
  select(-Species) %>%
  apply(2, sum)

# The same column sums, selecting columns 1-4 by position.
apply(temp_iris[, 1:4], MARGIN = 2, FUN = sum)

# Row-wise sums of the same four columns.
rowSums(iris[, 1:4])

# lapply() feeds 1, 2 and 3 to the function one at a time; the results come
# back as a list.
res <- lapply(1:3, function(value) value * 2)
res
res[[3]]  # access a single list element
class(res)

# Flatten the list into a plain vector.
res <- unlist(res)
class(res)

# lapply() over a named list: one mean per element.
x <- list(a = 1:3, b = 4:6)
lapply(x, mean)

# lapply() over the first four columns of the data frame: one mean per column.
lapply(temp_iris[, 1:4], mean)

# Compare the classes of the input and of the differently shaped results.
class(temp_iris[, 1:4])
class(lapply(temp_iris[, 1:4], mean))
class(as.data.frame(lapply(temp_iris[, 1:4], mean)))

# sapply() simplifies its result to a vector or matrix.
class(sapply(temp_iris[, 1:4], mean))

# Class of every column.
sapply(iris, class)

# The function is applied to every value in columns 1-4.
sapply(iris[, 1:4], function(column) column > 3)

# Repeat the value 1 ten times.
rep(1, times = 10)

# Group-wise operations (important).

# Every one of the 10 values falls into group 1, so this is the total sum.
tapply(1:10, INDEX = rep(1, times = 10), FUN = sum)

# Split into a TRUE group (odd numbers) and a FALSE group (even numbers).
tapply(1:10, INDEX = 1:10 %% 2 == 1, FUN = sum)

# Mean Sepal.Length per species.
with(iris, tapply(Sepal.Length, Species, mean))

# doBy supplies the formula-based helpers used below (summaryBy, orderBy).
# Install it only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("doBy", quietly = TRUE)) {
  install.packages("doBy")
}
library(doBy)

summary(iris)

# Default quantiles: 0%, 25%, 50%, 75%, 100% (min, quartiles, max).
quantile(iris$Sepal.Length)

seq(0, 1, 0.1)       # sequence from 0 to 1 in steps of 0.1
seq(0, 1, by = 0.1)  # same as above

# Deciles.
quantile(iris$Sepal.Length, seq(0, 1, 0.1))

# Formula is `columns ~ grouping variable`.
summaryBy(Sepal.Length + Sepal.Width ~ Species, iris)

# order() returns row indices, not values: the first index it returns is
# the row holding the smallest Sepal.Length.
order(iris$Sepal.Length)

# Use those indices in the row position to print the rows in sorted order.
iris[order(iris$Sepal.Length), ]

# Second sort key (Sepal.Width) added, both in decreasing order.
iris[order(iris$Sepal.Length, iris$Sepal.Width, decreasing = TRUE), ]

# Sort by two keys with doBy's formula interface.
orderBy(~ Sepal.Length + Sepal.Width, iris)

# Draw 6 numbers from 1-45 without replacement.
sample(1:45, size = 6)

# Shuffling data randomly / sampling.

# Draw from 1-45 with replacement; no size is given, so 45 values are drawn.
sample(1:45, replace = TRUE)

NROW(iris)

# Shuffle all 150 rows. In df[rows, cols] the row vector picks which rows are
# printed (and in what order); the empty column slot keeps every column.
iris[sample(NROW(iris), size = NROW(iris)), ]

# Sample 10% from each Species group.
sampleBy(~Species, frac = 0.1, data = iris)

# ---- split / subset / names ----
# The original separator and trailing notes used `//`, which is not R comment
# syntax and stops the script from parsing; they are `#` comments now.

split(iris, iris$Species)     # split by species; the result is a list
split(iris, iris$Species)[1]  # first element, still wrapped in a list
class(split(iris, iris$Species))

# Keep only the setosa rows; any logical condition can be used.
subset(iris, Species == "setosa")
subset(iris, Species == "setosa" & Sepal.Length > 5.0)

subset(iris, select = Species)                     # species column only
subset(iris, select = c(Species, Sepal.Length))    # species and sepal length
subset(iris, select = -c(Species, Sepal.Length))   # everything except those two

names(iris)  # column names

# Same column selections via a logical mask over the column names.
iris[, names(iris) %in% c("Species", "Sepal.Length")]   # species and sepal length
iris[, !names(iris) %in% c("Species", "Sepal.Length")]  # everything except those two

# Two score tables that share the key column `name`.
x <- data.frame(
  name = c("a", "b", "c"),
  math = c(1, 2, 3)
)
y <- data.frame(
  name = c("a", "b", "c"),
  eng = c(1, 2, 3)
)

cbind(x, y)  # plain side-by-side binding
merge(x, y)  # recognises the shared column and joins on it

# Same tables, except y now has "d" where x has "c".
x <- data.frame(
  name = c("a", "b", "c"),
  math = c(1, 2, 3)
)
y <- data.frame(
  name = c("a", "b", "d"),
  eng = c(1, 2, 3)
)

merge(x, y)              # only names present in both tables
merge(x, y, all = TRUE)  # every name from either table

x <- c(5, 2, 1, 4, 3)

# sort() returns the values themselves.
sort(x)                     # ascending
sort(x, decreasing = TRUE)  # descending

# order() returns the indices that would sort x, not the values.
order(x)                     # ascending
order(x, decreasing = TRUE)  # descending

# Start from an empty list and confirm its class.
data <- list()
class(data)

letters    # built-in constant holding the letters a-z
runif(10)  # ten uniform random numbers between 0 and 1

n <- 10

# Build n one-row data frames, one per index. lapply() with seq_len() replaces
# the original `for (c in 1:n)` loop, which grew the list element by element
# and left a variable named `c` behind that shadowed base::c. Each row still
# draws one random letter first and one uniform number second.
data <- lapply(seq_len(n), function(i) {
  data.frame(
    index = i,
    myChar = sample(letters, 1),
    z = runif(1)
  )
})

data

# 1. Stack the list of one-row data frames into a single data frame.
do.call(rbind, data)

# Install the helper packages only when they are missing, instead of
# reinstalling them on every run of the script.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
# NOTE(review): dplyr is attached earlier in this script; attaching plyr
# after it masks several dplyr functions — confirm this order is intended.
library(plyr)

# 2. ldply: bind the list of data frames into one data frame.
ldply(data, rbind)

# 3. rbindlist (data.table)
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}
library(data.table)

rbindlist(data)

# ---- with ----
# Evaluate the expression with the columns of iris in scope.
with(iris, mean(Sepal.Length))

# ---- which ----
which(iris$Species == "setosa")   # indices of the rows where the condition holds
which.min(iris$Sepal.Length)      # index of the minimum value
which.max(iris$Sepal.Length)      # index of the maximum value

# Most frequent value: tabulate, take the largest count, read off its name.
x <- c(1, 1, 2, 2, 2, 3, 3, 1, 3, 3)
names(which.max(table(x)))

# Install RMySQL only when it is missing.
if (!requireNamespace("RMySQL", quietly = TRUE)) {
  install.packages("RMySQL")
}
library(RMySQL)

# Connection settings can be overridden through environment variables so real
# credentials need not live in the script; the fallbacks keep the original
# local tutorial values.
con <- dbConnect(
  MySQL(),
  user = Sys.getenv("MYSQL_USER", unset = "root"),
  password = Sys.getenv("MYSQL_PASSWORD", unset = "1234"),
  host = "127.0.0.1",
  dbname = "rprogramming"
)

dbListTables(con)

# Fetch the whole table as a data frame.
df <- dbGetQuery(con, "select * from rtest2")

str(df)
class(df)

# Release the connection; the original script left it open.
dbDisconnect(con)

# ---- Ozone scatter plot ----

# Install mlbench only when it is missing.
if (!requireNamespace("mlbench", quietly = TRUE)) {
  install.packages("mlbench")
}
library(mlbench)

data(Ozone)
head(Ozone)

# Scatter plot of V8 against V9.
# cex = point size, pch = point symbol, xlab/ylab = axis labels.
plot(
  Ozone$V8, Ozone$V9,
  xlab = "Temp1", ylab = "Temp2", main = "Ozone",
  pch = "*", cex = 2,
  col = "#ff0000", col.axis = "#0000ff",
  xlim = c(0, 100), ylim = c(0, 100)
)

help(par)

# Data ranges, for reference when choosing the x and y axis limits.
min(Ozone$V8, na.rm = TRUE)
min(Ozone$V9, na.rm = TRUE)
max(Ozone$V8, na.rm = TRUE)
max(Ozone$V9, na.rm = TRUE)

str(cars)

plot(cars)
plot(cars, type = "l")  # line graph
plot(cars, type = "b")  # points and lines drawn together
plot(cars, type = "o")  # lines drawn over the points, easier to read

# Group by speed -> mean dist per speed.
# tapply
tapp_cars <- with(cars, tapply(dist, speed, mean))

table(tapp_cars)

plot(tapp_cars, xlab = "speed", ylab = "dist", type = "o", cex = 0.5)

# Same plot with a dashed line.
plot(
  tapp_cars,
  xlab = "speed", ylab = "dist",
  type = "o", cex = 0.5, lty = "dashed"
)

# With no arguments par() only reports the current graphical parameters;
# it does not reset anything.
par()

# Split the figure region into 1 row x 2 columns. par() returns the previous
# value of the parameter it changes, kept here so it can be restored.
myPar <- par(mfrow = c(1, 2))

plot(Ozone$V8, Ozone$V9, main = "Ozone")
plot(Ozone$V8, Ozone$V9, main = "Ozone2")

# Go back to the layout that was in effect before mfrow was changed.
par(myPar)

# ---- ggplot2 ----

저작자표시

'딥러닝 모델 설계 > R STUDIO' 카테고리의 다른 글

라이브러리, ggplot x축 분할, Flip, Join, excel, discrete, fill, position (0)	2019.05.20
library, wordcloud, str_split, paste, nchar, str_replace, ggplot , wefare (0)	2019.05.17
R <---> Mysql 연동 (0)	2019.05.16
head, view , summary, dim. summarize, rename, mutate, group_by (0)	2019.05.15
패키지 추가, 조건 결합, 범주형, qplot, excel, filter, select, arrange (0)	2019.05.13

Software knowledge worth spreading

rbind, apply, sample,split, subset, names, merge

'딥러닝 모델 설계 > R STUDIO' 카테고리의 다른 글

+ Recent posts

티스토리툴바