라이브러리, ggplot x축 분할, Flip, Join, excel, discrete, fill, position

2019. 5. 20. 14:07

library(foreign)

library(readxl)

library(ggplot2)

library(dplyr)

# Install ggiraphExtra only when it is not already available, so that
# re-running the script does not re-download the package every time.
if (!requireNamespace("ggiraphExtra", quietly = TRUE)) {
  install.packages("ggiraphExtra")
}

library(ggiraphExtra)

# Read the SPSS file koweps.sav into a data frame.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
raw_welfare <- read.spss(file = "koweps.sav", to.data.frame = TRUE)

# Keep the raw import under its own name; the rest of the script uses `welfare`.
welfare <- raw_welfare

# Quick inspection of the imported data.
str(welfare)

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(welfare)
}

summary(welfare)

# Build `wf` from `welfare` with readable names for the coded columns.
# `welfare` itself keeps its original column names.
wf <- rename(
  welfare,
  gender = h10_g3,
  birth = h10_g4,
  marriage = h10_g10,
  religion = h10_g11,
  income = p1002_8aq1,
  code_job = h10_eco9,
  code_region = h10_reg7
)

# Derive age from the birth year as 2019 - birth + 1.
wf$age <- 2019 - wf$birth + 1

# Bucket ages into three groups: under 30, 30 to 59, and 60 or older.
# The upper bound of "middle" is `< 60` so that 59-year-olds stay in the
# middle group instead of falling into "old". Rows with a missing age get NA.
wf <- wf %>%
  mutate(
    ag = ifelse(age < 30, "young",
      ifelse(age < 60, "middle", "old")
    )
  )

# Number of respondents per age group.
table(wf$ag)

# Bar chart of the same counts (qplot() is deprecated since ggplot2 3.4.0).
ggplot(wf, aes(x = ag)) +
  geom_bar()

# Draw a box plot of age and print the statistics it is based on.
boxplot(wf$age)$stats

# Mean income per age group, using only rows with a recorded income.
wf_Ic <- wf %>%
  filter(!is.na(income)) %>%
  group_by(ag) %>%
  summarise(meanIc = mean(income))

# Recode gender: 1 becomes "male", any other non-missing value "female".
wf$gender <- ifelse(wf$gender == 1, "male", "female")

# Plot the mean income per age group, ordered young -> middle -> old.
# The summary table wf_Ic is plotted rather than the raw rows of wf: with the
# raw rows geom_col() would stack every individual income (and hit the NA
# rows) instead of showing the per-group mean computed above.
ggplot(wf_Ic, aes(x = ag, y = meanIc)) +
  geom_col() +
  scale_x_discrete(limits = c("young", "middle", "old"))

# Does income differ by gender and by age group?
# Mean income for each (age group, gender) pair, skipping missing incomes.
gen_Ic <- wf %>%
  filter(!is.na(income)) %>%
  group_by(ag, gender) %>%
  summarise(meanIc = mean(income))

# Side-by-side bars per gender, with age groups ordered young -> middle -> old.
gen_Ic %>%
  ggplot(mapping = aes(x = ag, y = meanIc, fill = gender)) +
  geom_col(position = "dodge") +
  scale_x_discrete(limits = c("young", "middle", "old"))

# Inspect the summary table: structure, first rows, dimensions.
str(gen_Ic)
head(gen_Ic)
dim(gen_Ic)

# Mean income for every (age, gender) combination, skipping missing incomes.
gen_age <- wf %>%
  filter(!is.na(income)) %>%
  group_by(age, gender) %>%
  summarise(meanIc = mean(income))

head(gen_age)

# Mean income across age, one coloured line per gender.
gen_age %>%
  ggplot(mapping = aes(x = age, y = meanIc, colour = gender)) +
  geom_line()

# Which job has the highest pay?

# Count how many rows have / lack a job code.
table(is.na(wf$code_job))

# read_excel() comes from readxl; install the package only if it is missing.
# (The package name is "readxl".)
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}

library(readxl)

# Read the second sheet of the codebook, using the first row as the header.
list_jobs <- read_excel(
  "Data/Koweps_Codebook.xlsx",
  col_names = TRUE,
  sheet = 2
)

dim(list_jobs)
str(list_jobs)

# Print the job codes stored in wf.
wf$code_job

# Join list_jobs onto wf, matching rows on code_job.
# The join key must be passed through `by`; `id` is not a left_join() argument.
wf <- left_join(wf, list_jobs, by = "code_job")

# Spot-check the join: first rows that have a job code, with the joined job.
wf %>%
  filter(!is.na(code_job)) %>%
  select(code_job, job) %>%
  head()

# Mean income by job.

# Per-job mean via tapply(); missing incomes are NOT removed here.
tapply(wf$income, wf$job, mean)

# Per-job mean using only rows where both income and job are present.
jobIc <- wf %>%
  filter(!is.na(income), !is.na(job)) %>%
  group_by(job) %>%
  summarise(meanIc = mean(income))

# The ten jobs with the highest mean income.
top10 <- jobIc %>%
  arrange(desc(meanIc)) %>%
  head(10)

top10

# Horizontal bar chart of the top ten jobs.
top10 %>%
  ggplot(mapping = aes(x = job, y = meanIc)) +
  geom_col() +
  coord_flip()

# Order the bars by their length. The variable being ordered (job) must be
# usable as a factor, not a continuous variable.
# reorder(variable to order, numeric variable): order job by meanIc.
top10 %>%
  ggplot(mapping = aes(x = reorder(job, meanIc), y = meanIc)) +
  geom_col() +
  coord_flip()

# Reverse the bar order by putting a minus sign in front of the numeric
# variable.
top10 %>%
  ggplot(mapping = aes(x = reorder(job, -meanIc), y = meanIc)) +
  geom_col() +
  coord_flip()

# Extract the ten jobs with the lowest mean income and show job name and mean.

# Check the income column: its structure and how many values are missing.
str(wf$income)
table(is.na(wf$income))

# Sort ascending by mean income and keep the first ten rows.
bottom_10 <- jobIc %>%
  arrange(meanIc) %>%
  head(10)

bottom_10

# Horizontal bars with the value axis limited to 0-150.
bottom_10 %>%
  ggplot(mapping = aes(x = reorder(job, -meanIc), y = meanIc)) +
  geom_col() +
  ylim(0, 150) +
  coord_flip()

# Ten most frequent jobs for each gender.
# Both pipelines read from wf, not welfare: the `gender` column (renamed and
# recoded) and the `job` column (added by the join with list_jobs) exist only
# in wf.

# Ten most frequent jobs among rows where gender is "female".
job_female <- wf %>%
  filter(!is.na(job) & gender == "female") %>%
  group_by(job) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  head(10)

# Ten most frequent jobs among rows where gender is "male".
job_male <- wf %>%
  filter(!is.na(job) & gender == "male") %>%
  group_by(job) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  head(10)

# Does the divorce rate differ depending on whether a person has a religion?

# Inspect the religion column before recoding.
class(wf$religion)
table(wf$religion)

# Recode religion: 1 becomes "yes", any other non-missing value "no".
wf$religion <- ifelse(wf$religion == 1, "yes", "no")

table(wf$religion)

# Bar chart of the recoded values (qplot() is deprecated since ggplot2 3.4.0).
ggplot(wf, aes(x = religion)) +
  geom_bar()

# Marital status.
table(wf$marriage)

# Keep only code 1 ("marriage") and code 3 ("divorce"); every other value
# becomes NA.
wf$group_marrage <- ifelse(
  wf$marriage == 1, "marriage",
  ifelse(wf$marriage == 3, "divorce", NA)
)

# Caution: table() does not show NA by default, so count the NAs separately.
table(wf$group_marrage)
table(is.na(wf$group_marrage))

# Bar chart of the groups (qplot() is deprecated since ggplot2 3.4.0).
ggplot(wf, aes(x = group_marrage)) +
  geom_bar()

# Rounding example: round to one decimal place.
round(5 / 8, 1)

# Share of "marriage" vs "divorce" within each religion value.
# After summarise() the last grouping variable is dropped, so the mutate()
# below runs per religion: tot_group is the sum of n within that group and
# pct is each row's percentage of it, rounded to one decimal place.
religion_marriage <- wf %>%
  filter(!is.na(group_marrage)) %>%
  group_by(religion, group_marrage) %>%
  summarise(n = n()) %>%
  mutate(
    tot_group = sum(n),
    pct = round(n / tot_group * 100, 1)
  )

religion_marriage

저작자표시 (새창열림)

'딥러닝 모델 설계 > R STUDIO' 카테고리의 다른 글

국내지도그래프,시계열 그래프 , ggChoropleth , plotly , dodge (0)	2019.05.22
USArrests, 행->컬럼. 지도 시각화, (0)	2019.05.20
library, wordcloud, str_split, paste, nchar, str_replace, ggplot , wefare (0)	2019.05.17
R <---> Mysql 연동 (0)	2019.05.16
rbind, apply, sample,split, subset, names, merge (0)	2019.05.16

Software knowledge worth spreading

라이브러리, ggplot x축 분할, Flip, Join, excel, discrete, fill, position

'딥러닝 모델 설계 > R STUDIO' 카테고리의 다른 글

+ Recent posts

티스토리툴바