# Exploratory analysis of the Koweps welfare panel file: mean salary by
# gender, age, age group and education level, followed by the age-group
# composition of each region.

library(haven)
library(dplyr)
library(ggplot2)

# Load data ----

raw_welfare <- read_dta(
  file = "D:/R/Koweps/koweps_h15_2020_beta1.dta",
  encoding = "latin1"
)
welfare <- raw_welfare
# str(welfare)

# Give the survey columns used below readable names.
welfare <- rename(
  welfare,
  gender = h1501_4,
  birth = h1501_5,
  edu = h1501_6,
  marriage = h1501_11,
  income = h15_cin,
  code_region = h15_reg7
)

# salary is income divided by 12.
# NOTE(review): presumably `income` is an annual figure, making this a
# monthly amount -- confirm against the codebook.
welfare <- mutate(welfare, salary = income / 12)
# Show only the first values; printing the full column floods the console.
head(welfare$salary)

class(welfare$gender)
table(welfare$gender)

# Salary difference by gender ----

# Code 1 is labelled "male"; every other code is labelled "female".
welfare$gender <- ifelse(welfare$gender == 1, "male", "female")
table(welfare$gender)
ggplot(data = welfare, aes(x = gender)) +
  geom_bar()

summary(welfare$salary)
ggplot(data = welfare, aes(x = salary)) +
  geom_histogram(bins = 30)
ggplot(data = welfare, aes(x = salary)) +
  geom_histogram() +
  xlim(0, 1000)

# na.rm = TRUE so that a single missing salary does not turn a whole group
# mean into NA (applies to every mean() below).
gender_salary <- welfare %>%
  group_by(gender) %>%
  summarise(mean_salary = mean(salary, na.rm = TRUE))
gender_salary
ggplot(data = gender_salary, aes(x = gender, y = mean_salary)) +
  geom_col()

# Relationship between age and salary ----

summary(welfare$birth)
ggplot(data = welfare, aes(x = birth)) +
  geom_histogram()

# Age as of 2020, counting the birth year itself as age 1.
welfare$age <- 2020 - welfare$birth + 1
summary(welfare$age)
ggplot(data = welfare, aes(x = age)) +
  geom_histogram()

# Mean salary by age.
age_salary <- welfare %>%
  group_by(age) %>%
  summarise(mean_salary = mean(salary, na.rm = TRUE))
ggplot(data = age_salary, aes(x = age, y = mean_salary)) +
  geom_line()

# Relationship between age group and salary ----

# Under 30 -> "청소년", 30 to 60 inclusive -> "중장년", over 60 -> "노년".
welfare$ageg <- ifelse(
  welfare$age < 30, "청소년",
  ifelse(welfare$age <= 60, "중장년", "노년")
)
head(welfare$age)
head(welfare$ageg)

ageg_salary <- welfare %>%
  group_by(ageg) %>%
  summarise(mean_salary = mean(salary, na.rm = TRUE))
ageg_salary
ggplot(data = ageg_salary, aes(x = ageg, y = mean_salary)) +
  geom_col()

# Salary by gender and age group ----

gender_ageg_salary <- welfare %>%
  group_by(ageg, gender) %>%
  summarise(mean_salary = mean(salary, na.rm = TRUE), .groups = "drop")
gender_ageg_salary
ggplot(
  data = gender_ageg_salary,
  aes(x = ageg, y = mean_salary, fill = gender)
) +
  geom_col()
ggplot(
  data = gender_ageg_salary,
  aes(x = ageg, y = mean_salary, fill = gender)
) +
  geom_col(position = "dodge")

# Mean salary by gender and age ----

gender_age <- welfare %>%
  group_by(age, gender) %>%
  summarise(mean_salary = mean(salary, na.rm = TRUE), .groups = "drop")
gender_age
ggplot(data = gender_age, aes(x = age, y = mean_salary, col = gender)) +
  geom_line()

# Relationship between education level and salary ----

summary(welfare$edu)
# Bucket the education code: >= 8 "대학원", 6-7 "대학", 4-5 "중등",
# anything lower "초등".
# NOTE(review): the meaning of each code is not visible here -- confirm
# the cut-offs against the codebook.
welfare$edug <- ifelse(
  welfare$edu >= 8, "대학원",
  ifelse(
    welfare$edu >= 6, "대학",
    ifelse(welfare$edu >= 4, "중등", "초등")
  )
)

edug_salary <- welfare %>%
  group_by(edug) %>%
  summarise(mean_salary = mean(salary, na.rm = TRUE))
edug_salary
ggplot(data = edug_salary, aes(x = edug, y = mean_salary)) +
  geom_col()

# Mean salary by gender and education level ----

gender_edug_salary <- welfare %>%
  group_by(edug, gender) %>%
  summarise(mean_salary = mean(salary, na.rm = TRUE), .groups = "drop")
gender_edug_salary
ggplot(
  data = gender_edug_salary,
  aes(x = edug, y = mean_salary, fill = gender)
) +
  geom_col() +
  scale_x_discrete(limits = c("대학", "대학원", "중등", "초등"))
ggplot(
  data = gender_edug_salary,
  aes(x = edug, y = mean_salary, fill = gender)
) +
  geom_col(position = "dodge") +
  scale_x_discrete(limits = c("대학", "대학원", "중등", "초등"))

# Age-group share by region ----

class(welfare$code_region)
table(welfare$code_region)

# Lookup table from region code (1-7) to region name.
list_region <- data.frame(
  code_region = 1:7,
  region = c(
    "서울",
    "수도권(인천.경기)",
    "부산/경남/울산",
    "대구/경북",
    "대전/충남",
    "강원/충북",
    "광주/전남/전북/제주"
  )
)
list_region

# The join key must be passed as `by`; `id` is not a left_join() argument.
welfare <- left_join(welfare, list_region, by = "code_region")
welfare %>%
  select(code_region, region)

# Variant 1: group_by() + summarise(). "drop_last" keeps the region
# grouping, so sum(n) is the region total and pct is the share of each age
# group within its region (not within the whole sample).
region_ageg_1 <- welfare %>%
  group_by(region, ageg) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(tot_group = sum(n)) %>%
  mutate(pct = round(n / tot_group * 100, 2)) %>%
  ungroup()
head(region_ageg_1)
ggplot(data = region_ageg_1, aes(x = region, y = pct, fill = ageg)) +
  geom_col() +
  coord_flip()

# Variant 2: the same within-region shares computed via count().
region_ageg_2 <- welfare %>%
  count(region, ageg) %>%
  group_by(region) %>%
  mutate(pct = round(n / sum(n) * 100, 2)) %>%
  ungroup()
head(region_ageg_2)
ggplot(data = region_ageg_2, aes(x = region, y = pct, fill = ageg)) +
  geom_col() +
  coord_flip()

# Order regions by the share of the "노년" age group ----

list_order_old_1 <- region_ageg_1 %>%
  filter(ageg == "노년") %>%
  arrange(pct)
list_order_old_1
ggplot(
  data = list_order_old_1,
  aes(x = reorder(region, pct), y = pct, fill = ageg)
) +
  geom_col() +
  coord_flip()

list_order_old_2 <- region_ageg_2 %>%
  filter(ageg == "노년") %>%
  arrange(pct)
list_order_old_2
# Without reorder() the bars follow the default ordering of `region`.
ggplot(data = list_order_old_2, aes(x = region, y = pct, fill = ageg)) +
  geom_col() +
  coord_flip()
ggplot(
  data = list_order_old_2,
  aes(x = reorder(region, pct), y = pct, fill = ageg)
) +
  geom_col() +
  coord_flip()

# Order regions by the share of the "청소년" age group ----

list_order_young_1 <- region_ageg_1 %>%
  filter(ageg == "청소년") %>%
  arrange(pct)
list_order_young_1
ggplot(data = list_order_young_1, aes(x = region, y = pct, fill = ageg)) +
  geom_col() +
  coord_flip()
ggplot(
  data = list_order_young_1,
  aes(x = reorder(region, pct), y = pct, fill = ageg)
) +
  geom_col() +
  coord_flip()

list_order_young_2 <- region_ageg_2 %>%
  filter(ageg == "청소년") %>%
  arrange(pct)
list_order_young_2
ggplot(data = list_order_young_2, aes(x = region, y = pct, fill = ageg)) +
  geom_col() +
  coord_flip()
ggplot(
  data = list_order_young_2,
  aes(x = reorder(region, pct), y = pct, fill = ageg)
) +
  geom_col() +
  coord_flip()