r - Remove missing values from frequency distributions in ggplot -
r - Remove missing values from frequency distributions in ggplot -
my data
dsl<-readrds("./data/derived/dsl.rds") # color palette outcome attcol8<-c("never"="#4575b4", "once or twice"="#74add1", "less once/month"="#abd9e9", "about once/month"="#e0f3f8", "about twice/month"="#fee090", "about once/week"="#fdae61", "several times/week"="#f46d43", "everyday"="#d73027") # view 1 respondent print (dsl[dsl$id==1,c("id","year","attend","attendf")]) id year attend attendf 1 1 1997 na <na> 2 1 1998 na <na> 3 1 1999 na <na> 4 1 2000 1 never 5 1 2001 6 once/week 6 1 2002 2 1 time or twice 7 1 2003 1 never 8 1 2004 1 never 9 1 2005 1 never 10 1 2006 1 never 11 1 2007 1 never 12 1 2008 1 never 13 1 2009 1 never 14 1 2010 1 never 15 1 2011 1 never creating frequency distributions each of measurement wave have:
ds<- dsl p<-ggplot(ds, aes(x=yearf, fill=attendf)) p<-p+ geom_bar(position="fill") p<-p+ scale_fill_manual(values = attcol8, name="response category" ) p<-p+ scale_y_continuous("prevalence: proportion of total", limits=c(0, 1), breaks=c(.1,.2,.3,.4,.5,.6,.7,.8,.9,1)) p<-p+ scale_x_discrete("waves of measurement", limits=as.character(c(2000:2011))) p<-p+ labs(title=paste0("in past year, how have attended worship service?")) p Missing values are used in the calculation of the total responses, to show the natural attrition in the study. Assuming attrition is not associated with the outcome measure, we can remove missing values from the calculation of the total of responses and look at the percentages of each response endorsed at each time point.
The question is: what can be done to produce the graph described, and efficiently? I tried na.rm=TRUE in various places, but it didn't do the trick. Any ideas?
ds<- dsl ### ??? p<-ggplot(ds, aes(x=yearf, fill=attendf)) p<-p+ geom_bar(position="fill") p<-p+ scale_fill_manual(values = attcol8, name="response category" ) p<-p+ scale_y_continuous("prevalence: proportion of total", limits=c(0, 1), breaks=c(.1,.2,.3,.4,.5,.6,.7,.8,.9,1)) p<-p+ scale_x_discrete("waves of measurement", limits=as.character(c(2000:2011))) p<-p+ labs(title=paste0("in past year, how have attended worship service?")) #p update after @mrflick solution:
ds<- dsl p<-ggplot(subset(ds, !is.na(attendf)), aes(x=yearf, fill=attendf)) p<-p+ geom_bar(position="fill") p<-p+ scale_fill_manual(values = attcol8, name="response category" ) p<-p+ scale_y_continuous("prevalence: proportion of total", limits=c(0, 1), breaks=c(.1,.2,.3,.4,.5,.6,.7,.8,.9,1)) p<-p+ scale_x_discrete("waves of measurement", limits=as.character(c(2000:2011))) p<-p+ labs(title=paste0("in past year, how have attended worship service?")) #p
The easiest place to drop them is when you set the data set for the plot:
p <- ggplot(subset(ds, !is.na(attendf)), aes(x=yearf, fill=attendf)) Here I've created sample data (which would have been helpful in the initial question) and re-ran the plotting commands after subsetting:
ds<-data.frame( id=rep(1:100, each=4), yearf=factor(rep(2001:2004, 100)), attendf=sample(1:8, 400, t, c(.2,.2,.15,.10,.10, .20, .15, .02)) ) ds[sample(which(ds$year==2002), 5), "attendf"]<-na ds[sample(which(ds$year==2003), 15), "attendf"]<-na ds[sample(which(ds$year==2004), 40), "attendf"]<-na attcol8<-c("never"="#4575b4", "once or twice"="#74add1", "less once/month"="#abd9e9", "about once/month"="#e0f3f8", "about twice/month"="#fee090", "about once/week"="#fdae61", "several times/week"="#f46d43", "everyday"="#d73027") ds$attendf<-factor(ds$attendf, levels=1:8, labels=names(attcol8)) library(ggplot2) p<-ggplot(subset(ds, !is.na(attendf)), aes(x=yearf, fill=attendf)) p<-p+ geom_bar(position="fill") p<-p+ scale_fill_manual(values = attcol8, name="response category" ) p<-p+ scale_y_continuous("prevalence: proportion of total", limits=c(0, 1), breaks=c(.1,.2,.3,.4,.5,.6,.7,.8,.9,1)) p<-p+ scale_x_discrete("waves of measurement", limits=as.character(c(2001:2004))) p<-p+ labs(title=paste0("in past year, how have attended worship service?")) p this gave next plot
r ggplot2 rstudio frequency-distribution stackedbarseries
Comments
Post a Comment