Monday, November 11, 2013

Creating multi-page pdf documents in R with ggplot2

So I was charting some data - basically looking at the Omega 3:6 ratios of some foods (data from foodcomp.dk). To get what I was looking for I had to compute the ratios from the underlying constituent data. The whole set was about a thousand items, so it was useful for specific subsets, but I thought it might also be useful to write a script to create a properly legible multi-page pdf. Here's the pdf I ended up with:




You can download the pdf here.






I found it a useful exercise - so I walk through the code below, but here are the source documents that I worked with for reference:

This is the image file that I used as the PDF cover page.

And here is the source data file which I had already transformed.





OK - here's the script. I've tried to put in as much commenting as possible to go through my thought process:

## required libraries
require(ggplot2)
require(grid)
require(reshape2)
 
## ---- input data ----
## OMEGAS is read from a tab-separated file in the working directory;
## fill = TRUE pads rows that have fewer fields than the header.
tfile <- "OmegaTable.txt"
OMEGAS <- read.table(
  file = tfile,
  header = TRUE,
  sep = "\t",
  fill = TRUE
)

## ---- run settings ----
# regular expression used to filter the list (empty = no filtering)
strSearch <- ""
# number of food rows shown per page
maxRows <- 30
# largest O3:6 ratio (in either direction) that is drawn; anything beyond
# it is simply shown as "high" so the visual scale is not stretched
ratioLimit <- 15
# maximum number of characters kept from each item label
xLabelLength <- 40
# cover image in the working directory [A4 ratio helps it fit the page]
pngCover <- "Cover.png"
# page width and height handed to pdf() (A4)
px <- 11.692
py <- 8.267
 
## filter data: keep the rows whose EngName matches the strSearch pattern
OMEGAS <- OMEGAS[grep(strSearch, OMEGAS$EngName), ]
## calculate the number of pages in the subset.
## ceiling() gives one page per started block of maxRows rows;
## floor(n / maxRows) + 1 would add a spurious extra page whenever the row
## count is an exact multiple of maxRows. max(1, ...) keeps at least one page
## so that the page loop's 1:numPages never counts backwards.
numPages <- max(1, ceiling(nrow(OMEGAS) / maxRows))
 
## Map ratios onto a symmetric, clamped charting scale.
##   xR   : numeric vector of ratios
##   xMax : largest magnitude allowed in the result
## Values >= 1 are kept as they are, values below 1 are replaced by -1/xR,
## and the result is then limited to the interval [-xMax, xMax].
scaleRatio <- function(xR, xMax) {
  signedRatio <- ifelse(xR >= 1, xR, -1 / xR)
  # lower bound first, then upper bound
  boundedBelow <- pmax(signedRatio, -xMax)
  pmin(xMax, boundedBelow)
}
 
## Update OmegaRatio so that ratios less than 1 are reflected as their negative
## inverse, clamped to +/- ratioLimit. The configured ratioLimit is used here
## (not a literal 15) so the data stays in step with the placeholder bars and
## axis limits, which are also derived from ratioLimit.
OMEGAS$OmegaRatio <- scaleRatio(OMEGAS$OmegaRatio, ratioLimit)
## sort with the highest ratio first
OMEGAS <- OMEGAS[order(-OMEGAS$OmegaRatio), ]
 
 
## Placeholder rows that get appended to the data of every page:
##   - one row with a blank label and zero values,
##   - one bar just above +ratioLimit and one just below -ratioLimit.
## The two limit bars make ggplot scale every page identically.
## The values are written row by row (5 fields per row) and end up as text,
## because they share one vector with the labels.
placeRows <- matrix(
  c(
    "                                                                            ", 0, 0, "", 0,
    "_UPPER LIMIT 15:1 W3-W6 RATIO", 0, 0, "", ratioLimit + 0.001,
    "_LOWER LIMIT 1:15 W3-W6 RATIO", 0, 0, "", -ratioLimit - 0.001
  ),
  nrow = 3,
  byrow = TRUE
)
placeRows <- data.frame(placeRows)
# reuse the column names of OMEGAS so the rows can be rbind-ed onto it
colnames(placeRows) <- colnames(OMEGAS)
 
## Draw the title page: plots a PNG image onto the current graphics device.
##   strPNG : path of the PNG file to use as the cover
## Reads the page dimensions from the global variables px and py.
makeCover <- function(strPNG) {
  # readPNG() belongs to the 'png' package, which this script never attaches,
  # so it is called through its namespace.
  imgCover <- png::readPNG(strPNG)
  # create a blank plot with the page's proportions (no axes, no annotation)
  plot(px / 2, py / 2,
       xlim = c(0, px), ylim = c(0, py),
       xaxt = "n", yaxt = "n", ann = FALSE)
  # paint the image over the plot region as a raster.
  # NOTE(review): xleft is 2 rather than 0, so the image starts 2 units in
  # from the left edge - presumably deliberate; confirm.
  rasterImage(imgCover, xleft = 2, ybottom = 0, xright = px, ytop = py)
}
 
#################
## open the pdf device
#################

# The file name is "OMEGAS" + date() + ".pdf" with every colon and space
# stripped out, which gives each run a time-stamped file name.
pdf(
  file = gsub(
    pattern = "([: ])",
    replacement = "",
    x = paste("OMEGAS", date(), ".pdf")
  ),
  width = px,
  height = py
)

## The cover is drawn first, so it becomes the first page of the pdf
makeCover(pngCover)
 
## Run through the table in page-blocks of maxRows rows, print one chart per
## block to the open pdf device, then close the device.
for (pg in seq_len(numPages)) {

  # rows of the main OMEGAS table that belong on this page
  firstRow <- 1 + maxRows * (pg - 1)
  lastRow <- min(pg * maxRows, nrow(OMEGAS))
  # a page without any data rows would index backwards (firstRow:lastRow)
  # and pull in junk rows, so skip it
  if (firstRow > lastRow) {
    next
  }
  plotOMEGAS <- OMEGAS[firstRow:lastRow, ]

  # bind in the scaling rows
  plotOMEGAS <- data.frame(rbind(plotOMEGAS, placeRows))
  # trim the x labels to size
  plotOMEGAS$EngName <- substr(plotOMEGAS$EngName, 1, xLabelLength)
  # force the Ratio and Total columns back to numeric (binding the text-only
  # placeholder rows may have turned them into text) and order the labels
  plotOMEGAS$OmegaTotal <- as.numeric(plotOMEGAS$OmegaTotal)
  plotOMEGAS$OmegaRatio <- as.numeric(plotOMEGAS$OmegaRatio)
  plotOMEGAS <- transform(plotOMEGAS, EngName = reorder(EngName, OmegaRatio))

  # A full page holds maxRows data rows PLUS the placeholder rows, so a short
  # page has to be padded up to that same total; otherwise its bars are drawn
  # at a different thickness than on the full pages.
  bufferRows <- maxRows + nrow(placeRows) - nrow(plotOMEGAS)

  if (bufferRows > 0) {
    # each buffer row gets a distinct all-blank label: 1, 2, 3, ... spaces
    myNames <- strrep(" ", seq_len(bufferRows))
    bRows <- rep(c("z", 0, 0, "", 0), times = bufferRows)
    bRows <- data.frame(matrix(bRows, nrow = bufferRows, byrow = TRUE))
    bRows[, 1] <- myNames
    colnames(bRows) <- colnames(OMEGAS)
    plotOMEGAS <- data.frame(rbind(plotOMEGAS, bRows))
    plotOMEGAS$OmegaTotal <- as.numeric(plotOMEGAS$OmegaTotal)
    plotOMEGAS$OmegaRatio <- as.numeric(plotOMEGAS$OmegaRatio)
  }

  # That's it for the data - now graphing.

  # basic ggplot: food name against ratio, colour according to ratio,
  # alpha according to total omega content per 100g
  g.omega <- ggplot(
    plotOMEGAS,
    aes(x = EngName, y = OmegaRatio, fill = OmegaRatio, alpha = OmegaTotal)
  ) +
    # bars, flipped so the chart reads vertically
    geom_bar(stat = "identity") +
    coord_flip() +
    # colour gradient that reflects the ratio
    scale_fill_continuous(
      guide = "none", low = "orangered", high = "green3",
      space = "Lab", na.value = "grey"
    ) +
    # scale the alpha 0.4-1 against omega content
    scale_alpha_continuous(guide = "none", range = c(0.4, 1)) +
    # base theme, legends removed
    theme_bw(base_family = "mono") +
    theme(legend.position = "none") +
    # common y scale for every page
    scale_y_continuous(
      limits = c(-1.6 - ratioLimit, ratioLimit + 1),
      breaks = seq(-ratioLimit - 1, ratioLimit + 1, 2)
    )

  # text labels showing the omega content, positioned at the left edge.
  # fontface is a geom_text parameter, not something to map inside aes();
  # 'guide' and 'font_face' are not aesthetics at all and were ignored there.
  # NOTE(review): size = 14 is left inside aes() so the rendered label size
  # does not change; move it out and pick an explicit size if a fixed size
  # is what is wanted.
  g.omega <- g.omega +
    geom_text(
      aes(label = OmegaTotal, y = -1.6 - ratioLimit, size = 14),
      fontface = "bold"
    )

  # title and axis labels - the page numbers are pasted into the y label
  g.omega <- g.omega +
    xlab("Food Types\n[numbers indicate grams total Omega 3+6 per 100g Food]\n") +
    ylab(paste("\n   1:15 <<                Omega 3:6 Ratio                >> 15:1   \n                                                 page", pg, "of", numPages, sep = " ")) +
    ggtitle("OMEGA 3:6 RATIO OF VARIOUS FOODS\n(TECHNICAL UNIVERSITY OF DENMARK : NFI)\n") +
    theme(
      plot.title = element_text(face = "bold"),
      plot.margin = unit(c(1.5, 1.5, 1.5, 1.5), "cm")
    )

  # you HAVE to use print - this writes the plot to the next page on the pdf
  print(g.omega)

}

# closes the pdf device
dev.off()
Created by Pretty R at inside-R.org

No comments:

Post a Comment