Functions in R answers

These exercises cover the sections of functions in R Introduction to R.

– Create a function which takes one argument and finds the smallest number whose factorial is greater than that argument.

findSmallestFactorial <- function(x){
  if(!is.numeric(x)){
    message("Please provide a numeric argument!")
  }else{
    factorialAnswer <- 0
    count <- 0
    while(factorialAnswer <= x){
      count <- count+1
      if(count == 1){
        factorialAnswer <- 1
      }else{
        factorialAnswer <- factorialAnswer * count 
      }
    }
    return(count)
  }
}

findSmallestFactorial(3000)

## [1] 7

findSmallestFactorial(10^100)

## [1] 70

– Create a function which takes a vector argument and returns both the number of even perfect squares and a vector of perfect squares.

findPerfectSquares <- function(x){
  vecOfSquares <- c()
  evenSquares <- 0
  for(i in x){
    if(sqrt(i) %% 1 == 0){
      vecOfSquares <- c(vecOfSquares,i)
      if(sqrt(i) %% 1 == 0 & i %% 2 == 0){
        evenSquares <- evenSquares+1
      }
    }
  }
  return(list(vectorOfSquares=vecOfSquares,nOfEvenSquares=evenSquares))
}

findPerfectSquares(1:100)

## $vectorOfSquares
##  [1]   1   4   9  16  25  36  49  64  81 100
## 
## $nOfEvenSquares
## [1] 5

#Or

altFindPerfectSquares <- function(x){
  perfectSquare <- sqrt(x) %% 1 == 0
  vecOfSquares <- x[perfectSquare]
  evenSquares <- table(perfectSquare & x %% 2 == 0)[T]
  return(list(vectorOfSquares=vecOfSquares,nOfEvenSquares=evenSquares))
}

altFindPerfectSquares(1:100)

## $vectorOfSquares
##  [1]   1   4   9  16  25  36  49  64  81 100
## 
## $nOfEvenSquares
## 
## FALSE  TRUE 
##    95     5

#Res <- findPerfectSquares(1:10^8)
#altRes <- altFindPerfectSquares(1:10^8)

– Create a function which takes an argument of the directory containing expression files and the name of the annotation file and writes a the table with all samples’ expression results and all annotation to file.

summariseResults <- function(dirName=getwd(),annotation){
  if(missing(annotation)){
    message("Annotation file must be provided")
    break()
  }
  filesToRead <- dir(dirName,pattern = "*\\.txt",full.names=T)
  if(length(filesToRead) == 0){
    message("No expression files found in ",dirName)
    break()
  }
  
  fileRead <- vector("list",length=length(filesToRead))
  for(i in 1:length(filesToRead)){
    fileRead[[i]] <- read.delim(filesToRead[i],header=F,sep="\t")
    colnames(fileRead[[i]]) <- c("GeneNames",basename(filesToRead[i]))
  }
  mergedTable <- NULL
  for(i in fileRead){
    if(is.null(mergedTable)){
      mergedTable <- i
    }else{
      mergedTable <- merge(mergedTable,i,by=1,all=T)
    }
  }
   if(!file.exists(annotation)){
    message("No annotation file found:",annotation)
    break()     
   }
   Annotation <- read.table(annotation,sep="\t",h=T)  
  annotatedExpression <- merge(Annotation,mergedTable,by=1,all.x=F,all.y=T)
  
  outName <- paste0(basename((normalizePath(dirName))),"_Summarised.csv")

  write.table(annotatedExpression,file=outName,sep=",",col.names=T,row.names=F,quote=F)
  
  return(outName)
}

summariseResults("../ExpressionResults/","../ExpressionResults/Annotation.ann")

## [1] "ExpressionResults_Summarised.csv"

– Adapt the above function to also write a t-test result table filtered by the p-value cut-off. An additional argument specifying the allocation a samples into groups must be specified.

To retrieve the indicies of the first occurence of every element in one vector in a second vector you can use the match() function.

allSamples <- c("sample1.txt","sample2.txt","sample3.txt","sample4.txt","sample5.txt","sample5.txt")
testSamples <- c("sample1.txt","sample5.txt")
match(testSamples,allSamples)

## [1] 1 5

allSamples[match(testSamples,allSamples)]

## [1] "sample1.txt" "sample5.txt"

To find all occurences of vector in another you can use the %in% operator

allSamples <- c("sample1.txt","sample2.txt","sample3.txt","sample4.txt","sample5.txt")
testSamples <- c("sample1.txt","sample5.txt")
allSamples %in% testSamples

## [1]  TRUE FALSE FALSE FALSE  TRUE

allSamples[allSamples %in% testSamples]

## [1] "sample1.txt" "sample5.txt"

summariseResults2 <- function(dirName=getwd(),annotation,sampleGroups=NULL){
  if(missing(annotation)){
    message("Annotation file must be provided")
    break()
  }
  filesToRead <- dir(dirName,pattern = "*\\.txt",full.names=T)
  if(length(filesToRead) == 0){
    message("No expression files found in ",dirName)
    break()
  }
  
  fileRead <- vector("list",length=length(filesToRead))
  for(i in 1:length(filesToRead)){
    fileRead[[i]] <- read.delim(filesToRead[i],header=F,sep="\t")
    colnames(fileRead[[i]]) <- c("GeneNames",basename(filesToRead[i]))
  }
  mergedTable <- NULL
  for(i in fileRead){
    if(is.null(mergedTable)){
      mergedTable <- i
    }else{
      mergedTable <- merge(mergedTable,i,by=1,all=T)
    }
  }
  
  if(!file.exists(annotation)){
    message("No annotation file found:",annotation)
    break()     
  }  
  Annotation <- read.table(annotation,sep="\t",h=T)  
  annotatedExpression <- merge(Annotation,mergedTable,by=1,all.x=F,all.y=T)
  
  outAnnotatedExpression <- paste0(basename((normalizePath(dirName))),"_Summarised.csv")

  write.table(annotatedExpression,file=outAnnotatedExpression,sep=",",col.names=T,row.names=F,quote=F)
  
  
  ### New section
  
  if(!is.null(sampleGroups)){
      groupAsamples <- unique(sampleGroups[[1]])
      groupBsamples <- unique(sampleGroups[[2]])
      groupA <- colnames(annotatedExpression) %in% groupAsamples
      groupB <- colnames(annotatedExpression) %in% groupBsamples

      indexGroupOne <- groupA
      indexGroupTwo <- groupB

      ttestResults <- apply(annotatedExpression,1,function(x)
        t.test(as.numeric(x[indexGroupOne]),as.numeric(x[indexGroupTwo]))
        )
      
      testResult <- sapply(ttestResults,function(x)
        c(log2(x$estimate[2]) - log2(x$estimate[1]), x$statistic,x$p.value)
        )
      testResult <- t(testResult)
      colnames(testResult) <- c("logFC","tStatistic","pValue")
      annotatedResult <- cbind(annotatedExpression[,1:3],testResult)
      annotatedResult <- annotatedResult[order(annotatedResult$tStatistic),]
      outAnnotatedTestResults <- paste0(basename((normalizePath(dirName))),"_ttestResults.csv")
      write.table(annotatedResult,file=outAnnotatedTestResults,sep=",",col.names=T,row.names=F,quote=F)    
  }
  
  
  
}
expressionFiles <- dir("../ExpressionResults/",pattern="*.txt")
groupA <- expressionFiles[grep("[1-5].txt",expressionFiles)]
groupB <- expressionFiles[grep("[6-9,0].txt",expressionFiles)]
myGroups <- list(groupA,groupB)
summariseResults2("../ExpressionResults/","../ExpressionResults/Annotation.ann",myGroups)

Functions in R answers

MRC CSC

01/01/2015