These exercises cover the sections of functions in R Introduction to R.
– Create a function which takes one argument and finds the smallest number whose factorial is greater than that argument.
findSmallestFactorial <- function(x){
if(!is.numeric(x)){
message("Please provide a numeric argument!")
}else{
factorialAnswer <- 0
count <- 0
while(factorialAnswer <= x){
count <- count+1
if(count == 1){
factorialAnswer <- 1
}else{
factorialAnswer <- factorialAnswer * count
}
}
return(count)
}
}
findSmallestFactorial(3000)
## [1] 7
findSmallestFactorial(10^100)
## [1] 70
– Create a function which takes a vector argument and returns both the number of even perfect squares and a vector of perfect squares.
findPerfectSquares <- function(x){
vecOfSquares <- c()
evenSquares <- 0
for(i in x){
if(sqrt(i) %% 1 == 0){
vecOfSquares <- c(vecOfSquares,i)
if(sqrt(i) %% 1 == 0 & i %% 2 == 0){
evenSquares <- evenSquares+1
}
}
}
return(list(vectorOfSquares=vecOfSquares,nOfEvenSquares=evenSquares))
}
findPerfectSquares(1:100)
## $vectorOfSquares
## [1] 1 4 9 16 25 36 49 64 81 100
##
## $nOfEvenSquares
## [1] 5
#Or
altFindPerfectSquares <- function(x){
perfectSquare <- sqrt(x) %% 1 == 0
vecOfSquares <- x[perfectSquare]
evenSquares <- table(perfectSquare & x %% 2 == 0)[T]
return(list(vectorOfSquares=vecOfSquares,nOfEvenSquares=evenSquares))
}
altFindPerfectSquares(1:100)
## $vectorOfSquares
## [1] 1 4 9 16 25 36 49 64 81 100
##
## $nOfEvenSquares
##
## FALSE TRUE
## 95 5
#Res <- findPerfectSquares(1:10^8)
#altRes <- altFindPerfectSquares(1:10^8)
– Create a function which takes an argument of the directory containing expression files and the name of the annotation file and writes a the table with all samples’ expression results and all annotation to file.
summariseResults <- function(dirName=getwd(),annotation){
if(missing(annotation)){
message("Annotation file must be provided")
break()
}
filesToRead <- dir(dirName,pattern = "*\\.txt",full.names=T)
if(length(filesToRead) == 0){
message("No expression files found in ",dirName)
break()
}
fileRead <- vector("list",length=length(filesToRead))
for(i in 1:length(filesToRead)){
fileRead[[i]] <- read.delim(filesToRead[i],header=F,sep="\t")
colnames(fileRead[[i]]) <- c("GeneNames",basename(filesToRead[i]))
}
mergedTable <- NULL
for(i in fileRead){
if(is.null(mergedTable)){
mergedTable <- i
}else{
mergedTable <- merge(mergedTable,i,by=1,all=T)
}
}
if(!file.exists(annotation)){
message("No annotation file found:",annotation)
break()
}
Annotation <- read.table(annotation,sep="\t",h=T)
annotatedExpression <- merge(Annotation,mergedTable,by=1,all.x=F,all.y=T)
outName <- paste0(basename((normalizePath(dirName))),"_Summarised.csv")
write.table(annotatedExpression,file=outName,sep=",",col.names=T,row.names=F,quote=F)
return(outName)
}
summariseResults("../ExpressionResults/","../ExpressionResults/Annotation.ann")
## [1] "ExpressionResults_Summarised.csv"
– Adapt the above function to also write a t-test result table filtered by the p-value cut-off. An additional argument specifying the allocation a samples into groups must be specified.
To retrieve the indicies of the first occurence of every element in one vector in a second vector you can use the match() function.
allSamples <- c("sample1.txt","sample2.txt","sample3.txt","sample4.txt","sample5.txt","sample5.txt")
testSamples <- c("sample1.txt","sample5.txt")
match(testSamples,allSamples)
## [1] 1 5
allSamples[match(testSamples,allSamples)]
## [1] "sample1.txt" "sample5.txt"
To find all occurences of vector in another you can use the %in% operator
allSamples <- c("sample1.txt","sample2.txt","sample3.txt","sample4.txt","sample5.txt")
testSamples <- c("sample1.txt","sample5.txt")
allSamples %in% testSamples
## [1] TRUE FALSE FALSE FALSE TRUE
allSamples[allSamples %in% testSamples]
## [1] "sample1.txt" "sample5.txt"
summariseResults2 <- function(dirName=getwd(),annotation,sampleGroups=NULL){
if(missing(annotation)){
message("Annotation file must be provided")
break()
}
filesToRead <- dir(dirName,pattern = "*\\.txt",full.names=T)
if(length(filesToRead) == 0){
message("No expression files found in ",dirName)
break()
}
fileRead <- vector("list",length=length(filesToRead))
for(i in 1:length(filesToRead)){
fileRead[[i]] <- read.delim(filesToRead[i],header=F,sep="\t")
colnames(fileRead[[i]]) <- c("GeneNames",basename(filesToRead[i]))
}
mergedTable <- NULL
for(i in fileRead){
if(is.null(mergedTable)){
mergedTable <- i
}else{
mergedTable <- merge(mergedTable,i,by=1,all=T)
}
}
if(!file.exists(annotation)){
message("No annotation file found:",annotation)
break()
}
Annotation <- read.table(annotation,sep="\t",h=T)
annotatedExpression <- merge(Annotation,mergedTable,by=1,all.x=F,all.y=T)
outAnnotatedExpression <- paste0(basename((normalizePath(dirName))),"_Summarised.csv")
write.table(annotatedExpression,file=outAnnotatedExpression,sep=",",col.names=T,row.names=F,quote=F)
### New section
if(!is.null(sampleGroups)){
groupAsamples <- unique(sampleGroups[[1]])
groupBsamples <- unique(sampleGroups[[2]])
groupA <- colnames(annotatedExpression) %in% groupAsamples
groupB <- colnames(annotatedExpression) %in% groupBsamples
indexGroupOne <- groupA
indexGroupTwo <- groupB
ttestResults <- apply(annotatedExpression,1,function(x)
t.test(as.numeric(x[indexGroupOne]),as.numeric(x[indexGroupTwo]))
)
testResult <- sapply(ttestResults,function(x)
c(log2(x$estimate[2]) - log2(x$estimate[1]), x$statistic,x$p.value)
)
testResult <- t(testResult)
colnames(testResult) <- c("logFC","tStatistic","pValue")
annotatedResult <- cbind(annotatedExpression[,1:3],testResult)
annotatedResult <- annotatedResult[order(annotatedResult$tStatistic),]
outAnnotatedTestResults <- paste0(basename((normalizePath(dirName))),"_ttestResults.csv")
write.table(annotatedResult,file=outAnnotatedTestResults,sep=",",col.names=T,row.names=F,quote=F)
}
}
expressionFiles <- dir("../ExpressionResults/",pattern="*.txt")
groupA <- expressionFiles[grep("[1-5].txt",expressionFiles)]
groupB <- expressionFiles[grep("[6-9,0].txt",expressionFiles)]
myGroups <- list(groupA,groupB)
summariseResults2("../ExpressionResults/","../ExpressionResults/Annotation.ann",myGroups)