N.B. Due to the current settings of our Jupyter notebook on NeSI, this won't work; it is provided as an example of how you would do it if you had R installed on your own computer.
# install package called `tidyverse`
> install.packages("tidyverse")
# get the installer package if you don't have it
> install.packages("BiocManager")
# install bioconductor package "DESeq2"
> BiocManager::install("DESeq2")
# updating CRAN packages
update.packages()
# updating bioconductor packages
> if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
> BiocManager::install()
You can get help on functions by using help()
and ?function_name.
> ?hist
> help("hist")
Above all, google search can be your best friend to quickly find answers.
# create vectors named x and y with 5 elements
> x <- c(1,2,3,4,5)
[1] 1 2 3 4 5
> y <- 6:10
[1] 6 7 8 9 10
# scalar addition and multiplication
> y + 2
[1] 8 9 10 11 12
> x * 2
[1] 2 4 6 8 10
# create a vector of 1s, length 3
> r1 <- rep(1,3)
> length(r1)
[1] 3
> class(r1)
[1] "numeric"
# creating matrices from vectors
> x <- c(1,2,3,4,5)
> y <- c(6,7,8,9,10)
> m1 <- cbind(x,y)
> m1
x y
[1,] 1 6
[2,] 2 7
[3,] 3 8
[4,] 4 9
[5,] 5 10
> dim(m1)
[1] 5 2
#transpose m1
> m2 <- t(m1)
> m2
[,1] [,2] [,3] [,4] [,5]
x 1 2 3 4 5
y 6 7 8 9 10
> dim(m2)
[1] 2 5
# Creating matrices by directly adding elements
> m3<-matrix(c(1,3,2,5,-1,2,2,3,9),nrow=3)
> m3
[,1] [,2] [,3]
[1,] 1 5 2
[2,] 3 -1 3
[3,] 2 2 9
# creating data frame
> chr <- c("chr1", "chr1", "chr2", "chr2")
> strand <- c("-","-","+","+")
> start<- c(200,4000,100,400)
> end<-c(250,410,200,450)
> mydata <- data.frame(chr,start,end,strand)
#change column names
names(mydata) <- c("chr","start","end","strand")
> mydata
chr start end strand
1 chr1 200 250 -
2 chr1 4000 410 -
3 chr2 100 200 +
4 chr2 400 450 +
> mydata[,2:4]
start end strand
1 200 250 -
2 4000 410 -
3 100 200 +
4 400 450 +
> mydata[,c("chr","start")]
chr start
1 chr1 200
2 chr1 4000
3 chr2 100
4 chr2 400
> mydata$start
[1] 200 4000 100 400
> mydata[c(1,3),]
chr start end strand
1 chr1 200 250 -
3 chr2 100 200 +
> mydata[mydata$start>400,]
chr start end strand
2 chr1 4000 410 -
# creating a list with 4 objects
> mylist <- list(name="Bob", mynumbers=c(1,2,3), mymatrix=matrix(1:4,ncol = 2), age=5.3)
> mylist
$name
[1] "Bob"
$mynumbers
[1] 1 2 3
$mymatrix
[,1] [,2]
[1,] 1 3
[2,] 2 4
$age
[1] 5.3
# Objects in a list can be extracted using double square-bracket `[[]]` with either position or name in the brackets
> mylist[[2]]
[1] 1 2 3
> mylist[["mymatrix"]]
[,1] [,2]
[1,] 1 3
[2,] 2 4
# This also works
> mylist$age
[1] 5.3
# creating factors
> features=c("promoter","exon","intron")
> class(features)
[1] "character"
> f.feat=factor(features)
> class(f.feat)
[1] "factor"
#create a numeric vector x with 5 components
> x<-c(1,3,2,10,5)
> x
[1] 1 3 2 10 5
#create a logical vector x
> x<-c(TRUE,FALSE,TRUE)
> x
[1] TRUE FALSE TRUE
# create a character vector
> x<-c("sds","sd","as")
> x
[1] "sds" "sd" "as"
# create an integer vector
> x<-c(1L,2L,3L)
> x
[1] 1 2 3
We are going to read the count data file located in RNA-seq-workshop/Prep_Files/Files
> cd1 <- read.table("test_table.csv", sep=",", header=TRUE)
> head(cd1)
X WT1 WT2 WT3 MT1 MT2 MT3
1 YDL248W 52 46 36 65 70 78
2 YDL247W-A 0 0 0 0 1 0
3 YDL247W 2 4 2 6 8 5
4 YDL246C 0 0 1 1 2 0
5 YDL245C 0 3 0 5 7 4
6 YDL244W 6 6 5 20 30 19
> dim(cd1)
[1] 7127 7
# check column names
> names(cd1)
[1] "X" "WT1" "WT2" "WT3" "MT1" "MT2" "MT3"
# change column name
> names(cd1)[1] = c("Gene_Names")
> names(cd1)
[1] "Gene_Names" "WT1" "WT2" "WT3" "MT1" "MT2" "MT3"
We are also going to write out our data
> write.table(cd1, file="new_test_data.txt", row.names = FALSE, col.names = TRUE, sep = "\t")
> x <- rnorm(50)
> hist(x)
> hist(x,main="Hello histogram!!!",col="red")
We are going to create another vector y and compare with x
> y <- rnorm(50)
# plot scatter plot
> plot(x,y,main="scatterplot of random samples", ylab="y values",xlab="x values")
Using the same data from vectors x and y, we can plot a boxplot
> boxplot(x,y,main="boxplots of random samples")
# Creating vectors with the variables
> gene_len <- c(250, 400, 320, 100)
> gene_names <- c("lipase1","lipase2", "protease1", "protease2")
# plotting the barplot
> barplot(gene_len, names.arg=gene_names, ylab = "Gene Length", main = "Lengths of experimental genes", col = c("red", "red", "blue", "blue"))
# creating the legend
> legend("topright",legend=c("test","control"), fill=c("red","blue"))
Multiple plots can be combined in the same plotting window; use the par() function for this.
# Combining 2 plots side-by-side
> par(mfrow=c(1,2))
> hist(x,main="Hello histogram!!!",col="red")
> plot(x,y,main="scatterplot", ylab="y values",xlab="x values")
# Open a graphics device
> pdf("myplot.pdf",width=5,height=5)
# Create the plot
> plot(x,y)
# Close the graphics device
> dev.off()
> plot(x,y)
> dev.copy(pdf,"myplot.pdf",width=7,height=5)
> dev.off()