1 + 1
[1] 2
x <- 10
print(x)
[1] 10
In the Console panel: Type the following commands and press Enter after each one:
1 + 1
[1] 2
x <- 10
print(x)
[1] 10
You should see the output of the commands printed in the console.
Create a new R Script:
lesson1.R.
Run the script:
R packages are collections of functions, data, and documentation that extend the capabilities of R. The tidyverse
package is a collection of popular packages for data science.
Install the tidyverse
package: In the Console, type the following command and press Enter:
install.packages("tidyverse")
R will download and install the tidyverse
package and its dependencies. This may take a few minutes.
Load the tidyverse
package: In the Console or in your script, type the following command and press Enter:
library(tidyverse)
This loads the tidyverse
package into your R session, making its functions available for use.
We’ll use a sample CSV file for demonstration.
Download the exam_scores.csv file from the course materials to your data directory. You can also copy this link to read the data directly into R: Sample CSV Data.
Now, let’s read the exam_scores.csv file into R and inspect it:
#Replace this link with your actual link to your data.
exam_scores <- read.csv("https://raw.githubusercontent.com/sijuswamyresearch/R-for-Data-Analytics/refs/heads/main/data/exam_scores.csv")
#Display the first few rows.
head(exam_scores)
student_id study_hours score grade
1 1 NA 65 C
2 2 5 88 B
3 3 1 52 F
4 4 3 76 c
5 5 4 82 B
6 6 2 100 C
R supports several fundamental data types:
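Numeric: numbers (e.g., 1, 3.14, -2.5).
Character: text strings (e.g., "hello", "Data Analysis").
Factor: categorical data (e.g., "Low", "Medium", "High"). Factors are important for statistical analysis.
Logical: TRUE or FALSE.
Date: calendar dates (e.g., "2023-10-27").
The class() function tells you the data type of a variable: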
x <- 10
class(x)
[1] "numeric"
y <- "hello"
class(y)
[1] "character"
You can convert between data types using the as.*()
functions:
as.numeric()
as.character()
as.factor()
as.logical()
as.Date()
Example:
x <- "123"
class(x)
[1] "character"
x_numeric <- as.numeric(x)
class(x_numeric)
[1] "numeric"
Converting a character string that doesn’t represent a number to numeric will result in NA.
Lists are versatile data structures that can hold elements of different types. A list can contain numbers, strings, vectors, arrays, or even other lists.
You can create a list using the list()
function.
# Creating a simple list
my_list <- list(name = "John", age = 30, grades = c(85, 90, 78))
print(my_list)
$name
[1] "John"
$age
[1] 30
$grades
[1] 85 90 78
You can access list elements using their names or indices.
# Accessing list elements by name
name <- my_list$name
age <- my_list$age
print(paste("Name:", name))
[1] "Name: John"
print(paste("Age:", age))
[1] "Age: 30"
# Accessing list elements by index
name <- my_list[[1]]
age <- my_list[[2]]
print(paste("Name:", name))
[1] "Name: John"
print(paste("Age:", age))
[1] "Age: 30"
You can modify lists by adding, updating, or deleting elements.
# Adding elements to a list
my_list$city <- "New York"
print(my_list)
$name
[1] "John"
$age
[1] 30
$grades
[1] 85 90 78
$city
[1] "New York"
# Updating list elements
my_list$age <- 31
print(my_list)
$name
[1] "John"
$age
[1] 31
$grades
[1] 85 90 78
$city
[1] "New York"
# Deleting list elements
my_list$grades <- NULL
print(my_list)
$name
[1] "John"
$age
[1] 31
$city
[1] "New York"
Arrays are data structures that can hold elements of the same type in multiple dimensions.
You can create an array using the array()
function.
#one dimensional array
a1=array(1:10)
print(a1)
[1] 1 2 3 4 5 6 7 8 9 10
# Creating a 2D array (matrix)
my_matrix <- array(data = 1:9, dim = c(3, 3))
print(my_matrix)
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
# creating matrix using matrix function
m1=matrix(c(1,2,3,4,5,6,7,8,9),ncol=3,byrow=T)
m1
[,1] [,2] [,3]
[1,] 1 2 3
[2,] 4 5 6
[3,] 7 8 9
# Creating a 3D array
my_3d_array <- array(data = 1:27, dim = c(3, 3, 3))
print(my_3d_array)
, , 1
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
, , 2
[,1] [,2] [,3]
[1,] 10 13 16
[2,] 11 14 17
[3,] 12 15 18
, , 3
[,1] [,2] [,3]
[1,] 19 22 25
[2,] 20 23 26
[3,] 21 24 27
# Accessing array elements
my_matrix
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
element <- my_matrix[2, 3]
print(paste("Element at (2, 3):", element))
[1] "Element at (2, 3): 8"
# Modifying array elements
my_matrix[1, 1] <- 10
print(my_matrix)
[,1] [,2] [,3]
[1,] 10 4 7
[2,] 2 5 8
[3,] 3 6 9
You can perform various operations on arrays, such as transposing and performing arithmetic operations.
# Transposing a matrix
transposed_matrix <- t(my_matrix)
print(transposed_matrix)
[,1] [,2] [,3]
[1,] 10 2 3
[2,] 4 5 6
[3,] 7 8 9
# Creating another matrix
another_matrix <- array(data = 10:18, dim = c(3, 3))
# Matrix multiplication
multiplied_matrix <- my_matrix %*% another_matrix
print(multiplied_matrix)
[,1] [,2] [,3]
[1,] 228 291 354
[2,] 171 216 261
[3,] 204 258 312
Data frames are table-like data structures that organize data into rows and columns. Each column can hold data of a different type.
You can create a data frame using the data.frame()
function.
# Creating a data frame
my_data_frame <- data.frame(
  id = 1:3,
  name = c("Alice", "Bob", "Charlie"),
  age = c(25, 30, 28),
  score = c(85, 92, 78)
)
print(my_data_frame)
id name age score
1 1 Alice 25 85
2 2 Bob 30 92
3 3 Charlie 28 78
You can access data frame elements using their column names or indices.
# Accessing data frame columns by name
names <- my_data_frame$name
ages <- my_data_frame$age
print(names)
[1] "Alice" "Bob" "Charlie"
print(ages)
[1] 25 30 28
You can modify data frames by adding, updating, or deleting columns and rows.
# Adding a column to a data frame
my_data_frame$city <- c("New York", "Los Angeles", "Chicago")
print(my_data_frame)
id name age score city
1 1 Alice 25 85 New York
2 2 Bob 30 92 Los Angeles
3 3 Charlie 28 78 Chicago
# Updating data frame elements
my_data_frame$age[1] <- 26
print(my_data_frame)
id name age score city
1 1 Alice 26 85 New York
2 2 Bob 30 92 Los Angeles
3 3 Charlie 28 78 Chicago
# Deleting a column from a data frame
my_data_frame$city <- NULL
print(my_data_frame)
id name age score
1 1 Alice 26 85
2 2 Bob 30 92
3 3 Charlie 28 78
# Adding rows to a data frame
new_row <- data.frame(id = 4, name = "David", age = 32, score = 90)
my_data_frame <- rbind(my_data_frame, new_row)
print(my_data_frame)
id name age score
1 1 Alice 26 85
2 2 Bob 30 92
3 3 Charlie 28 78
4 4 David 32 90
#adding a column using cbind
new_col=data.frame(city=c("Kottayam","Ettumanoor","Elanji","Muvattupuzha"))
my_data_frame=cbind(my_data_frame,new_col)
my_data_frame
id name age score city
1 1 Alice 26 85 Kottayam
2 2 Bob 30 92 Ettumanoor
3 3 Charlie 28 78 Elanji
4 4 David 32 90 Muvattupuzha
# Deleting rows from a data frame
my_data_frame <- my_data_frame[-4, ]
print(my_data_frame)
id name age score city
1 1 Alice 26 85 Kottayam
2 2 Bob 30 92 Ettumanoor
3 3 Charlie 28 78 Elanji
You can perform various operations on data frames, such as subsetting, filtering, sorting, and merging.
# Subsetting data frames
subset_df <- my_data_frame[, c("name", "score")]
print(subset_df)
name score
1 Alice 85
2 Bob 92
3 Charlie 78
# Filtering data frames
filtered_df <- my_data_frame[my_data_frame$age > 28, ]
print(filtered_df)
id name age score city
2 2 Bob 30 92 Ettumanoor
# Sorting data frames
sorted_df <- my_data_frame[order(my_data_frame$age), ]
print(sorted_df)
id name age score city
1 1 Alice 26 85 Kottayam
3 3 Charlie 28 78 Elanji
2 2 Bob 30 92 Ettumanoor