test
test from R pub
Published on September 11, 2025 • By EduResHub Team
Iris dataset
Professor Chandra Kanta Dash, PhD
2025-08-26
# Load the built-in iris dataset
data("iris")
# Preview the first six rows of iris
head(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Per-column summary of iris (quartiles for numeric columns, counts for Species)
summary(object = iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Number of rows and columns in iris
dim(x = iris)
## [1] 150 5
# Column names of iris
colnames(iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
# Histogram of the Sepal.Length values
hist(
  x = iris$Sepal.Length,
  main = "Histogram",
  xlab = "Sepal.Length",
  ylab = "Frequency",
  col = "steelblue"
)
# Scatterplot with sepal width on the x-axis and sepal length on the y-axis
plot(
  x = iris$Sepal.Width,
  y = iris$Sepal.Length,
  main = "Scatterplot",
  xlab = "Sepal Width",
  ylab = "Sepal Length",
  pch = 19,
  col = "steelblue"
)
# Boxplot of Sepal.Length over all rows of iris (no grouping).
# The title previously read 'Lenght of Sepal'; the spelling is corrected here.
boxplot(iris$Sepal.Length,
        main = "Length of Sepal",
        ylab = "Length",
        col = "steelblue",
        border = "black")
# Boxplot of Sepal.Length, one box per species
boxplot(
  Sepal.Length ~ Species,
  data = iris,
  col = "steelblue",
  border = "black",
  main = "Sepal Length by Species",
  xlab = "Species",
  ylab = "Sepal Length"
)
# Boxplot of Sepal.Width, one box per species.
# Title and y-axis label previously misspelled "Width" as "Widgth".
boxplot(Sepal.Width ~ Species,
        data = iris,
        main = "Sepal Width by Species",
        xlab = "Species",
        ylab = "Sepal Width",
        col = "steelblue",
        border = "black")
# Boxplot of Petal.Length, one box per species
boxplot(
  Petal.Length ~ Species,
  data = iris,
  col = "steelblue",
  border = "black",
  main = "Petal Length by Species",
  xlab = "Species",
  ylab = "Petal Length"
)
# Boxplot of Petal.Width, one box per species.
# The title and y-axis label were copied from the Petal.Length plot and still
# said "Petal Length"; they now name the variable that is actually plotted.
boxplot(Petal.Width ~ Species,
        data = iris,
        main = "Petal Width by Species",
        xlab = "Species",
        ylab = "Petal Width",
        col = "steelblue",
        border = "black")
# Load packages
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
# ggpubr boxplots: one plot per measurement, boxes colored by species
ggboxplot(
  data = iris, x = "Species", y = "Sepal.Length",
  xlab = "Species", ylab = "Sepal Length",
  color = "Species", palette = "jco"
)
ggboxplot(
  data = iris, x = "Species", y = "Sepal.Width",
  xlab = "Species", ylab = "Sepal Width",
  color = "Species", palette = "jco"
)
ggboxplot(
  data = iris, x = "Species", y = "Petal.Length",
  xlab = "Species", ylab = "Petal Length",
  color = "Species", palette = "jco"
)
ggboxplot(
  data = iris, x = "Species", y = "Petal.Width",
  xlab = "Species", ylab = "Petal Width",
  color = "Species", palette = "jco"
)
# Same four ggpubr boxplots, stored as p1..p4 so they can be combined below
p1 <- ggboxplot(
  data = iris, x = "Species", y = "Sepal.Length",
  xlab = "Species", ylab = "Sepal Length",
  color = "Species", palette = "jco"
)
p2 <- ggboxplot(
  data = iris, x = "Species", y = "Sepal.Width",
  xlab = "Species", ylab = "Sepal Width",
  color = "Species", palette = "jco"
)
p3 <- ggboxplot(
  data = iris, x = "Species", y = "Petal.Length",
  xlab = "Species", ylab = "Petal Length",
  color = "Species", palette = "jco"
)
p4 <- ggboxplot(
  data = iris, x = "Species", y = "Petal.Width",
  xlab = "Species", ylab = "Petal Width",
  color = "Species", palette = "jco"
)
# Lay the four plots out on a 2 x 2 grid with one shared legend at the bottom
ggarrange(
  p1, p2, p3, p4,
  nrow = 2, ncol = 2,
  legend = "bottom", common.legend = TRUE
)
# Density curves of Sepal.Length, colored and filled by species,
# with a mean marker and a rug added
ggdensity(
  data = iris, x = "Sepal.Length",
  color = "Species", fill = "Species",
  palette = "lancet",
  rug = TRUE, add = "mean"
)
# Load library
library(ggpubr)
# Dot plots of each measurement by species, each with mean ± SE overlaid.
# No binwidth is passed to ggdotplot in any of these calls.
# Dot plot: Sepal Length
p1 <- ggdotplot(
  data = iris, x = "Species", y = "Sepal.Length",
  xlab = "Species", ylab = "Sepal Length",
  add = "mean_se",
  color = "Species", palette = "jco"
)
# Dot plot: Sepal Width
p2 <- ggdotplot(
  data = iris, x = "Species", y = "Sepal.Width",
  xlab = "Species", ylab = "Sepal Width",
  add = "mean_se",
  color = "Species", palette = "jco"
)
# Dot plot: Petal Length
p3 <- ggdotplot(
  data = iris, x = "Species", y = "Petal.Length",
  xlab = "Species", ylab = "Petal Length",
  add = "mean_se",
  color = "Species", palette = "jco"
)
# Dot plot: Petal Width
p4 <- ggdotplot(
  data = iris, x = "Species", y = "Petal.Width",
  xlab = "Species", ylab = "Petal Width",
  add = "mean_se",
  color = "Species", palette = "jco"
)
# Combine the four dot plots on a 2 x 2 grid with one shared legend at the bottom
ggarrange(
  p1, p2, p3, p4,
  nrow = 2, ncol = 2,
  legend = "bottom", common.legend = TRUE
)
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
library(ggpubr)
# 1. Bar plot (ggbarplot)
# Sepal.Length by species, drawn with add = "mean_se" (mean ± SE)
# and bars filled by species.
bar1 <- ggbarplot(
  data = iris, x = "Species", y = "Sepal.Length",
  xlab = "Species", ylab = "Mean Sepal Length",
  palette = "jco",
  fill = "Species",
  add = "mean_se"
)
bar1
# 2. Scatter plot (ggscatter)
# Sepal.Length (x) against Sepal.Width (y); point color and shape follow Species.
sc1 <- ggscatter(
  data = iris, x = "Sepal.Length", y = "Sepal.Width",
  size = 3,
  shape = "Species",
  color = "Species", palette = "jco",
  add = "reg.line", # regression line
  conf.int = TRUE, # with its confidence interval
  add.params = list(color = "black", fill = "lightgray")
)
# Add correlation labels, colored by species, at x = 5, y = 4.5
sc1 <- sc1 + stat_cor(aes(color = Species), label.x = 5, label.y = 4.5)
sc1
library(ggplot2)
library(ggplot2)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggpubr)
library(car)
## Warning: package 'car' was built under R version 4.4.2
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:car':
##
## logit
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
# 1. Normality Tests on Sepal.Length
# Shapiro-Wilk test (for small samples, n <= 5000), applied separately to the
# Sepal.Length values of each species.
by(iris$Sepal.Length, iris$Species, shapiro.test)
## iris$Species: setosa
##
## Shapiro-Wilk normality test
##
## data: dd[x, ]
## W = 0.9777, p-value = 0.4595
##
## ------------------------------------------------------------
## iris$Species: versicolor
##
## Shapiro-Wilk normality test
##
## data: dd[x, ]
## W = 0.97784, p-value = 0.4647
##
## ------------------------------------------------------------
## iris$Species: virginica
##
## Shapiro-Wilk normality test
##
## data: dd[x, ]
## W = 0.97118, p-value = 0.2583
# Anderson-Darling normality test.
# Run on all Sepal.Length values pooled together, unlike the per-species
# Shapiro-Wilk test above.
library(nortest)
ad.test(iris$Sepal.Length)
##
## Anderson-Darling normality test
##
## data: iris$Sepal.Length
## A = 0.8892, p-value = 0.02251
# Lilliefors (Kolmogorov-Smirnov) normality test on the pooled Sepal.Length values
lillie.test(iris$Sepal.Length)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: iris$Sepal.Length
## D = 0.088654, p-value = 0.005788
# Kolmogorov-Smirnov test against a normal distribution whose mean and sd are
# computed from the same Sepal.Length sample.
# NOTE(review): the statistic matches the Lilliefors test (D = 0.088654) but the
# p-value does not (0.1891 here vs 0.005788); estimating the parameters from the
# tested data is presumably the reason - confirm which result should be reported.
ks.test(iris$Sepal.Length, "pnorm", mean(iris$Sepal.Length), sd(iris$Sepal.Length))
## Warning in ks.test.default(iris$Sepal.Length, "pnorm", mean(iris$Sepal.Length),
## : ties should not be present for the one-sample Kolmogorov-Smirnov test
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: iris$Sepal.Length
## D = 0.088654, p-value = 0.1891
## alternative hypothesis: two-sided
# Q-Q plot of Sepal.Length, one facet per species
ggqqplot(
  data = iris,
  x = "Sepal.Length",
  facet.by = "Species"
)
# 2. Bar plot with error bars (mean ± SE)
# iris_summary holds, per species, the mean of Sepal.Length and its standard
# error (sd / sqrt(n)). It is reused by the line graph that follows.
iris_summary <- iris %>%
  group_by(Species) %>%
  summarise(mean = mean(Sepal.Length), se = sd(Sepal.Length) / sqrt(n()))
# geom_col() draws bars whose heights are the supplied y values; it is the
# idiomatic equivalent of geom_bar(stat = "identity").
ggplot(iris_summary, aes(x = Species, y = mean, fill = Species)) +
  geom_col() +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2) +
  ylab("Mean Sepal Length") +
  theme_minimal()
# 3. Line graph with error bars
# Plots the per-species means from iris_summary as points joined by one line
# (group = 1), with mean ± se error bars.
ggplot(
  data = iris_summary,
  mapping = aes(x = Species, y = mean, group = 1)
) +
  geom_line() +
  geom_point() +
  geom_errorbar(
    mapping = aes(ymin = mean - se, ymax = mean + se),
    width = 0.1
  ) +
  labs(y = "Mean Sepal Length") +
  theme_minimal()
# 4. Boxplot with mean and error bars
# Sepal.Length by species, with add = "mean_se" overlaid on each box
ggboxplot(
  data = iris,
  x = "Species",
  y = "Sepal.Length",
  add = "mean_se"
)