Pertemuan 7: MANOVA

Pengantar Multivariat

Offline di Departemen Matematika
Published

November 26, 2024

ANOVA

Hipotesis

\(H_0: \mu_1 = \mu_2 = ... = \mu_k\)

\(H_1: \text{Tidak Demikian}\)

Statistik Uji:

Aturan Penolakan:

Tolak \(H_0\) pada tingkat \(\alpha\) jika \(F > F_\alpha\)

Manual

Kita akan menggunakan data dari tabel 6.2 buku Rencher (Rootstock Data)

df <- read.delim("https://raw.githubusercontent.com/kmyafi/Multivariate-Statistics-R/main/Data/T6_2_ROOT.DAT",
                 sep = "", header = FALSE)
colnames(df) <- c('Rootstock', 'y1', 'y2', 'y3', 'y4')
df
   Rootstock   y1    y2   y3    y4
1          1 1.11 2.569 3.58 0.760
2          1 1.19 2.928 3.75 0.821
3          1 1.09 2.865 3.93 0.928
4          1 1.25 3.844 3.94 1.009
5          1 1.11 3.027 3.60 0.766
6          1 1.08 2.336 3.51 0.726
7          1 1.11 3.211 3.98 1.209
8          1 1.16 3.037 3.62 0.750
9          2 1.05 2.074 4.09 1.036
10         2 1.17 2.885 4.06 1.094
11         2 1.11 3.378 4.87 1.635
12         2 1.25 3.906 4.98 1.517
13         2 1.17 2.782 4.38 1.197
14         2 1.15 3.018 4.65 1.244
15         2 1.17 3.383 4.69 1.495
16         2 1.19 3.447 4.40 1.026
17         3 1.07 2.505 3.76 0.912
18         3 0.99 2.315 4.44 1.398
19         3 1.06 2.667 4.38 1.197
20         3 1.02 2.390 4.67 1.613
21         3 1.15 3.021 4.48 1.476
22         3 1.20 3.085 4.78 1.571
23         3 1.20 3.308 4.57 1.506
24         3 1.17 3.231 4.56 1.458
25         4 1.22 2.838 3.89 0.944
26         4 1.03 2.351 4.05 1.241
27         4 1.14 3.001 4.05 1.023
28         4 1.01 2.439 3.92 1.067
29         4 0.99 2.199 3.27 0.693
30         4 1.11 3.318 3.95 1.085
31         4 1.20 3.601 4.27 1.242
32         4 1.08 3.291 3.85 1.017
33         5 0.91 1.532 4.04 1.084
34         5 1.15 2.552 4.16 1.151
35         5 1.14 3.083 4.79 1.381
36         5 1.05 2.330 4.42 1.242
37         5 0.99 2.079 3.47 0.673
38         5 1.22 3.366 4.41 1.137
39         5 1.05 2.416 4.64 1.455
40         5 1.13 3.100 4.57 1.325
41         6 1.11 2.813 3.76 0.800
42         6 0.75 0.840 3.14 0.606
43         6 1.05 2.199 3.75 0.790
44         6 1.02 2.132 3.99 0.853
45         6 1.05 1.949 3.34 0.610
46         6 1.07 2.251 3.21 0.562
47         6 1.13 3.064 3.63 0.707
48         6 1.11 2.469 3.95 0.952

Pastikan tipe data variabel yang menyatakan anggota kelompok sebagai factor

df$Rootstock <- as.factor(df$Rootstock)

Misalkan ingin diuji apakah mean dari variabel y1 sama untuk setiap kelompok.

Akan dicari nilai \(n_k\), \(\bar{y}_{1k}\) dan vektor mean \(\bf{\bar{y}_1}\) untuk menghitung nilai statistik uji \(F\).

# Jumlah Group
k <- 6

n1 <- nrow(df[df$Rootstock == "1", ])
n2 <- nrow(df[df$Rootstock == "2", ])
n3 <- nrow(df[df$Rootstock == "3", ])
n4 <- nrow(df[df$Rootstock == "4", ])
n5 <- nrow(df[df$Rootstock == "5", ])
n6 <- nrow(df[df$Rootstock == "6", ])

mean1 <- mean(as.matrix(df[df$Rootstock == "1", "y1"]))
mean2 <- mean(as.matrix(df[df$Rootstock == "2", "y1"]))
mean3 <- mean(as.matrix(df[df$Rootstock == "3", "y1"]))
mean4 <- mean(as.matrix(df[df$Rootstock == "4", "y1"]))
mean5 <- mean(as.matrix(df[df$Rootstock == "5", "y1"]))
mean6 <- mean(as.matrix(df[df$Rootstock == "6", "y1"]))

overallMean <- mean(df$y1)

SSH <- sum(n1 * (mean1 - overallMean)^2 + 
             n2 * (mean2 - overallMean)^2 + 
             n3 * (mean3 - overallMean)^2 +
             n4 * (mean4 - overallMean)^2 +
             n5 * (mean5 - overallMean)^2 +
             n6 * (mean6 - overallMean)^2); SSH
[1] 0.07356042
MSH <- SSH/(k-1); MSH
[1] 0.01471208
SSE <- sum(c((as.matrix(df[df$Rootstock == "1", "y1"]) - mean1)^2,
             (as.matrix(df[df$Rootstock == "2", "y1"]) - mean2)^2,
             (as.matrix(df[df$Rootstock == "3", "y1"]) - mean3)^2,
             (as.matrix(df[df$Rootstock == "4", "y1"]) - mean4)^2,
             (as.matrix(df[df$Rootstock == "5", "y1"]) - mean5)^2,
             (as.matrix(df[df$Rootstock == "6", "y1"]) - mean6)^2)); SSE
[1] 0.3199875
MSE <- SSE/(k*(n1-1)) ; MSE
[1] 0.00761875
Fvalue <- MSH/MSE; Fvalue
[1] 1.931036

Bandingkan dengan nilai \(F\) tabel

qf(0.95, k-1, k*(n1-1))
[1] 2.437693

Atau p-value

pf(Fvalue, k-1, k*(n1-1), lower.tail = FALSE)
[1] 0.1094018

Kesimpulannya?

Fungsi aov()

anova_result <- aov(df$y1 ~ df$Rootstock, data = df)
summary(anova_result)
             Df Sum Sq  Mean Sq F value Pr(>F)
df$Rootstock  5 0.0736 0.014712   1.931  0.109
Residuals    42 0.3200 0.007619               

Apakah sama?

MANOVA

Hipotesis:

\(H_0: \bf{\mu_1} = \bf{\mu_2} = ... = \bf{\mu_k}\)

\(H_1: \text{Tidak Demikian}\)

Manual

Dengan data yang sama, akan diuji apakah \(\bf{\mu_1} = \bf{\mu_2} = ... = \bf{\mu_k}\)

y <- cbind(df$y1,df$y2,df$y3,df$y4); y
      [,1]  [,2] [,3]  [,4]
 [1,] 1.11 2.569 3.58 0.760
 [2,] 1.19 2.928 3.75 0.821
 [3,] 1.09 2.865 3.93 0.928
 [4,] 1.25 3.844 3.94 1.009
 [5,] 1.11 3.027 3.60 0.766
 [6,] 1.08 2.336 3.51 0.726
 [7,] 1.11 3.211 3.98 1.209
 [8,] 1.16 3.037 3.62 0.750
 [9,] 1.05 2.074 4.09 1.036
[10,] 1.17 2.885 4.06 1.094
[11,] 1.11 3.378 4.87 1.635
[12,] 1.25 3.906 4.98 1.517
[13,] 1.17 2.782 4.38 1.197
[14,] 1.15 3.018 4.65 1.244
[15,] 1.17 3.383 4.69 1.495
[16,] 1.19 3.447 4.40 1.026
[17,] 1.07 2.505 3.76 0.912
[18,] 0.99 2.315 4.44 1.398
[19,] 1.06 2.667 4.38 1.197
[20,] 1.02 2.390 4.67 1.613
[21,] 1.15 3.021 4.48 1.476
[22,] 1.20 3.085 4.78 1.571
[23,] 1.20 3.308 4.57 1.506
[24,] 1.17 3.231 4.56 1.458
[25,] 1.22 2.838 3.89 0.944
[26,] 1.03 2.351 4.05 1.241
[27,] 1.14 3.001 4.05 1.023
[28,] 1.01 2.439 3.92 1.067
[29,] 0.99 2.199 3.27 0.693
[30,] 1.11 3.318 3.95 1.085
[31,] 1.20 3.601 4.27 1.242
[32,] 1.08 3.291 3.85 1.017
[33,] 0.91 1.532 4.04 1.084
[34,] 1.15 2.552 4.16 1.151
[35,] 1.14 3.083 4.79 1.381
[36,] 1.05 2.330 4.42 1.242
[37,] 0.99 2.079 3.47 0.673
[38,] 1.22 3.366 4.41 1.137
[39,] 1.05 2.416 4.64 1.455
[40,] 1.13 3.100 4.57 1.325
[41,] 1.11 2.813 3.76 0.800
[42,] 0.75 0.840 3.14 0.606
[43,] 1.05 2.199 3.75 0.790
[44,] 1.02 2.132 3.99 0.853
[45,] 1.05 1.949 3.34 0.610
[46,] 1.07 2.251 3.21 0.562
[47,] 1.13 3.064 3.63 0.707
[48,] 1.11 2.469 3.95 0.952
totalmeans <- colMeans(y); totalmeans
[1] 1.102708 2.758854 4.087292 1.083000
n <- dim(df)[1] / length(unique(df$Rootstock)); n
[1] 8
df.group <- split(df[,2:5], df$Rootstock)
df.means <- sapply(df.group, function(x) {
  apply(x, 2, mean)
}, simplify = 'data.frame')

df.means
          1        2        3       4       5        6
y1 1.137500 1.157500 1.107500 1.09750 1.08000 1.036250
y2 2.977125 3.109125 2.815250 2.87975 2.55725 2.214625
y3 3.738750 4.515000 4.455000 3.90625 4.31250 3.596250
y4 0.871125 1.280500 1.391375 1.03900 1.18100 0.735000

Matriks H

## Matriks H ----
H <- matrix(data = 0, nrow = 4, ncol = 4)
for (i in 1:dim(H)[1]) {
  for (j in 1:i) {
    H[i,j] <- n * sum((df.means[i,] - totalmeans[i]) * (df.means[j,] - totalmeans[j]))
    H[j,i] <- n * sum((df.means[j,] - totalmeans[j]) * (df.means[i,] - totalmeans[i]))
  }
}
H
           [,1]      [,2]      [,3]     [,4]
[1,] 0.07356042 0.5373852 0.3322646 0.208470
[2,] 0.53738521 4.1996619 2.3553885 1.637108
[3,] 0.33226458 2.3553885 6.1139354 3.781044
[4,] 0.20847000 1.6371084 3.7810438 2.493091

Matriks E

## Matriks E ----
E <- matrix(data = 0, nrow = 4, ncol = 4)
for (i in 1:dim(E)[1]) {
  for (j in 1:i) {
    b <- c() 
    for (k in df.group) {
      a <- sum((k[,i] - mean(k[,i])) * (k[,j] - mean(k[,j])))
      b <- append(b, a)
    }
    E[i,j] <- sum(b)
    E[j,i] <- sum(b)
  }
}
E
          [,1]      [,2]      [,3]     [,4]
[1,] 0.3199875  1.696564 0.5540875 0.217140
[2,] 1.6965637 12.142790 4.3636125 2.110214
[3,] 0.5540875  4.363613 4.2908125 2.481656
[4,] 0.2171400  2.110214 2.4816563 1.722525

Fungsi manova()

manova_result <- manova(cbind(y1,y2,y3,y4) ~ Rootstock, data = df)

Wilk’s Lambda

# dengan matriks E dan H
Lambda <- det(E)/det(E + H); Lambda
[1] 0.1540077
# dengan fungsi `manova()`
summary(manova_result, test ="Wilks")
          Df   Wilks approx F num Df den Df    Pr(>F)    
Rootstock  5 0.15401   4.9369     20  130.3 7.714e-09 ***
Residuals 42                                             
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Roy’s Test

# dengan matriks E dan H
EinvH.eigen <- eigen(solve(E) %*% H)
roy.stat <- EinvH.eigen$values[1]; roy.stat
[1] 1.875671
theta <- roy.stat/(1+roy.stat); theta
[1] 0.6522551
# dengan fungsi `manova()`
summary(manova_result, test ="Roy")
          Df    Roy approx F num Df den Df    Pr(>F)    
Rootstock  5 1.8757   15.756      5     42 1.002e-08 ***
Residuals 42                                            
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Pillai Test

# dengan matriks E dan H
pillai.stat <- sum(diag(solve(E + H) %*% H)); pillai.stat
[1] 1.305472
sum(EinvH.eigen$values / (1 + EinvH.eigen$values))
[1] 1.305472
# dengan fungsi `manova()`
summary(manova_result, test ="Pillai")
          Df Pillai approx F num Df den Df    Pr(>F)    
Rootstock  5 1.3055   4.0697     20    168 1.983e-07 ***
Residuals 42                                            
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Lawley-Hotelling Test

# dengan matriks E dan H
lawhot.stat <- sum(diag(solve(E) %*% H)); lawhot.stat
[1] 2.921368
sum(EinvH.eigen$values)
[1] 2.921368
# dengan fungsi `manova()`
summary(manova_result, test ="Hotelling-Lawley")
          Df Hotelling-Lawley approx F num Df den Df    Pr(>F)    
Rootstock  5           2.9214   5.4776     20    150 2.568e-10 ***
Residuals 42                                                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1