> grades<-read.table('http://www.stat.wmich.edu/naranjo/stat4640/data/Grades.dat',header=F)
> names(grades)<-c("ID","Sex", "Class", "Quizzes", "Exam1", "Exam2", "Lab", "Final")
> grades[1:10,]
    ID Sex Class Quizzes Exam1 Exam2 Lab Final
1  air   f     4      50    93    93  98   162
2  aln   m     4      49    95    98  97   175
3  bam   m     4      39    63    84  95    95
4  bag   f     3      46    92    96  88   150
5  bes   f     4      45   100    98  96   191
6  bec   f     3      44    98   100  85   175
7  bej   m     3      41    86    86  94   138
8  bis   f     4      50   100   100  99   166
9  blc   m     4      50    95    97  96   162
10 boc   f     4      48    71   100  97   143
> 
> summary(grades)
       ID     Sex        Class          Quizzes          Exam1       
 air    : 1   f:17   Min.   :2.000   Min.   :20.00   Min.   : 33.00  
 aln    : 1   m:32   1st Qu.:3.000   1st Qu.:41.00   1st Qu.: 71.00  
 bag    : 1          Median :3.000   Median :44.00   Median : 86.00  
 bam    : 1          Mean   :3.449   Mean   :42.84   Mean   : 80.53  
 bec    : 1          3rd Qu.:4.000   3rd Qu.:47.00   3rd Qu.: 95.00  
 bej    : 1          Max.   :4.000   Max.   :50.00   Max.   :100.00  
 (Other):43                                                          
     Exam2             Lab             Final      
 Min.   : 51.00   Min.   : 63.00   Min.   : 74.0  
 1st Qu.: 86.00   1st Qu.: 92.00   1st Qu.:124.0  
 Median : 93.00   Median : 95.00   Median :147.0  
 Mean   : 90.57   Mean   : 93.53   Mean   :139.5  
 3rd Qu.: 98.00   3rd Qu.: 98.00   3rd Qu.:161.0  
 Max.   :100.00   Max.   :100.00   Max.   :191.0  
                                                  
> ########################################################################
> # ONE-SAMPLE SUMMARY STATISTICS
> attach(grades)
> head(Exam1)
[1]  93  95  63  92 100  98
> mean(Exam1)
[1] 80.53061
> sd(Exam1)
[1] 17.58563
> var(Exam1)
[1] 309.2543
> 
> # ONE-SAMPLE t-TEST   
> # Exam 1 average over all sections is 80.  Is this class comparable?
> ?t.test
> 
> t.test(Exam1, mu=80)

	One Sample t-test

data:  Exam1 
t = 0.2112, df = 48, p-value = 0.8336
alternative hypothesis: true mean is not equal to 80 
95 percent confidence interval:
 75.47943 85.58179 
sample estimates:
mean of x 
 80.53061 

> ###############################################################################
> # Two-sample test
> 
> # Are Exam1 scores for class 3 and class 4 different?
> 
> sub3<-Exam1[Class==3]                # Extracts scores for Class=3
> sub3
 [1] 92 98 86 64 44 99 68 97 78 90 76 77 89 79 76 33 95 83 89 94 41 63 95
> sub4<-Exam1[Class==4]                # Extracts scores for Class=4
> sub4
 [1]  93  95  63 100 100  95  71  64  92  50  80  86  83  84  87  65  93  96  97
[20]  71  34  96  88  99

> t.test(sub3,sub4,var.equal = T)

	Two Sample t-test

data:  sub3 and sub4 
t = -0.7718, df = 45, p-value = 0.4443
alternative hypothesis: true difference in means is not equal to 0 
95 percent confidence interval:
 -14.661193   6.538004 
sample estimates:
mean of x mean of y 
 78.52174  82.58333 

> ################################################################
> # Or better, do the extraction and the analysis in one step

> t.test(Exam1[Class == 3], Exam1[Class == 4])

	Welch Two Sample t-test

data:  Exam1[Class == 3] and Exam1[Class == 4] 
t = -0.7703, df = 44.267, p-value = 0.4452
alternative hypothesis: true difference in means is not equal to 0 
95 percent confidence interval:
 -14.685746   6.562558 
sample estimates:
mean of x mean of y 
 78.52174  82.58333 

> boxplot(Exam1[Class == 3], Exam1[Class == 4])

> ############## PAIRED t-TEST ###################
> #### Q: Is Exam2 higher than Exam1?
>
> t.test(Exam1, Exam2, paired=T, alt = "less")

	Paired t-test

data:  Exam1 and Exam2 
t = -4.6736, df = 48, p-value = 1.213e-05
alternative hypothesis: true difference in means is less than 0 
95 percent confidence interval:
      -Inf -6.437472 
sample estimates:
mean of the differences 
              -10.04082 

> boxplot(Exam1,Exam2)

>
> ############ REGRESSION AND CORRELATION ################

> cor(Exam1,Exam2)
[1] 0.5215269
> y<-Exam2
> x<-Exam1
> reg.out<-lm(y~x)
> reg.out

Call:
lm(formula = y ~ x)

Coefficients:
(Intercept)            x  
     66.250        0.302  

> summary(reg.out)

Call:
lm(formula = y ~ x)

Residuals:
    Min      1Q  Median      3Q     Max 
-35.787  -3.579   1.965   4.649  13.481 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 66.25014    5.93805   11.16 8.32e-15 ***
x            0.30201    0.07207    4.19 0.000122 ***
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 8.781 on 47 degrees of freedom
Multiple R-squared:  0.272,	Adjusted R-squared:  0.2565 
F-statistic: 17.56 on 1 and 47 DF,  p-value: 0.0001217

> head(cbind(y,x,reg.out$fitted.values,reg.out$residuals))
    y   x                   
1  93  93 94.33735 -1.337346
2  98  95 94.94137  3.058628
3  84  63 85.27696 -1.276955
4  96  92 94.03533  1.964667
5  98 100 96.45144  1.548563
6 100  98 95.84741  4.152589
> 
> ############ ANOVA (Comparison of means) ################
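>
> # Note: summary(grades) above reports Class as a numeric column. The output
> # below (three levels; 2 degrees of freedom for Class in the ANOVA) requires
> # Class to be a factor, e.g. via Class <- factor(Class) run beforehand; that
> # conversion step is not shown in this transcript.
> levels(Class)    # Values found in the Class variable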
[1] "2" "3" "4"
>
> table(Class)     # Frequency table (to know sample sizes)
Class
 2  3  4 
 2 23 24 

> aov(Exam1 ~ Class)
Call:
   aov(formula = Exam1 ~ Class)

Terms:
                    Class Residuals
Sum of Squares    198.632 14645.572
Deg. of Freedom         2        46

Residual standard error: 17.84326 
Estimated effects may be unbalanced

> 
> # calculate the sample means of the subgroups
> 
> tapply(Exam1, Class, mean)
       2        3        4 
79.00000 78.52174 82.58333 
>
> ?tapply
>
> tapply(Exam1, Class, var)
       2        3        4 
  8.0000 353.7154 298.0797 

> tapply(Exam1, Class, length)
 2  3  4 
 2 23 24 

> table(Class)     # same result
Class
 2  3  4 
 2 23 24 

> boxplot(Exam1 ~ Class)
> boxplot(Exam1 ~ Class*Sex)  

> ####################################################
> # Saving the aov() fit and calling summary() on it is better: summary()
> # prints the full ANOVA table, including the F value and its p-value.
> 
> anova.fit <- aov(Exam1~Class)
> anova.fit
Call:
   aov(formula = Exam1 ~ Class)

Terms:
                    Class Residuals
Sum of Squares    198.632 14645.572
Deg. of Freedom         2        46

Residual standard error: 17.84326 
Estimated effects may be unbalanced

> summary(anova.fit)
            Df  Sum Sq Mean Sq F value Pr(>F)
Class        2   198.6    99.3  0.3119 0.7336
Residuals   46 14645.6   318.4