Code
library(haven)
library(dplyr)
library(tidyr)
library(ggplot2)
library(survey)
library(gt)
# Load the student-level evaluation data and keep the control-group rows
# (treatment == 0); every comparison below is built from `ctrl`.
df <- read_dta("../data/raw/EG DIB_Main Student Data.dta")
ctrl <- df |>
  filter(treatment == 0)
Objective: Assess whether end-of-grade ASER scores are stable across cohorts — that is, whether students who reach a given grade in different assessment years score similarly on average.
The EG DIB evaluation tracked five cohorts of students, each labeled by their grade at Baseline (Year 1, September 2015). Because different cohorts were assessed at different endline rounds, multiple cohort–round combinations capture scores at the same grade level. For example, end-of-Grade-3 scores are available for students who were in Grade 3 at Baseline (assessed at the Y1 Endline, February 2016), in Grade 2 at Baseline (assessed at the Y2 Endline, February 2017), and in Grade 1 at Baseline (assessed at the Y3 Endline, February 2018). In addition, the Baseline assessment (September 2015) of students who were in Grade 4 at Baseline is treated as a fourth end-of-Grade-3 measurement.
If scores at a given grade are similar across these cohorts, it supports the assumption that cohorts are comparable and that cross-cohort learning comparisons are valid. Systematic differences would suggest cohort-level heterogeneity or secular time trends in learning levels — either of which could confound cohort-based estimates.
Scope: All comparisons are restricted to the control group (treatment == 0, n = 3,396 students across 141 villages) to avoid any contamination from the Educate Girls program.
The table below reproduces the cohort structure from the study documentation. Asterisks indicate that a cohort was not assessed in that round.
| Cohort label | Grade at Baseline | Grade at Y1 Endline | Grade at Y2 Endline | Grade at Y3 Endline | Years exposed to EG |
|---|---|---|---|---|---|
| Grade 1_Y1 | 1 | 1* | 2* | 3 | 1 |
| Grade 2_Y1 | 2 | 2* | 3 | 4 | 2 |
| Grade 3_Y1 | 3 | 3 | 4 | 5 | 3 |
| Grade 4_Y1 | 4 | 4 | 5 | 6* | 2 |
| Grade 5_Y1 | 5 | 5 | 6* | 7* | 1 |
Assessment dates: Baseline = September 2015; Y1 Endline = February 2016; Y2 Endline = February 2017; Y3 Endline = February 2018.
Reading across each row, the same end-of-grade score can be observed from the following cohort–round combinations:
| End of grade | Cohort | Assessment round | Variable suffix |
|---|---|---|---|
| 3 | Grade 1_Y1 | Y3 Endline | _ely3 |
| 3 | Grade 2_Y1 | Y2 Endline | _ely2 |
| 3 | Grade 3_Y1 | Y1 Endline | _ely1 |
| 3 | Grade 4_Y1 | Baseline | _bl |
| 4 | Grade 2_Y1 | Y3 Endline | _ely3 |
| 4 | Grade 3_Y1 | Y2 Endline | _ely2 |
| 4 | Grade 4_Y1 | Y1 Endline | _ely1 |
| 4 | Grade 5_Y1 | Baseline | _bl |
| 5 | Grade 3_Y1 | Y3 Endline | _ely3 |
| 5 | Grade 4_Y1 | Y2 Endline | _ely2 |
| 5 | Grade 5_Y1 | Y1 Endline | _ely1 |
For each end-of-grade comparison, we compute mean scores by cohort and plot them across subjects. Comparisons are restricted to the control group.
Subjects covered: Hindi (6-point scale), Math (5-point scale), English (5-point scale), and Total (16-point composite).
# Names of the four score columns, in the order subjects are displayed
subject_levels <- c("hindi", "math", "english", "total")
# Display labels, matched by position to `subject_levels`
subject_labels <- c("Hindi", "Math", "English", "Total")
# Stack the scores of several cohorts that all measure the same end-of-grade
# level.
#
# Each (baseline grade, wave) pair identifies one cohort: rows of `data` whose
# `child_class_bl` equals the baseline grade and whose `assessed_<wave>` flag
# is 1. The wave-specific score columns (`hindi_<wave>`, `math_<wave>`,
# `english_<wave>`, `total_<wave>`) are renamed to wave-free names so the
# cohorts can be stacked.
#
# data:            student-level data frame (here: the control group).
# baseline_grades: grade at Baseline of each cohort.
# waves:           variable suffix of the round used for each cohort
#                  ("bl", "ely1", "ely2", "ely3"); same length as
#                  `baseline_grades`.
# Returns a data frame with columns cohort, village_id_rand, hindi, math,
# english and total; `cohort` is a factor labelled "Grade <g>_Y1 (<wave>)"
# with levels in the order the pairs are given.
stack_end_of_grade <- function(data, baseline_grades, waves) {
  stopifnot(length(baseline_grades) == length(waves))
  cohort_labels <- paste0("Grade ", baseline_grades, "_Y1 (", waves, ")")

  one_cohort <- function(baseline_grade, wave, cohort_label) {
    assessed_col <- paste0("assessed_", wave)
    data |>
      filter(
        # .env$ makes sure the function argument is used, never a column
        child_class_bl == .env$baseline_grade,
        .data[[assessed_col]] == 1
      ) |>
      transmute(
        cohort = .env$cohort_label,
        village_id_rand,
        hindi = .data[[paste0("hindi_", wave)]],
        math = .data[[paste0("math_", wave)]],
        english = .data[[paste0("english_", wave)]],
        total = .data[[paste0("total_", wave)]]
      )
  }

  Map(one_cohort, baseline_grades, waves, cohort_labels) |>
    bind_rows() |>
    mutate(cohort = factor(cohort, levels = cohort_labels))
}

# Younger baseline cohorts reach a given grade in later rounds, so the wave
# moves back one round for every additional baseline grade.
grade3 <- stack_end_of_grade(ctrl, 1:4, c("ely3", "ely2", "ely1", "bl"))
grade4 <- stack_end_of_grade(ctrl, 2:5, c("ely3", "ely2", "ely1", "bl"))
grade5 <- stack_end_of_grade(ctrl, 3:5, c("ely3", "ely2", "ely1"))
# Mean score and standard error for every cohort x subject cell.
#
# data:        one row per student with `cohort`, `village_id_rand` and the
#              score columns named in `subject_levels`.
# grade_label: text stored in the `grade` column of the result.
# Returns one row per cohort and subject with `mean_score`, `se`, `subject`
# (factor labelled with `subject_labels`) and `grade`. Estimates come from a
# survey design that uses villages (`village_id_rand`) as clusters.
cohort_means <- function(data, grade_label) {
  # Design-based mean and SE of `score` within a single cell
  estimate_cell <- function(cell_rows, cell_key) {
    village_design <- svydesign(ids = ~village_id_rand, data = cell_rows)
    fit <- svymean(~score, village_design, na.rm = TRUE)
    tibble(mean_score = as.numeric(fit), se = as.numeric(SE(fit)))
  }

  long_scores <- pivot_longer(
    data,
    all_of(subject_levels),
    names_to = "subject",
    values_to = "score"
  )
  by_cell <- group_by(long_scores, cohort, subject)
  estimates <- ungroup(group_modify(by_cell, estimate_cell))

  mutate(
    estimates,
    subject = factor(subject, levels = subject_levels, labels = subject_labels),
    grade = grade_label
  )
}
# Summarise how much the cohort means vary within each subject.
#
# means_data: data frame with a `subject` and a `mean_score` column (one row
#             per cohort x subject, as returned by `cohort_means()`).
# Returns a single string such as "Hindi: 0.17; Math: 0.10", where each number
# is the standard deviation of the cohort means for that subject.
sd_across_cohorts <- function(means_data) {
  means_data |>
    group_by(subject) |>
    summarise(sd_of_means = sd(mean_score, na.rm = TRUE), .groups = "drop") |>
    # sprintf() keeps trailing zeros ("0.10", "0.00"), so every subject is
    # reported to the same two decimals; round() alone printed "0.1" and "0"
    mutate(
      label = sprintf("%s: %.2f", as.character(subject), sd_of_means)
    ) |>
    pull(label) |>
    paste(collapse = "; ")
}
# Cohort means in SD units.
#
# Each score column named in `subject_levels` is divided by its standard
# deviation computed over all rows of `data` (i.e. pooled across the cohorts
# of one end-of-grade comparison). Scores are rescaled, not centred, so the
# results are means expressed in SD units rather than z-scores.
#
# data:        one row per student with `cohort`, `village_id_rand` and the
#              score columns named in `subject_levels`.
# grade_label: text stored in the `grade` column of the result.
# Returns the same structure as `cohort_means()`.
cohort_means_normalized <- function(data, grade_label) {
  data |>
    # Drop any incoming grouping so the SD is always pooled over all rows
    ungroup() |>
    mutate(
      across(
        all_of(subject_levels),
        \(score) score / sd(score, na.rm = TRUE)
      )
    ) |>
    # Estimation is delegated so both variants share one implementation
    cohort_means(grade_label)
}
# Cohort means in raw score points, one table per end-of-grade comparison
means3 <- grade3 |> cohort_means("End of Grade 3")
means4 <- grade4 |> cohort_means("End of Grade 4")
means5 <- grade5 |> cohort_means("End of Grade 5")

# The same comparisons with scores expressed in pooled-SD units
means3_norm <- grade3 |> cohort_means_normalized("End of Grade 3")
means4_norm <- grade4 |> cohort_means_normalized("End of Grade 4")
means5_norm <- grade5 |> cohort_means_normalized("End of Grade 5")
# Render cohort-by-subject estimates as a gt table with "mean (SE)" cells.
#
# data:    one row per cohort x subject with columns `cohort`, `subject`,
#          `mean_score` and `se`.
# y_label: text of the spanner shown above the subject columns.
# Returns a gt table with one row per cohort and one column per subject.
make_gt_table <- function(data, y_label = "Mean score (SE)") {
  data |>
    # Fixed two-decimal formatting: round() alone drops trailing zeros and
    # produced ragged cells such as "3.4 (0.1)" or "4 (0.09)"
    mutate(cell = sprintf("%.2f (%.2f)", mean_score, se)) |>
    select(cohort, subject, cell) |>
    # names_sort = TRUE orders the subject columns by the sorted values of
    # `subject` (its level order when it is a factor) instead of by first
    # appearance in the rows
    pivot_wider(
      names_from = subject,
      values_from = cell,
      names_sort = TRUE
    ) |>
    gt() |>
    cols_label(cohort = "Cohort") |>
    tab_spanner(label = y_label, columns = -cohort) |>
    tab_options(table.width = pct(100))
}
Sample sizes (control group, assessed at relevant round): Grade 1_Y1 (ely3): n = 514; Grade 2_Y1 (ely2): n = 629; Grade 3_Y1 (ely1): n = 668; Grade 4_Y1 (bl): n = 679. SD of cohort means by subject: Hindi: 0.17; Math: 0.08; English: 0.08; Total: 0.29.
# Bar chart of the end-of-grade-3 cohort means in `means3`, one facet per subject.
# Error bars span mean +/- 1.96 * SE; scales = "free_y" gives each facet its own y-axis.
# The legend is hidden because the x-axis already names the cohorts.
ggplot(means3, aes(x = cohort, y = mean_score, fill = cohort)) +
geom_col(width = 0.6) +
geom_errorbar(aes(ymin = mean_score - 1.96 * se, ymax = mean_score + 1.96 * se), width = 0.2) +
facet_wrap(~ subject, scales = "free_y", nrow = 1) +
labs(
title = "End-of-Grade-3 scores by cohort",
subtitle = "Control group only",
x = NULL,
y = "Mean score"
) +
theme_minimal() +
theme(
legend.position = "none",
axis.text.x = element_text(angle = 30, hjust = 1)
)| Cohort |
Mean score (SE)
|
|||
|---|---|---|---|---|
| English | Hindi | Math | Total | |
| Grade 1_Y1 (ely3) | 2.34 (0.06) | 3.4 (0.1) | 2.69 (0.05) | 8.43 (0.18) |
| Grade 2_Y1 (ely2) | 2.14 (0.05) | 3.04 (0.09) | 2.64 (0.05) | 7.83 (0.17) |
| Grade 3_Y1 (ely1) | 2.2 (0.05) | 3.31 (0.08) | 2.67 (0.04) | 8.18 (0.16) |
| Grade 4_Y1 (bl) | 2.23 (0.06) | 3.41 (0.09) | 2.82 (0.05) | 8.46 (0.18) |
Scores divided by the pooled SD for each grade–subject combination, so bars are in units of SD and comparable across subjects.
# Bar chart of the SD-normalized end-of-grade-3 cohort means in `means3_norm`.
# Error bars span mean +/- 1.96 * SE; facet_wrap() is left at its default scales here.
ggplot(means3_norm, aes(x = cohort, y = mean_score, fill = cohort)) +
geom_col(width = 0.6) +
geom_errorbar(aes(ymin = mean_score - 1.96 * se, ymax = mean_score + 1.96 * se), width = 0.2) +
facet_wrap(~ subject, nrow = 1) +
labs(
title = "End-of-Grade-3 scores by cohort (SD-normalized)",
subtitle = "Control group only; y-axis in units of pooled SD",
x = NULL,
y = "Mean score (SD units)"
) +
theme_minimal() +
theme(
legend.position = "none",
axis.text.x = element_text(angle = 30, hjust = 1)
)| Cohort |
Mean score in SD units (SE)
|
|||
|---|---|---|---|---|
| English | Hindi | Math | Total | |
| Grade 1_Y1 (ely3) | 2.33 (0.06) | 2.04 (0.06) | 3.2 (0.06) | 2.73 (0.06) |
| Grade 2_Y1 (ely2) | 2.13 (0.05) | 1.82 (0.06) | 3.14 (0.05) | 2.54 (0.06) |
| Grade 3_Y1 (ely1) | 2.18 (0.05) | 1.99 (0.05) | 3.17 (0.05) | 2.65 (0.05) |
| Grade 4_Y1 (bl) | 2.22 (0.06) | 2.04 (0.05) | 3.35 (0.06) | 2.74 (0.06) |
Sample sizes: Grade 2_Y1 (ely3): n = 619; Grade 3_Y1 (ely2): n = 643; Grade 4_Y1 (ely1): n = 665; Grade 5_Y1 (bl): n = 684. SD of cohort means by subject: Hindi: 0.06; Math: 0.1; English: 0.08; Total: 0.21.
# Bar chart of the end-of-grade-4 cohort means in `means4`, one facet per subject.
# Error bars span mean +/- 1.96 * SE; scales = "free_y" gives each facet its own y-axis.
ggplot(means4, aes(x = cohort, y = mean_score, fill = cohort)) +
geom_col(width = 0.6) +
geom_errorbar(aes(ymin = mean_score - 1.96 * se, ymax = mean_score + 1.96 * se), width = 0.2) +
facet_wrap(~ subject, scales = "free_y", nrow = 1) +
labs(
title = "End-of-Grade-4 scores by cohort",
subtitle = "Control group only",
x = NULL,
y = "Mean score"
) +
theme_minimal() +
theme(
legend.position = "none",
axis.text.x = element_text(angle = 30, hjust = 1)
)| Cohort |
Mean score (SE)
|
|||
|---|---|---|---|---|
| English | Hindi | Math | Total | |
| Grade 2_Y1 (ely3) | 2.72 (0.06) | 4 (0.09) | 3.06 (0.05) | 9.78 (0.18) |
| Grade 3_Y1 (ely2) | 2.57 (0.06) | 3.93 (0.09) | 3.08 (0.04) | 9.58 (0.16) |
| Grade 4_Y1 (ely1) | 2.58 (0.06) | 3.94 (0.09) | 3.03 (0.05) | 9.55 (0.18) |
| Grade 5_Y1 (bl) | 2.71 (0.07) | 4.06 (0.09) | 3.25 (0.06) | 10.02 (0.2) |
Scores divided by the pooled SD for each grade–subject combination, so bars are in units of SD and comparable across subjects.
# Bar chart of the SD-normalized end-of-grade-4 cohort means in `means4_norm`.
# Error bars span mean +/- 1.96 * SE; facet_wrap() is left at its default scales here.
ggplot(means4_norm, aes(x = cohort, y = mean_score, fill = cohort)) +
geom_col(width = 0.6) +
geom_errorbar(aes(ymin = mean_score - 1.96 * se, ymax = mean_score + 1.96 * se), width = 0.2) +
facet_wrap(~ subject, nrow = 1) +
labs(
title = "End-of-Grade-4 scores by cohort (SD-normalized)",
subtitle = "Control group only; y-axis in units of pooled SD",
x = NULL,
y = "Mean score (SD units)"
) +
theme_minimal() +
theme(
legend.position = "none",
axis.text.x = element_text(angle = 30, hjust = 1)
)| Cohort |
Mean score in SD units (SE)
|
|||
|---|---|---|---|---|
| English | Hindi | Math | Total | |
| Grade 2_Y1 (ely3) | 2.41 (0.05) | 2.39 (0.06) | 3.14 (0.05) | 2.95 (0.05) |
| Grade 3_Y1 (ely2) | 2.27 (0.05) | 2.35 (0.05) | 3.16 (0.05) | 2.89 (0.05) |
| Grade 4_Y1 (ely1) | 2.28 (0.05) | 2.36 (0.05) | 3.11 (0.05) | 2.88 (0.05) |
| Grade 5_Y1 (bl) | 2.39 (0.06) | 2.43 (0.05) | 3.34 (0.06) | 3.02 (0.06) |
Sample sizes: Grade 3_Y1 (ely3): n = 634; Grade 4_Y1 (ely2): n = 654; Grade 5_Y1 (ely1): n = 672. SD of cohort means by subject: Hindi: 0.15; Math: 0.04; English: 0.1; Total: 0.29.
# Bar chart of the end-of-grade-5 cohort means in `means5`, one facet per subject.
# Error bars span mean +/- 1.96 * SE; scales = "free_y" gives each facet its own y-axis.
ggplot(means5, aes(x = cohort, y = mean_score, fill = cohort)) +
geom_col(width = 0.6) +
geom_errorbar(aes(ymin = mean_score - 1.96 * se, ymax = mean_score + 1.96 * se), width = 0.2) +
facet_wrap(~ subject, scales = "free_y", nrow = 1) +
labs(
title = "End-of-Grade-5 scores by cohort",
subtitle = "Control group only",
x = NULL,
y = "Mean score"
) +
theme_minimal() +
theme(
legend.position = "none",
axis.text.x = element_text(angle = 30, hjust = 1)
)| Cohort |
Mean score (SE)
|
|||
|---|---|---|---|---|
| English | Hindi | Math | Total | |
| Grade 3_Y1 (ely3) | 3.14 (0.06) | 4.67 (0.07) | 3.53 (0.05) | 11.34 (0.16) |
| Grade 4_Y1 (ely2) | 2.93 (0.06) | 4.38 (0.08) | 3.47 (0.05) | 10.78 (0.18) |
| Grade 5_Y1 (ely1) | 3 (0.07) | 4.47 (0.08) | 3.45 (0.06) | 10.93 (0.19) |
Scores divided by the pooled SD for each grade–subject combination, so bars are in units of SD and comparable across subjects.
# Bar chart of the SD-normalized end-of-grade-5 cohort means in `means5_norm`.
# Error bars span mean +/- 1.96 * SE; facet_wrap() is left at its default scales here.
ggplot(means5_norm, aes(x = cohort, y = mean_score, fill = cohort)) +
geom_col(width = 0.6) +
geom_errorbar(aes(ymin = mean_score - 1.96 * se, ymax = mean_score + 1.96 * se), width = 0.2) +
facet_wrap(~ subject, nrow = 1) +
labs(
title = "End-of-Grade-5 scores by cohort (SD-normalized)",
subtitle = "Control group only; y-axis in units of pooled SD",
x = NULL,
y = "Mean score (SD units)"
) +
theme_minimal() +
theme(
legend.position = "none",
axis.text.x = element_text(angle = 30, hjust = 1)
)| Cohort |
Mean score in SD units (SE)
|
|||
|---|---|---|---|---|
| English | Hindi | Math | Total | |
| Grade 3_Y1 (ely3) | 2.69 (0.05) | 2.92 (0.04) | 3.36 (0.04) | 3.36 (0.05) |
| Grade 4_Y1 (ely2) | 2.51 (0.05) | 2.74 (0.05) | 3.3 (0.05) | 3.2 (0.05) |
| Grade 5_Y1 (ely1) | 2.57 (0.06) | 2.8 (0.05) | 3.28 (0.05) | 3.24 (0.06) |
Note: ASER scales — Hindi: 1 (Beginner) to 6 (Story Plus); Math: 1 (Beginner) to 5 (Division); English: 1 (Beginner) to 5 (Sentence); Total: sum of the three scales, range 3–16.
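The code above compares end-of-grade learning levels for several cohorts. In two cases, we can also compare one-year learning gains across cohorts:
- End of Grade 3 to end of Grade 4: Grade 2_Y1 (Y2 Endline to Y3 Endline) versus Grade 3_Y1 (Y1 Endline to Y2 Endline).
- End of Grade 4 to end of Grade 5: Grade 3_Y1 (Y2 Endline to Y3 Endline) versus Grade 4_Y1 (Y1 Endline to Y2 Endline).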
For each comparison, gains are computed at the individual level (later score minus earlier score) and then averaged with cluster-robust standard errors at the village level. Comparisons are restricted to the control group and to students assessed at both relevant rounds.
# Stack student-level one-year gains for cohorts that pass through the same
# pair of grades in different years.
#
# For each cohort, rows of `data` are kept when `child_class_bl` equals the
# baseline grade and both `assessed_<from wave>` and `assessed_<to wave>` are
# 1; the gain in each subject is the later-wave score minus the earlier-wave
# score.
#
# data:            student-level data frame (here: the control group).
# baseline_grades: grade at Baseline of each cohort.
# from_waves:      variable suffix of the earlier round for each cohort.
# to_waves:        variable suffix of the later round for each cohort.
# Returns a data frame with columns cohort, village_id_rand, hindi, math,
# english and total (all gains); `cohort` is a factor labelled "Grade <g>_Y1"
# with levels in the order the cohorts are given.
stack_gains <- function(data, baseline_grades, from_waves, to_waves) {
  stopifnot(
    length(baseline_grades) == length(from_waves),
    length(baseline_grades) == length(to_waves)
  )
  cohort_labels <- paste0("Grade ", baseline_grades, "_Y1")

  one_cohort <- function(baseline_grade, from_wave, to_wave, cohort_label) {
    wave_col <- function(stem, wave) paste0(stem, "_", wave)
    data |>
      filter(
        # .env$ makes sure the function argument is used, never a column
        child_class_bl == .env$baseline_grade,
        .data[[wave_col("assessed", from_wave)]] == 1,
        .data[[wave_col("assessed", to_wave)]] == 1
      ) |>
      transmute(
        cohort = .env$cohort_label,
        village_id_rand,
        hindi = .data[[wave_col("hindi", to_wave)]] -
          .data[[wave_col("hindi", from_wave)]],
        math = .data[[wave_col("math", to_wave)]] -
          .data[[wave_col("math", from_wave)]],
        english = .data[[wave_col("english", to_wave)]] -
          .data[[wave_col("english", from_wave)]],
        total = .data[[wave_col("total", to_wave)]] -
          .data[[wave_col("total", from_wave)]]
      )
  }

  Map(one_cohort, baseline_grades, from_waves, to_waves, cohort_labels) |>
    bind_rows() |>
    mutate(cohort = factor(cohort, levels = cohort_labels))
}

# Grade 3 -> 4: Grade 2_Y1 (ely2 -> ely3) and Grade 3_Y1 (ely1 -> ely2)
gains_3to4 <- stack_gains(ctrl, 2:3, c("ely2", "ely1"), c("ely3", "ely2"))
# Grade 4 -> 5: Grade 3_Y1 (ely2 -> ely3) and Grade 4_Y1 (ely1 -> ely2)
gains_4to5 <- stack_gains(ctrl, 3:4, c("ely2", "ely1"), c("ely3", "ely2"))
# Village-clustered mean gain and SE for each cohort and subject
gains_means_3to4 <- gains_3to4 |> cohort_means("Grade 3 → Grade 4")
gains_means_4to5 <- gains_4to5 |> cohort_means("Grade 4 → Grade 5")
Sample sizes (control group, assessed at both relevant rounds): Grade 2_Y1: n = 610; Grade 3_Y1: n = 636. SD of cohort mean gains by subject: Hindi: 0.22; Math: 0; English: 0.15; Total: 0.37.
# Bar chart of the mean grade-3-to-grade-4 gains in `gains_means_3to4`, one facet per subject.
# Error bars span mean +/- 1.96 * SE; scales = "free_y" gives each facet its own y-axis.
ggplot(gains_means_3to4, aes(x = cohort, y = mean_score, fill = cohort)) +
geom_col(width = 0.6) +
geom_errorbar(aes(ymin = mean_score - 1.96 * se, ymax = mean_score + 1.96 * se), width = 0.2) +
facet_wrap(~ subject, scales = "free_y", nrow = 1) +
labs(
title = "Learning gains: end of grade 3 to end of grade 4",
subtitle = "Control group only; gain = later score − earlier score",
x = NULL,
y = "Mean gain (score points)"
) +
theme_minimal() +
theme(
legend.position = "none",
axis.text.x = element_text(angle = 30, hjust = 1)
)| Cohort |
Mean gain in score points (SE)
|
|||
|---|---|---|---|---|
| English | Hindi | Math | Total | |
| Grade 2_Y1 | 0.57 (0.04) | 0.92 (0.06) | 0.41 (0.03) | 1.9 (0.09) |
| Grade 3_Y1 | 0.36 (0.04) | 0.6 (0.06) | 0.41 (0.03) | 1.37 (0.09) |
Sample sizes (control group, assessed at both relevant rounds): Grade 3_Y1: n = 623; Grade 4_Y1: n = 650. SD of cohort mean gains by subject: Hindi: 0.2; Math: 0; English: 0.14; Total: 0.34.
# Bar chart of the mean grade-4-to-grade-5 gains in `gains_means_4to5`, one facet per subject.
# Error bars span mean +/- 1.96 * SE; scales = "free_y" gives each facet its own y-axis.
ggplot(gains_means_4to5, aes(x = cohort, y = mean_score, fill = cohort)) +
geom_col(width = 0.6) +
geom_errorbar(aes(ymin = mean_score - 1.96 * se, ymax = mean_score + 1.96 * se), width = 0.2) +
facet_wrap(~ subject, scales = "free_y", nrow = 1) +
labs(
title = "Learning gains: end of grade 4 to end of grade 5",
subtitle = "Control group only; gain = later score − earlier score",
x = NULL,
y = "Mean gain (score points)"
) +
theme_minimal() +
theme(
legend.position = "none",
axis.text.x = element_text(angle = 30, hjust = 1)
)| Cohort |
Mean gain in score points (SE)
|
|||
|---|---|---|---|---|
| English | Hindi | Math | Total | |
| Grade 3_Y1 | 0.56 (0.04) | 0.73 (0.05) | 0.44 (0.03) | 1.74 (0.08) |
| Grade 4_Y1 | 0.36 (0.04) | 0.45 (0.04) | 0.44 (0.04) | 1.25 (0.08) |
Note: ASER scales — Hindi: 1 (Beginner) to 6 (Story Plus); Math: 1 (Beginner) to 5 (Division); English: 1 (Beginner) to 5 (Sentence); Total: sum of the three scales, range 3–16.