Analysis by Matt Wilkins, PhD
Data downloaded from the National Center for Education Statistics
Below, I include the R code to wrangle data and generate figures.
require(remotes)
install_github("galacticpolymath/galacticPubs") #custom theme stuff
require(pacman)
p_load(ggplot2,readxl,dplyr,galacticPubs,reshape2,ggiraph,widgetframe,htmlwidgets)
d<-read_xlsx("data/pisa-data_2000-2018_tidy.xlsx")
#Rename Long name(s)
d$jurisdiction <- recode(d$jurisdiction,`International Average (OECD Countries)`="Avg of OECD Countries")
# Add ranks for each subject-year
d$sub_yr<-paste(d$subject,d$year)
d2<-lapply(unique(d$sub_yr),function(i) {
d_i <- subset(d,sub_yr==i)
d_i$rank<-rank(-d_i$average,na.last="keep",ties.method="average")
d_i
}) %>% bind_rows()
d2$year <- as.numeric(d2$year)
#define which jurisdictions to compare
for_comparison<-c("Avg of OECD Countries","United States","Canada","Shanghai - China","Russian Federation")
#plot score by year, grouped by subject
G<-d2 %>% subset(.,jurisdiction%in%for_comparison) %>%
ggplot(.,aes(x=year,y=average,col=jurisdiction))+
geom_point()+geom_smooth(method="lm",se=F,formula='y~x')+
facet_grid(~subject)+ggGalactic(font.cex =.8)+
theme(strip.text=element_text(size=16))+
scale_colour_manual(values=gpPal[[1]]$hex[c(6,5,2,4,1,3)])+
scale_linetype_manual(values=c(3,1,1,1,2))+ylab("PISA average")
require(reshape2)
# Sum all the scores for each country for each year
d3 = melt(d2[,c("year","jurisdiction","average")],id=c("year","jurisdiction")) %>% dcast(.,formula=jurisdiction+year~variable,fun.aggregate=sum)
names(d3)[3]<-"combined_average"
#Now plot the result
G2= d3 %>% subset(.,jurisdiction%in%for_comparison) %>%
ggplot(.,aes(x=year,y=combined_average,col=jurisdiction))+ylim(1350,1800)+
geom_point()+geom_smooth(method="lm",se=F,formula='y~x')+
ggGalactic(font.cex =.8)+
theme(strip.text=element_text(size=16))+
scale_colour_manual(values=gpPal[[1]]$hex[c(6,5,2,4,1,3)])+
scale_linetype_manual(values=c(3,1,1,1,2))+ylab("Mean Combined PISA score")
us<-subset(d3,jurisdiction=="United States")
us
#fit a line (it's just 4 points, but hey, we're not testing for significance)
mod<-lm(combined_average~year,data=us,na.rm=T)
mod_form<-paste0("y = ",round(coef(mod)[2],2),"x + ",round(coef(mod)[1],2))
#make graph and add formula to it
G3 <- us %>% ggplot(.,aes(x=year,y=combined_average,col=jurisdiction))+
geom_point(size=3)+geom_smooth(method="lm",se=F,formula='y~x')+
ggGalactic(font.cex =.8)+ylim(1350,1800)+
theme(strip.text=element_text(size=16))+
scale_colour_manual(values=gpPal[[1]]$hex[1])+
ylab("Mean Combined PISA score")+annotate("text",label=mod_form,col=gpColors("hydro"),size=10,x=2013,y=1530)
## jurisdiction year combined_average
## 281 United States 2000 NA
## 282 United States 2003 NA
## 283 United States 2006 NA
## 284 United States 2009 1489.226
## 285 United States 2012 1476.358
## 286 United States 2015 1462.806
## 287 United States 2018 1485.978
#recalculate ranks based on combined PISA scores (all subject scores added together)
d4 <- lapply(sort(unique(d3$year)),function(i) {
d_i <- subset(d3,year==i)
d_i$rank<-rank(-d_i$combined_average,na.last="keep",ties.method="average")
d_i
}) %>% bind_rows()
d4$jurisdiction %>% unique()
## [1] "Australia" "Austria" "Avg of OECD Countries"
## [4] "Belgium" "Canada" "Chile"
## [7] "Colombia" "Czech Republic" "Denmark"
## [10] "Estonia" "Finland" "France"
## [13] "Germany" "Greece" "Hungary"
## [16] "Iceland" "Ireland" "Israel"
## [19] "Italy" "Japan" "Korea, Republic of"
## [22] "Latvia" "Luxembourg" "Mexico"
## [25] "Netherlands" "New Zealand" "Norway"
## [28] "Poland" "Portugal" "Russian Federation"
## [31] "Shanghai - China" "Singapore" "Slovak Republic"
## [34] "Slovenia" "Spain" "Sweden"
## [37] "Switzerland" "Turkey" "United Arab Emirates"
## [40] "United Kingdom" "United States" "Vietnam"
#get models for all countries & make hover text summary
hoverText<-sapply(unique(d4$jurisdiction),function(x){
d_x<-subset(d4,jurisdiction==x&complete.cases(rank))
if(nrow(d_x)<3){mod_form=status= "Not enough data yet"
}else{
mod_x<-lm(-rank~year,data=d_x,na.rm=T)
mod_form<-paste0("y = ",round(coef(mod_x)[2],2),"x + ",round(coef(mod_x)[1],2))
#make status statement based on slope
#Use slope of .25 as threshold (improving, declining or staying the same within 4 yrs)
status<-{if(coef(mod_x)[2]>.25){"improving"
}else if(coef(mod_x)[2]<(-.25)){"declining"
}else if(coef(mod_x)[2]>-.25 & coef(mod_x)[2]<.25){"steady"}}
}
maxRank<-max(d_x$rank,na.rm=T)
maxYear<-d_x$year[which.max(d_x$rank)]
minRank<-min(d_x$rank,na.rm=T)
minYear<-d_x$year[which.min(d_x$rank)]
#switched max/min in the output due to interpretation of 1 as a higher rank
return(assign(x,paste0(toupper(x),"\nStatus: ",status,"\n",mod_form,"\nHighest: ",minRank," (",minYear,")\nLowest: ",maxRank," (",maxYear,")")))
})
#Add formulas to the data frame
d4$hoverText<-hoverText[match(d4$jurisdiction,names(hoverText))]
#Make certain countries stand out a little
focal_jur<-c("United States","Canada","Avg of OECD Countries")
d4$colCode<-0
#change focal jurisdictions to number codes 1=US, 2=Canada, 3=avg oecd
for(i in 1:length(focal_jur)){d4$colCode[which(d4$jurisdiction==focal_jur[i])]<-i}
d4$colCode<-as.factor(ifelse(is.na(d4$rank),NA,d4$colCode))
#make a literal vector of colors, as well. We'll still use colCode to style shapes,etc
colVec<-ifelse(d4$colCode==0,"gray80","gray30") #make 0s light gray, and the highlighted countries darker gray
#I seem to need yet another vector of just the country colors
colVec_jur<-ifelse(d4$jurisdiction %>% unique()%in%focal_jur,"gray30","gray80")
names(colVec_jur)<-d4$jurisdiction %>% unique()
######
#make graph and add formula to it
G4 <- d4 %>% subset(.,!jurisdiction%in%focal_jur) %>% ggplot(.,aes(x=year,y=rank,group=jurisdiction),stroke=.5)+
#First add nonfocal countries in a less obtrusive color
geom_point_interactive(show.legend=F,size=1.5,color="gray80",shape=19,
aes(data_id=jurisdiction,tooltip=paste0(toupper(jurisdiction), "\nYear: ",year,"\nRank: ",rank)),
hover_css="stroke: #6812d1;fill: #6812d1;")+
geom_smooth_interactive(show.legend=F,method="lm",se=F,size=.5,formula='y~x',color="gray80",size=.8,
aes(data_id=jurisdiction,tooltip=hoverText),
hover_css="stroke: #6812d1;")+
#Add custom styling for Galactic Polymath
ggGalactic(font.cex =.6)+
#Scale axes
scale_x_continuous(breaks=seq(2006,2018,3),limits=c(2006,2018))+scale_y_reverse(limits=c(44,0),breaks=c(1,seq(45,1,-5)))+
#Now add on your focal countries so they look distinct
geom_point_interactive(inherit.aes=F, data=subset(d4,jurisdiction%in%focal_jur&complete.cases(rank)),
show.legend=T,size=1.85,color="gray30",
aes(shape=colCode,data_id=jurisdiction,x=year,y=rank,
tooltip=paste0(toupper(jurisdiction), "\nYear: ",year,"\nRank: ",rank)),
hover_css="stroke: #6812d1;fill: #6812d1;")+
geom_smooth_interactive(inherit.aes=F, data=subset(d4,jurisdiction%in%focal_jur&complete.cases(rank)),
show.legend=T,method="lm",se=F,size=1,formula='y~x',size=.8,color="gray30",
aes(x=year,y=rank,data_id=jurisdiction,tooltip=hoverText,linetype=colCode),
hover_css="stroke: #6812d1;")+
scale_shape_manual(name="Focal Jurisdictions:",values=c(24,22,23),labels=sort(focal_jur,decreasing = T))+
scale_linetype_manual(name="Focal Jurisdictions:",values=1:3,labels=sort(focal_jur,decreasing = T))+
ylab("Overall PISA Rank")+
labs(title="Change in PISA scores for 41 countries and the OECD average",
subtitle="Mouse over a trendline to get the line equation and other info")+
theme(legend.position="bottom",legend.background = element_rect(fill="gray94",color="transparent" ),
legend.key=element_rect("gray98"),legend.key.width=unit(22,"pt"),
legend.text=element_text(size=8),
plot.title=element_text(size=11),plot.subtitle=element_text(size=9,color="#6812d1"),
axis.title=element_text(size=10))+ xlab("")
# Data: combined average scores for the US
girafe(print(G4),width_svg=6,height_svg=6,options = list(
opts_hover_inv(css = "opacity:0.5;"),
opts_hover(css = "stroke-width:2;")
))
w<-girafe(print(G4),width_svg=6,height_svg=6,options = list(
opts_hover_inv(css = "opacity:0.5;"),
opts_hover(css = "stroke-width:2;")
))
w
w %>% frameableWidget() %>% saveWidget(.,"interactive_PISA_ranks.html")