Analysis by Matt Wilkins, PhD
Data downloaded from the National Center for Education Statistics
Below, I include the R code to wrangle data and generate figures.
require(remotes)
install_github("galacticpolymath/galacticPubs") #custom theme stuff
require(pacman)
p_load(ggplot2,readxl,dplyr,galacticPubs,reshape2,ggiraph,widgetframe,htmlwidgets)
d<-read_xlsx("data/pisa-data_2000-2018_tidy.xlsx")
#Rename Long name(s)
d$jurisdiction <- recode(d$jurisdiction,`International Average (OECD Countries)`="Avg of OECD Countries")
# Add ranks for each subject-year
d$sub_yr<-paste(d$subject,d$year)
d2<-lapply(unique(d$sub_yr),function(i) {
        d_i <- subset(d,sub_yr==i)
        d_i$rank<-rank(-d_i$average,na.last="keep",ties.method="average")
        d_i
      }) %>% bind_rows()
d2$year <- as.numeric(d2$year)
#define which jurisdictions to compare
for_comparison<-c("Avg of OECD Countries","United States","Canada","Shanghai - China","Russian Federation")
#plot score by year, grouped by subject
G<-d2 %>% subset(.,jurisdiction%in%for_comparison) %>% 
  ggplot(.,aes(x=year,y=average,col=jurisdiction))+
  geom_point()+geom_smooth(method="lm",se=F,formula='y~x')+
  facet_grid(~subject)+ggGalactic(font.cex =.8)+
  theme(strip.text=element_text(size=16))+
  scale_colour_manual(values=gpPal[[1]]$hex[c(6,5,2,4,1,3)])+
  scale_linetype_manual(values=c(3,1,1,1,2))+ylab("PISA average")  require(reshape2)
  # Sum all the scores for each country for each year
  d3 = melt(d2[,c("year","jurisdiction","average")],id=c("year","jurisdiction")) %>% dcast(.,formula=jurisdiction+year~variable,fun.aggregate=sum)
  names(d3)[3]<-"combined_average"
  
  #Now plot the result
  G2= d3 %>% subset(.,jurisdiction%in%for_comparison) %>% 
  ggplot(.,aes(x=year,y=combined_average,col=jurisdiction))+ylim(1350,1800)+
  geom_point()+geom_smooth(method="lm",se=F,formula='y~x')+
  ggGalactic(font.cex =.8)+
  theme(strip.text=element_text(size=16))+
  scale_colour_manual(values=gpPal[[1]]$hex[c(6,5,2,4,1,3)])+
  scale_linetype_manual(values=c(3,1,1,1,2))+ylab("Mean Combined PISA score")  us<-subset(d3,jurisdiction=="United States")
  us
  #fit a line (it's just 4 points, but hey, we're not testing for significance)
  mod<-lm(combined_average~year,data=us,na.rm=T)
  mod_form<-paste0("y = ",round(coef(mod)[2],2),"x + ",round(coef(mod)[1],2))
  #make graph and add formula to it
  G3 <- us %>%  ggplot(.,aes(x=year,y=combined_average,col=jurisdiction))+
  geom_point(size=3)+geom_smooth(method="lm",se=F,formula='y~x')+
  ggGalactic(font.cex =.8)+ylim(1350,1800)+
  theme(strip.text=element_text(size=16))+
  scale_colour_manual(values=gpPal[[1]]$hex[1])+
  ylab("Mean Combined PISA score")+annotate("text",label=mod_form,col=gpColors("hydro"),size=10,x=2013,y=1530)##      jurisdiction year combined_average
## 281 United States 2000               NA
## 282 United States 2003               NA
## 283 United States 2006               NA
## 284 United States 2009         1489.226
## 285 United States 2012         1476.358
## 286 United States 2015         1462.806
## 287 United States 2018         1485.978#recalculate ranks based on combined PISA scores (all subject scores added together)
d4 <-  lapply(sort(unique(d3$year)),function(i) {
        d_i <- subset(d3,year==i)
        d_i$rank<-rank(-d_i$combined_average,na.last="keep",ties.method="average")
        d_i
      }) %>% bind_rows()
d4$jurisdiction %>% unique()##  [1] "Australia"             "Austria"               "Avg of OECD Countries"
##  [4] "Belgium"               "Canada"                "Chile"                
##  [7] "Colombia"              "Czech Republic"        "Denmark"              
## [10] "Estonia"               "Finland"               "France"               
## [13] "Germany"               "Greece"                "Hungary"              
## [16] "Iceland"               "Ireland"               "Israel"               
## [19] "Italy"                 "Japan"                 "Korea, Republic of"   
## [22] "Latvia"                "Luxembourg"            "Mexico"               
## [25] "Netherlands"           "New Zealand"           "Norway"               
## [28] "Poland"                "Portugal"              "Russian Federation"   
## [31] "Shanghai - China"      "Singapore"             "Slovak Republic"      
## [34] "Slovenia"              "Spain"                 "Sweden"               
## [37] "Switzerland"           "Turkey"                "United Arab Emirates" 
## [40] "United Kingdom"        "United States"         "Vietnam"  #get models for all countries & make hover text summary
  hoverText<-sapply(unique(d4$jurisdiction),function(x){
      d_x<-subset(d4,jurisdiction==x&complete.cases(rank))
      if(nrow(d_x)<3){mod_form=status= "Not enough data yet"
      }else{
        mod_x<-lm(-rank~year,data=d_x,na.rm=T)
        mod_form<-paste0("y = ",round(coef(mod_x)[2],2),"x + ",round(coef(mod_x)[1],2))
        #make status statement based on slope
        #Use slope of .25 as threshold (improving, declining or staying the same within 4 yrs)
        status<-{if(coef(mod_x)[2]>.25){"improving"
        }else if(coef(mod_x)[2]<(-.25)){"declining"
        }else if(coef(mod_x)[2]>-.25 & coef(mod_x)[2]<.25){"steady"}}
      }
      maxRank<-max(d_x$rank,na.rm=T)
      maxYear<-d_x$year[which.max(d_x$rank)]
      minRank<-min(d_x$rank,na.rm=T)
      minYear<-d_x$year[which.min(d_x$rank)]
      
      #switched max/min in the output due to interpretation of 1 as a higher rank
      return(assign(x,paste0(toupper(x),"\nStatus: ",status,"\n",mod_form,"\nHighest: ",minRank," (",minYear,")\nLowest: ",maxRank," (",maxYear,")")))
  }) 
#Add formulas to the data frame
d4$hoverText<-hoverText[match(d4$jurisdiction,names(hoverText))]
#Make certain countries stand out a little
  focal_jur<-c("United States","Canada","Avg of OECD Countries")
  d4$colCode<-0
  #change focal jurisdictions to number codes 1=US, 2=Canada, 3=avg oecd
  for(i in 1:length(focal_jur)){d4$colCode[which(d4$jurisdiction==focal_jur[i])]<-i}
  d4$colCode<-as.factor(ifelse(is.na(d4$rank),NA,d4$colCode))
  #make a literal vector of colors, as well. We'll still use colCode to style shapes,etc
  colVec<-ifelse(d4$colCode==0,"gray80","gray30") #make 0s light gray, and the highlighted countries darker gray
  #I seem to need yet another vector of just the country colors
  colVec_jur<-ifelse(d4$jurisdiction %>% unique()%in%focal_jur,"gray30","gray80")
  names(colVec_jur)<-d4$jurisdiction %>% unique()
  
  ######
  #make graph and add formula to it
  G4 <- d4 %>% subset(.,!jurisdiction%in%focal_jur) %>%  ggplot(.,aes(x=year,y=rank,group=jurisdiction),stroke=.5)+
  #First add nonfocal countries in a less obtrusive color
  geom_point_interactive(show.legend=F,size=1.5,color="gray80",shape=19,
                         aes(data_id=jurisdiction,tooltip=paste0(toupper(jurisdiction), "\nYear: ",year,"\nRank: ",rank)),
                         hover_css="stroke: #6812d1;fill: #6812d1;")+
  geom_smooth_interactive(show.legend=F,method="lm",se=F,size=.5,formula='y~x',color="gray80",size=.8,
                          aes(data_id=jurisdiction,tooltip=hoverText),
                          hover_css="stroke: #6812d1;")+
    #Add custom styling for Galactic Polymath
    ggGalactic(font.cex =.6)+
    #Scale axes
    scale_x_continuous(breaks=seq(2006,2018,3),limits=c(2006,2018))+scale_y_reverse(limits=c(44,0),breaks=c(1,seq(45,1,-5)))+
    
    #Now add on your focal countries so they look distinct
  geom_point_interactive(inherit.aes=F, data=subset(d4,jurisdiction%in%focal_jur&complete.cases(rank)),
                           show.legend=T,size=1.85,color="gray30",
                         aes(shape=colCode,data_id=jurisdiction,x=year,y=rank,
                             tooltip=paste0(toupper(jurisdiction), "\nYear: ",year,"\nRank: ",rank)),
                         hover_css="stroke: #6812d1;fill: #6812d1;")+
  geom_smooth_interactive(inherit.aes=F, data=subset(d4,jurisdiction%in%focal_jur&complete.cases(rank)),
                          show.legend=T,method="lm",se=F,size=1,formula='y~x',size=.8,color="gray30",
                          aes(x=year,y=rank,data_id=jurisdiction,tooltip=hoverText,linetype=colCode),
                          hover_css="stroke: #6812d1;")+
    scale_shape_manual(name="Focal Jurisdictions:",values=c(24,22,23),labels=sort(focal_jur,decreasing = T))+
    scale_linetype_manual(name="Focal Jurisdictions:",values=1:3,labels=sort(focal_jur,decreasing = T))+
  ylab("Overall PISA Rank")+
    labs(title="Change in PISA scores for 41 countries and the OECD average",
         subtitle="Mouse over a trendline to get the line equation and other info")+
    theme(legend.position="bottom",legend.background = element_rect(fill="gray94",color="transparent" ),
          legend.key=element_rect("gray98"),legend.key.width=unit(22,"pt"), 
          legend.text=element_text(size=8),
          plot.title=element_text(size=11),plot.subtitle=element_text(size=9,color="#6812d1"),
          axis.title=element_text(size=10))+ xlab("")
  
  # Data: combined average scores for the US
girafe(print(G4),width_svg=6,height_svg=6,options = list(
    opts_hover_inv(css = "opacity:0.5;"),
    opts_hover(css = "stroke-width:2;")
  ))w<-girafe(print(G4),width_svg=6,height_svg=6,options = list(
    opts_hover_inv(css = "opacity:0.5;"),
    opts_hover(css = "stroke-width:2;")
  ))
ww %>% frameableWidget() %>% saveWidget(.,"interactive_PISA_ranks.html")