


### EXAMPLE 4: Geithner connections
### Jeremy L Hour
### 11 avril 2017
### EDITED: 8/8/2018
### Jay Ryu: applying BCM PSC to annual RGDP (for A SINGLE TREATED unit), modifying Looping_PSC_Each_Post_T_EMDE.R
rm(list=ls())
library(purrr)
library(questionr)
library(readxl)
library(plyr)
library(Metrics)
library(dplyr)
library(tidyverse)
library(DataCombine) # to run shift
library(data.table) # in case shift etc. are needed
library("MASS")
library("ggplot2")
library("gtable")
library("grid")
library("reshape2")
library("LowRankQP")
library("R.matlab")
library("stargazer")
library("foreach")
library("doParallel")
if(!require(rlist)){
  install.packages("rlist")
  library(rlist)
}

#' @param X0 is a p x n0 matrix
#' @param X1 is a p x 1 vector
#' @param V is a p x p matrix of weights
#' @param pen L1 penalty level (lambda)
wsoll1 <- function(X0,X1,V,pen=0.0){
  n = ncol(X0)
  Delta = diag(t(X0 - matrix(rep(1,n),ncol=n)%x%X1)%*%V%*%(X0 - matrix(rep(1,n),ncol=n)%x%X1))
  P = 2*t(X0)%*%V%*%X0
  q = t(-2*t(X0)%*%V%*%X1 + pen*Delta)
  sol = LowRankQP(Vmat=P,dvec=q,Amat=matrix(1, ncol=n),bvec=1,uvec=rep(1,n), method="LU")
  return(sol$alpha)
}
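## --- Minimal usage sketch of wsoll1 (illustration only; toy numbers, assuming LowRankQP is installed) ---
## wsoll1 solves the penalized SC program: simplex weights with an L1 penalty (pen) on the
## weighted donor-to-target distances Delta.
if (FALSE) {
  X0_toy = matrix(c(1, 2,  2, 1,  3, 3), nrow = 2)  # p = 2 covariates, n0 = 3 donors
  X1_toy = matrix(c(2, 2), ncol = 1)                # the treated unit's covariates
  w_toy  = wsoll1(X0_toy, X1_toy, V = diag(2), pen = 0.1)
  sum(w_toy)  # the weights are nonnegative and sum to one
}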
#' @param n number of observations wanted
#' @param p is the number of covariates
#' @param Ry gives the R-square wanted in outcome equation
#' @param Rd gives the R-square wanted in treatment equation
#' @param rho parametrizes the covariance between the covariates
#' @param a is the treatment effect value
matchDGP <- function(n=20,p=2,Ry=.5,Rd=.2,rho=.5,a=0){
  ### Covariate variance matrix
  Sigma = matrix(0,nrow=p, ncol=p)
  for(k in 1:p){
    for(j in 1:p){
      Sigma[k,j] = rho^abs(k-j)
    }
  }
  ### Treatment variable coefficient
  gamma = rep(0,p)
  for(j in 1:p){
    gamma[j] = 1*(-1)^(j) / j^2
  }
  ### Outcome equation coefficients
  b = gamma
  for(j in 1:p){
    b[j] = (-1)^(j+1) / (p-j+1)^2
  }
  ### Adjustment to match R.squared
  c = sqrt((1/t(gamma)%*%Sigma%*%gamma)*(Rd/(1-Rd)))
  gamma = c*gamma
  c = sqrt((1/t(b)%*%Sigma%*%b)*(Ry/(1-Ry)))
  b = c*b
  X = mvrnorm(n = n, mu=rep(0,p), Sigma)
  d = as.numeric(runif(n) < pnorm(X%*%gamma))
  y = a*d + exp(-X%*%b) + rnorm(n)
  return(list(X=X,
              y=y,
              d=d,
              b=b,
              g=gamma,
              ATT=sum(d*a)/sum(d)))
}
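## --- Minimal usage sketch of matchDGP (illustration only; toy numbers) ---
if (FALSE) {
  sim = matchDGP(n = 100, p = 4, Ry = .5, Rd = .2, rho = .5, a = 1)
  table(sim$d)  # treated/control split implied by the probit-type assignment
  sim$ATT       # equals a (here 1) whenever at least one unit is treated
}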
#' @param d is a vector of dimension n (treatment indicator)
#' @param y is a vector of dimension n (outcome)
#' @param w is a vector of dimension n0 (weights)
wATT <- function(y,d,w){
  y = as.vector(y); d = as.vector(d);
  theta <- mean(y[d==1]) - t(y[d==0])%*%w
  return(c(theta))
}
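## --- Minimal usage sketch of wATT (illustration only; toy numbers) ---
if (FALSE) {
  y_toy = c(3, 1, 2)   # one treated unit followed by two controls
  d_toy = c(1, 0, 0)   # treatment indicator
  wATT(y_toy, d_toy, w = c(0.5, 0.5))  # 3 - (0.5*1 + 0.5*2) = 1.5
}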
#' @param X0 is a p x n0 matrix
#' @param X1 is a p x 1 vector
#' @param V is a p x p matrix of weights
#' @param m number of neighbors to find
#' 
#' @author Jérémy L'Hour
matching <- function(X0,X1,V,m=3){
  n = ncol(X0)
  Delta = matrix(t(X1)%*%V%*%X1, nrow=n, ncol=1) - 2*t(X0)%*%V%*%X1 + diag(t(X0)%*%V%*%X0) # Compute the distance
  r = rank(Delta)
  # I take the first m
  sol = ifelse(r <= m,1/m,0)
  return(sol)
}
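## --- Minimal usage sketch of matching (illustration only; toy numbers) ---
if (FALSE) {
  X0_toy = matrix(c(0, 0,  1, 0,  0, 1,  5, 5), nrow = 2)  # four donors in R^2
  X1_toy = matrix(c(0.1, 0.1), ncol = 1)                   # target near the first three donors
  matching(X0_toy, X1_toy, V = diag(2), m = 3)  # 1/3 on the three nearest donors, 0 on the outlier
}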
#' @param X0 a p x n0 matrix
#' @param X1 a p x n1 matrix
#' @param Y0 a n0 x 1 vector
#' @param Y1 a n1 x 1 vector
#' @param V a p x p matrix of weights (optional)
#' @param pen L1 penalty level (lambda)
#' @param tol gives the threshold for considering true zeros
#' @param parallel if TRUE, uses the parallel version; careful not to use it inside a loop
#' 
#' @seealso \code{\link{wsoll1}}
#' 
#' @author Jeremy L'Hour
regsynth <- function(X0,X1,Y0,Y1,V,pen,tol=1e-6,parallel=FALSE){
  if(missing(V)) V = diag(nrow(X0))
  n0 = ncol(X0); n1 = ncol(X1)
  func_list = c("wsoll1","TZero")
  if(parallel){
    # start parallel backend
    cores = detectCores()  # available cores (the cluster below is fixed at 5 workers)
    cl = makeCluster(5, outfile="",setup_timeout=.5)
    registerDoParallel(cl)
    t_start <- Sys.time()
    res <- foreach(i = 1:n1,.export=func_list,.packages=c('LowRankQP'),.combine='cbind', .multicombine=TRUE, .errorhandling = 'remove') %dopar% {
      sol = wsoll1(X0,X1[,i],V,pen)
      sol = TZero(sol,tol)
      sol
    }
    stopCluster(cl)
    print('--- Computation over! ---')
    print(Sys.time()-t_start)
    Wsol = t(res)
  } else {
    Wsol = matrix(nrow=n1,ncol=n0)
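    # open a throwaway connection and sink() into it to silence the QP solver's console output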
    f = file()
    sink(file=f)
    for(i in 1:n1){
      sol = wsoll1(X0,X1[,i],V,pen)
      sol = TZero(sol,tol)
      Wsol[i,] = sol
    }
    sink()
    close(f)
  }
  y0_hat = Wsol%*%Y0
  tau = Y1 - y0_hat
  return(list(ATT = mean(tau),
              CATT = tau,
              Wsol = Wsol,
              y0_hat = y0_hat))
}
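## --- Minimal usage sketch of regsynth (illustration only; toy numbers, assuming LowRankQP is installed) ---
if (FALSE) {
  set.seed(1)
  X0_toy = matrix(rnorm(10), nrow = 2)  # p = 2, n0 = 5 donors
  X1_toy = matrix(rnorm(4),  nrow = 2)  # n1 = 2 treated units
  Y0_toy = rnorm(5); Y1_toy = rnorm(2)
  fit = regsynth(X0_toy, X1_toy, Y0_toy, Y1_toy, pen = 0.05)  # V defaults to the identity
  fit$ATT            # mean treated-minus-synthetic gap
  rowSums(fit$Wsol)  # one row of weights per treated unit, each summing to one
}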
#' @param X0 a p x n0 matrix
#' @param X1 a p x n1 vector
#' @param Y0 a n0 x 1 vector
#' @param Y1 a n1 x 1 vector
#' @param V a p x p matrix of weights
#' @param lambda a vector of l1 penalty levels
#' @param tol gives the threshold for considering true zeros
#' @param bar if TRUE displays progress bar
#' 
#' @seealso \code{\link{regsynth}}
#' 
#' @author Jeremy L'Hour
regsynthpath <- function(X0,X1,Y0,Y1,V,lambda,tol=1e-6,bar=FALSE){
  K = length(lambda); n0 = ncol(X0); n1 = ncol(X1)
  ATT = vector(length = K)
  tau = matrix(nrow=K, ncol=n1)
  Wsol = array(dim=c(K,n1,n0))
  t_start = Sys.time()
  if(bar) pb = txtProgressBar(style = 3)
  for(k in 1:K){
    sol = regsynth(X0,X1,Y0,Y1,V,lambda[k],tol=tol)
    ATT[k] = sol$ATT
    tau[k,] = sol$CATT
    Wsol[k,,] = sol$Wsol
    if(bar) setTxtProgressBar(pb, k/K)
  }
  if(bar) close(pb)
  print(Sys.time()-t_start)
  return(list(ATT=ATT,
              CATT=tau,
              Wsol=Wsol))
}
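## --- Minimal usage sketch of regsynthpath (illustration only; toy numbers) ---
if (FALSE) {
  set.seed(1)
  X0_toy = matrix(rnorm(10), nrow = 2); X1_toy = matrix(rnorm(2), ncol = 1)
  Y0_toy = rnorm(5); Y1_toy = rnorm(1)
  path = regsynthpath(X0_toy, X1_toy, Y0_toy, Y1_toy, diag(2), lambda = c(0, .1, 1))
  path$ATT  # one ATT per penalty level; path$Wsol is a (K x n1 x n0) array
}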
#' @param x vector with positive elements
#' @param tol tolerance level
#' @param scale If TRUE rescale so new vector elements sum to one
TZero <- function(x,tol=1e-6,scale=TRUE){
  if(any(x < 0)) stop("Some elements are negative!")
  y = ifelse(x < tol,0,x)
  if(scale) y = y/sum(y)
  return(y)
}
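## --- Minimal usage sketch of TZero (illustration only) ---
if (FALSE) {
  TZero(c(0.5, 1e-9, 0.5))  # -> 0.5 0.0 0.5: the near-zero weight is zeroed, then rescaled to sum to one
}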
#' @param w is a n0 vector of weights
#' @param X0 is a p x n0 matrix
#' @param X1 is a p x 1 vector
#' @param V is a p x p matrix of weights
#' 
#' @author Jeremy L'Hour
synthObj <- function(w,X0,X1,V){
  n = ncol(X0)
  Delta = matrix(t(X1)%*%V%*%X1, nrow=n, ncol=1) - 2*t(X0)%*%V%*%X1 + diag(t(X0)%*%V%*%X0)
  #1. l1-norm
  l1norm = t(Delta)%*%w
  #2. loss function
  loss = t(X1 - X0%*%w) %*% V %*% (X1 - X0%*%w)
  return(list(loss=c(loss),
              l1norm=c(l1norm),
              Delta=Delta))
}
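## --- Minimal usage sketch of synthObj (illustration only; toy numbers) ---
if (FALSE) {
  set.seed(1)
  X0_toy = matrix(rnorm(4), nrow = 2); X1_toy = matrix(rnorm(2), ncol = 1)
  synthObj(w = c(0.3, 0.7), X0_toy, X1_toy, V = diag(2))  # loss, weighted l1-norm, and Delta
}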
# revised bias function for PSC
bias <- function(X0,X1,y,d,W){
  n1 = ncol(X1); n0 = ncol(X0)  
  reg0 = rbind(X0,rep(1,n0)) 
  reg1 = rbind(X1,rep(1,n1))
  A = solve(reg0%*%t(reg0))%*%reg0
 # A = ginv(reg0%*%t(reg0))%*%reg0 # as a way around non-full ranks in the system
  W = matrix(W,nrow=1) # coerce the supplied weights to a 1 x n0 row vector
  bias = matrix(nrow=nrow(y),ncol=n1)
  for(t in 1:nrow(y)){
    mu0 = A%*%y[t,d==0]
    bias[t,] = t(reg1)%*%mu0 - W%*%(t(reg0)%*%mu0)
  }
  return(bias)
}
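## --- Minimal usage sketch of bias (illustration only; toy numbers) ---
## For each period t, mu0 holds the coefficients of a regression of control outcomes on their
## covariates (plus an intercept); the bias is the fitted value at the treated covariates minus
## the weighted fitted values at the donor covariates.
if (FALSE) {
  set.seed(1)
  C0 = matrix(rnorm(2 * 4), nrow = 2)  # covariates of 4 donors
  C1 = matrix(rnorm(2),     ncol = 1)  # covariates of the treated unit
  yy = matrix(rnorm(3 * 5), nrow = 3)  # 3 periods x (1 treated + 4 controls)
  dd = c(1, rep(0, 4))                 # first column of yy is the treated unit
  bias(C0, C1, yy, dd, W = rep(1/4, 4))  # 3 x 1 matrix of per-period bias estimates
}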
### [JR] data steps; adapted from SC_7_24_22_AnnGDP_NO_Demean.R
setwd("/Users/jayryu/Library/CloudStorage/OneDrive-OhioUniversity/Global inflation/Annual GDP/PSC/1970/EMDE/BCM_PSC")
d0<-read_excel("/Users/jayryu/Library/CloudStorage/OneDrive-OhioUniversity/Global inflation/Annual GDP/Differ_Pre_T_Period/1960/pwt_longer_with_NA.xlsx", na = "NA", sheet="Data")
dc<-read_excel("/Users/jayryu/Library/CloudStorage/OneDrive-OhioUniversity/Global inflation/Annual GDP/Differ_Pre_T_Period/1960/pwt_longer_with_NA.xlsx", na = "NA", sheet="cs-units")
# for per capita gdp
d0$p_rgdpe=d0$rgdpe/d0$pop
d0$p_cn=d0$cn/d0$pop
d0$net_exp=d0$csh_x-d0$csh_m
d0$to = d0$csh_x + abs(d0$csh_m)
# to create change variables
lg=function(x) c(NA, x[-length(x)]) # one-period lag
ln=function(x) log(x)
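# e.g. (illustration): lg(c(10, 20, 30)) returns NA 10 20, i.e. a one-period lag within a series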
var_names=c("p_rgdpe","pop","csh_i","cn")
var_names_lag=paste0("lag_",var_names[1:4])
for (i in 1:length(var_names)) {
  d0[, paste0("lag_", var_names[i])] = ave(d0[[var_names[i]]], d0$country, FUN = lg)
  d0[,paste0("log_",var_names[i])] =ave(d0[[var_names[i]]],FUN=ln)
  d0[,paste0("log_",var_names_lag[i])]=ave(d0[[var_names_lag[i]]],FUN=ln)
}
d0$change_p_rgdpe=d0$log_p_rgdpe-d0$log_lag_p_rgdpe
d0$change_pop=d0$log_pop-d0$log_lag_pop
d0$change_csh_i=d0$log_csh_i-d0$log_lag_csh_i
d0$change_cn=d0$log_cn-d0$log_lag_cn
vec=c(102,111,119,139) #[JR]
d0=d0[!d0$ctry_dum %in% vec,] #[JR]
### [JR] itimegdp was originally prepared for quarterly data, so some cleaning might be needed
dc$itimegdp=NULL;dc$itimegdp=1960
dc$itimegdp[dc$CODE==5|dc$CODE==8|dc$CODE==11 |dc$CODE==37 |dc$CODE==44|dc$CODE==51
            |dc$CODE==70 |dc$CODE==76 |dc$CODE==81 |dc$CODE==90|dc$CODE==109
            |dc$CODE==114 |dc$CODE==117 |dc$CODE==118 |dc$CODE==135]=1990
dc$itimegdp[dc$CODE==142]=1989

dv="residual"        
ov="p_rgdpe" # dependent/outcome variable
covariates=c("csh_i","change_pop","to") 
group="BCM_3Covars_EMDE_1972_p_rgdpe"    # [RD7.24.22] for filename; use DG13T20C_50PL for AEs  
EMDES = 1        # 1 (one) for EMDEs, anything else for AEs
###################################################################################################
# NOTE: NOTHING TO CHOOSE BELOW THIS LINE
###################################################################################################
#### Chooses the treatment group; keeps columns for the name and code of each country and the
#### initial year of the (trimmed) pre-treatment period; use TREATED1ADV for AEs
if (EMDES==1){
  treatment<- dc[(dc$TREATED1DEV==1|dc$TREATED1EME==1|(dc$ADDTREATED1==1&dc$ADV==0)),
                 c('UNITS','CODE',"itimegdp")]        
} else{
  treatment<- dc[(dc$TREATED1ADV==1),c('UNITS','CODE',"itimegdp")]    #itime for INF2/itimegdp for Growth2
}
##### Lines to remove some non-fully IT countries or ITers with too small sample (ARG, CR, JAM)
treatment<-treatment[(treatment$CODE!=4),] 
treatment<-treatment[(treatment$CODE!=33),]   
treatment<-treatment[(treatment$CODE!=145),] 
##### further drop the following countries b/c "Error in LowRankQP...NA/NaN/Inf in foreign function call"
# Armenia, Georgia, Kazakhstan, Moldova, Russian Federation, Serbia, Ukraine
vec2=c(5,51,70,90,109,114,135)
treatment=treatment[!treatment$CODE %in% vec2,]
### Selects donor pool and keep columns for name and code of country and if low-income or not
if (EMDES==1){
  control0<- dc[(dc$CONTROL1EME==1|dc$CONTROL1DEV==1),c('UNITS','CODE','LOWINCOME')]
} else{
  control0<- dc[(dc$CONTROL1ADV==1),c('UNITS','CODE','LOWINCOME')] #[RD7.24.22]
}
### Removing some units: BEN, BOT, BF, CdI, MAURITIUS, MOR, NIGER, SEN, TOG, T&T (& Slovenia)
if (EMDES==1){
  remove<-as.matrix(c(13,16, 20,34,88,92,97,113,130,131))
  selr<-as.matrix(control0[,2])%in% remove
  control<-(control0[selr!=1,])
  ccountry<-as.matrix(control[,1]) #[JR] changed from control[,2]
  
} else{
  remove<-as.matrix(c(13,16, 20,34,88,92,97,113,130,131,118,102,111,119,139)) #12.22.21 #[JR] further cleaning 102,111,119,139 for EMDEs
  selr<-as.matrix(control0[,2])%in% remove
  control<-(control0[selr!=1,])
  ccountry<-as.matrix(control[,1]) #[JR] using country name instead of country code
}

d1<-subset(d0,d0$year>1971&d0$year<2019,select=c("country","year",ov,"IT1",covariates)) #[JR] 1969 for 1970-2019; 1970 because the covariate (change_cn) starts from 1971
tcountry=data.frame(treatment[,1])
country_temp=rbind(tcountry,ccountry)
country=country_temp$UNITS
country=sort(country)
d2=d1[d1$country%in%country,]
df.mat=data.frame(matrix(nrow=length(unique(d2$year)),ncol=1))
mylist=list()
for(i in 1:length(country)){
  df.mat[i]=d2[d2$country==country[i],ov] # subset(d2,country==country[i],select=c(dv)) causes errors, hence this indexing; still errors if two countries are gone
  mylist[i]=df.mat[i]
}
name=NULL
get_id_temp=unique(d2[,c("country")])
get_id=get_id_temp$country
for(i in 1:length(get_id)){
  name[i]= paste(get_id[i],"", sep = "")
}
d3=data.frame(do.call(cbind,lapply(mylist,as.vector))) # df.mat instead of mylist also works but it comes with column names
for (i in ls(pattern="d3")) {
  d4 <- get(i)
  names(d4)[1:length(get_id)] <- name
  assign(i,d4)
}
tunits=tcountry$UNITS
ccountry_temp=data.frame(ccountry)
cunits=ccountry_temp$UNITS
d5_2=d4[,colnames(d4)%in%cunits]
##### to create data matrix for covariate1
df.mat=data.frame(matrix(nrow=length(unique(d2$year)),ncol=1))
mylist=list()
for(i in 1:length(country)){
  df.mat[i]=d2[d2$country==country[i],covariates[1]] # subset(d2,country==country[i],select=c(dv)) causes errors, hence this indexing; still errors if two countries are gone
  mylist[i]=df.mat[i]
}
name=NULL
get_id_temp=unique(d2[,c("country")])
get_id=get_id_temp$country
for(i in 1:length(get_id)){
  name[i]= paste(get_id[i],"", sep = "")
}
covariate3=data.frame(do.call(cbind,lapply(mylist,as.vector))) # df.mat instead of mylist also works but it comes with column names
for (i in ls(pattern="covariate3")) {
  covariate4 <- get(i)
  names(covariate4)[1:length(get_id)] <- name
  assign(i,covariate4)
}
tunits=tcountry$UNITS
ccountry_temp=data.frame(ccountry)
cunits=ccountry_temp$UNITS
covariate5=covariate4[,colnames(covariate4)%in%cunits]
##### to create data matrix for covariate2
df.mat=data.frame(matrix(nrow=length(unique(d2$year)),ncol=1))
mylist=list()
for(i in 1:length(country)){
  df.mat[i]=d2[d2$country==country[i],covariates[2]] # subset(d2,country==country[i],select=c(dv)) causes errors, hence this indexing; still errors if two countries are gone
  mylist[i]=df.mat[i]
}
name=NULL
get_id_temp=unique(d2[,c("country")])
get_id=get_id_temp$country
for(i in 1:length(get_id)){
  name[i]= paste(get_id[i],"", sep = "")
}
covariate3_2=data.frame(do.call(cbind,lapply(mylist,as.vector))) # df.mat instead of mylist also works but it comes with column names
for (i in ls(pattern="covariate3_2")) {
  covariate4_2 <- get(i)
  names(covariate4_2)[1:length(get_id)] <- name
  assign(i,covariate4_2)
}
covariate5_2=covariate4_2[,colnames(covariate4_2)%in%cunits]
##### to create data matrix for covariate3
df.mat=data.frame(matrix(nrow=length(unique(d2$year)),ncol=1))
mylist=list()
for(i in 1:length(country)){
  df.mat[i]=d2[d2$country==country[i],covariates[3]] # subset(d2,country==country[i],select=c(dv)) causes errors, hence this indexing; still errors if two countries are gone
  mylist[i]=df.mat[i]
}
name=NULL
get_id_temp=unique(d2[,c("country")])
get_id=get_id_temp$country
for(i in 1:length(get_id)){
  name[i]= paste(get_id[i],"", sep = "")
}
covariate3_3=data.frame(do.call(cbind,lapply(mylist,as.vector))) # df.mat instead of mylist also works but it comes with column names
for (i in ls(pattern="covariate3_3")) {
  covariate4_3 <- get(i)
  names(covariate4_3)[1:length(get_id)] <- name
  assign(i,covariate4_3)
}
covariate5_3=covariate4_3[,colnames(covariate4_3)%in%cunits]
###########
stat.all=list()
######################### looping starts from the line below ##########
for (j in 1:nrow(treatment) ){ # 1:nrow(treatment) 
  t.unit <- tunits[j]
  
  d0_temp <- subset(d0, year > 1971 & year < 2019, select = c("country", "year", "IT1",ov, covariates))
  d0_temp2 <- d0_temp[d0_temp$country == t.unit | d0_temp$country %in% cunits, ]
 
  formula_str=paste(ov,"~",paste(covariates,collapse="+"))
  formula=as.formula(formula_str)
  reg_base=lm(formula,data=d0_temp2)
  d0_temp2$residual = reg_base$residuals
  d0_2=d0_temp2 
  
  mylist=list()
  for (i in 1:length(unique(d0_2$country))) {
    subset_data <- d0_2[d0_2$country == unique(d0_2$country)[i], dv, drop = FALSE]
    mylist[[i]] <- subset_data
  }
  
  name <- unique(d0_2$country)
  d3 <- do.call(cbind, mylist)
  
  names(d3) <- name
  
  tunits <- tcountry$UNITS
  ccountry_temp <- data.frame(ccountry)
  cunits <- ccountry_temp$UNITS
  
  d5 <- d3[, colnames(d3) %in% cunits]
  
  d6=subset(d3,select=t.unit)
  d7=d2[d2$country==t.unit,c("year","IT1")]
  d8=cbind(d7,d6,d5)
  runs <- rle(d8$IT1) #[Jay] some IT1 spells end (IT1 returns to 0), so drop observations after
  # the first IT1 spell ends (even though IT1 might have been reintroduced later)
  indices <- which(runs$values == 1 & runs$lengths >= 2)
  last=vector()
  if (length(indices) > 0) {
    for (i in indices) {
      first <- cumsum(runs$lengths)[i] - runs$lengths[i] + 1
      last <- c(last,cumsum(runs$lengths)[i])
    }
  } 
  d8=d8[1:last[1],]
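  # Worked toy check of the truncation rule above (invented IT1 path): for IT1 = c(0,0,1,1,1,0,1),
  # rle() gives values 0,1,0,1 with lengths 2,3,1,1; the first run of 1s with length >= 2 ends at
  # cumulative position 5, so rows 1:5 are kept.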
  ITc=cumsum(d8$IT1)
  
  covariate6=subset(covariate4,select=t.unit)
  covariate7=d2[d2$country==t.unit,c("year","IT1")]
  covariate8=cbind(covariate7,covariate6,covariate5)
  covariate9=covariate8[1:last[1],-(1:2)]
  
  covariate6_2=subset(covariate4_2,select=t.unit)
  covariate7_2=d2[d2$country==t.unit,c("year","IT1")]
  covariate8_2=cbind(covariate7_2,covariate6_2,covariate5_2)
  covariate9_2=covariate8_2[1:last[1],-(1:2)]
  
  covariate6_3=subset(covariate4_3,select=t.unit)
  covariate7_3=d2[d2$country==t.unit,c("year","IT1")]
  covariate8_3=cbind(covariate7_3,covariate6_3,covariate5_3)
  covariate9_3=covariate8_3[1:last[1],-(1:2)]
  
  covariate=rbind(covariate9,covariate9_2,covariate9_3)
  
  ### [JR] I added as.matrix to X1 and Y1 for a single treated unit; this also works for multiple treated units.
  
  y=data.matrix(d8)
  #PreTreatPeriod=1:round(length(ITc[ITc==0])*0.8)
  PreTreatPeriod=1:length(ITc[ITc==0])  # for hold-out; 100% of PreTreat, for instance # note: numbers are matrix row numbers
  ValPeriod= 1:length(ITc[ITc==0]) # for hold-out; 100% of PreTreat, for instance
  TestPeriod= which(ITc==1):length(ITc)   
  PreTreat=1:length(ITc[ITc==0])
  Out=-(1:3)
  IT_dum=y[,"year"]>y[,"year"][length(ITc[ITc==0])]
  event=which(ITc==1)
  event_year=d8$year[which(ITc==1)]
  
  X1 = data.matrix(y[PreTreatPeriod,t.unit])
  X0 = data.matrix(y[PreTreatPeriod,Out])
  Y0 = data.matrix(y[event,Out])
  Y1 = data.matrix(y[event,t.unit])
  X00 = data.matrix(y[PreTreat,Out])
  X11= data.matrix(y[TestPeriod,Out])
  
  # to identify target variables only for V below: [Jay Ryu]
  y_fix=y[,-(1:2)] 
  ### 3. CV for selecting optimal lambda
  V = diag(1/diag(var(t(y_fix[PreTreatPeriod,])))) # Reweight by inverse of variance
  #Czech and Slovak Republics (for AEs) have missing values for PreTreatPeriod
  
  lambda =  c(seq(0,0.1,.0025),seq(0.10001,2,.1)) # sequence of lambdas to test
  estval = regsynthpath(X0,X1,Y0,Y1,V,lambda,tol=1e-6)
  estval$ATT          #[JR]
  estval$CATT         #[JR]
  estval$Wsol         #[JR]
  dim(estval$Wsol)    #[JR]
  estval$y0_hat       #[JR] the list element is y0_hat; estval$Y0_hat is NULL
  dim(t(estval$Wsol[1,,]))  # [JR] For a single treated unit, this dimension differs from the
  # multiple-treated-unit cases. Thus, for one treated unit, take out the t() (transpose) in the
  # MSPE loop below, as done here; for "multiple" treated units, use t(estval$Wsol[k,,]).
  
  MSPE = vector(length=length(lambda))
  
  for(k in 1:length(lambda)){
    MSPE[k] = mean(apply((y[ValPeriod,t.unit] - y[ValPeriod,Out]%*%(estval$Wsol[k,,]))^2,2,mean))
  }
  # New code lines to avoid errors when several lambdas attain min(MSPE)
  min_indices=which(MSPE==min(MSPE))
  lambda.opt.MSPE=lambda[min_indices[1]]
  # The original code, lambda.opt.MSPE = which(lambda[which(MSPE==min(MSPE))]), errored when there
  # are multiple min(MSPE) values; the which.min() variant below also errored, hence the
  # tie-safe first-index selection above.
  #lambda.opt.MSPE = which.min(lambda[which(MSPE==min(MSPE))])
  lambda.opt.MSPE
  min(MSPE)
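  # Toy check of the tie-safe selection above (invented numbers):
  if (FALSE) {
    m_toy = c(3, 1, 1, 2); l_toy = c(0, .1, .2, .3)
    which(m_toy == min(m_toy))             # -> 2 3: length > 1 under ties, which broke the old code
    l_toy[which(m_toy == min(m_toy))[1]]   # -> 0.1: taking the first index resolves the tie
  }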
  ### Figure 1: MSPE
  #matplot(lambda, MSPE, type="b", pch=20, lwd=1,
  #    main=expression("MSPE, "*lambda^{opt}*" computed on the validation year window"), col="steelblue",
  #   xlab=expression(lambda), ylab="MSPE")
  #abline(v=lambda.opt.MSPE,lty=2,lwd=2,col="grey")
  #dev.off()
  
  ### 4. Estimation
  ## to use the actual OV as dv
  d6_2=subset(d4,select=t.unit)
  d7_2=d2[d2$country==t.unit,c("year","IT1")]
  d8_2=cbind(d7_2,d6_2,d5_2)
  runs <- rle(d8_2$IT1) #[Jay] some IT1 spells end (IT1 returns to 0), so drop observations after
  # the first IT1 spell ends (even though IT1 might have been reintroduced later)
  indices <- which(runs$values == 1 & runs$lengths >= 2)
  last=vector()
  if (length(indices) > 0) {
    for (i in indices) {
      first <- cumsum(runs$lengths)[i] - runs$lengths[i] + 1
      last <- c(last,cumsum(runs$lengths)[i])
    }
  } 
  d8_2=d8_2[1:last[1],]
  
  ## 4.1 Penalized Synthetic Control
  ##### tie-safe selection (new code line)
  Psol = data.matrix(estval$Wsol[min_indices[1],,])
  # A modified version, Psol = data.matrix(estval$Wsol[which.min(MSPE==min(MSPE)),,]), was meant to
  # choose one solution when there are multiple min(MSPE) values, but it still returned a wrong result.
  NPsol = estval$Wsol[1,,]  # non-penalized weights: the solution at lambda = 0
  estval$Wsol[1:5,,]
  # to clean y for the analyses below: [Jay Ryu]
  y=y[,-(1:2)] # y = y under ###3
  d=colnames(y)==t.unit
  # muC_temp=matrix(apply(y[PreTreat,d==0],2,mean),nrow=20) # for potential demeaning
  # muC= t(t(y[TestPeriod,d==0])-muC_temp[,rep(1:1,each=114)]) # for potential demeaning
  # muT=y[TestPeriod,d==1]-mean(y[PreTreat,d==1]) # for potential demeaning
  
  ## to use actual OV when computing ATTs
  y_2=data.matrix(d8_2)
  
  X1_2 = data.matrix(y_2[PreTreatPeriod,t.unit])
  X0_2 = data.matrix(y_2[PreTreatPeriod,Out])
  X00_2 = data.matrix(y_2[PreTreat,Out])
  X11_2= data.matrix(y_2[TestPeriod,Out])
  
  y_2=y_2[,-(1:2)]
  
  #### double looping starts below
  #### INDICATORS
  name.ind=c("ATT_psc","p-value_psc","RMSE_psc","MAPE_psc","SD_psc","MAPE/SD_psc","LAMBDA_psc",
             "ATT_npsc","p-value_npsc","RMSE_npsc","MAPE_npsc","SD_npsc","MAPE/SD_npsc","LAMBDA_npsc",
             "ATT_psc_BC","p-value_psc_BC","RMSE_psc_BC","MAPE_psc_BC","SD_psc_BC","MAPE/SD_psc_BC","LAMBDA_psc_BC",
             "ATT_npsc_BC","p-value_npsc_BC","RMSE_npsc_BC","MAPE_npsc_BC","SD_npsc_BC","MAPE/SD_npsc_BC","LAMBDA_npsc_BC"
  )
  v.ind=data.frame(indicator=name.ind, matrix(NA,length(name.ind),length(TestPeriod)))
  data.full=data.matrix(d8_2[,Out])
  for(i in 1:length(TestPeriod) ){ #length(TestPeriod)
    PX0=data.matrix(y[PreTreatPeriod,d==0]) #these lines look redundant but without them the following permutations do not work properly
    PX1=data.matrix(y[PreTreatPeriod,d==1])
    PY0=data.matrix(y[TestPeriod[i],d==0])
    PY1=data.matrix(y[TestPeriod[i],d==1]) 
    PX=data.matrix(y[PreTreatPeriod,])
    PY=data.matrix(y[TestPeriod[i],]) 
    PCOV0=data.matrix(covariate[PreTreat,d==0]) ## covariate used; dim(covariate) should be p * n (=n1+n0)
    PCOV1=data.matrix(covariate[PreTreat,d==1])
    
    #PSC
    permutation.iter.C=function(d,y,V,lambda.perm.set){
      dstar=sample(d)
      SX0=data.matrix(y[PreTreatPeriod,dstar==0])
      SX1=data.matrix(y[PreTreatPeriod,dstar==1])
      SSY0=data.matrix(y[event,dstar==0])
      SSY1=data.matrix(y[event,dstar==1])
      SY0=data.matrix(y[TestPeriod[i],dstar==0])
      SY1=data.matrix(y[TestPeriod[i],dstar==1])
      
      temp=cbind(SX1,SX0)
      V = diag(1/diag(var(t(temp)))) # Reweight by inverse of variance
      
      ### SELECTION OF LAMBDA OPT FOR THIS ITERATION ### 
      lambda.perm.set = c(seq(0,0.1,.01),seq(.2,1.5,.1)) # sequence of lambdas to test
      estval=regsynthpath(SX0,SX1,SSY0,SSY1,V,lambda.perm.set,tol=1e-6)
      MSPE = vector(length=length(lambda.perm.set))
      for(k in 1:length(lambda.perm.set)){
        MSPE[k] = mean(apply((y[ValPeriod,dstar==1] - y[ValPeriod,dstar==0]%*%(estval$Wsol[k,,]))^2,2,mean))
      } # [Jay Ryu] took out the t() before estval$Wsol (single treated unit); do the same in all the relevant places.
      
      min_indices=which(MSPE==min(MSPE)) # using new code lines
      Wsol_perm = matrix(estval$Wsol[min_indices[1],,],nrow=dim(Psol)[1],ncol=1) # COLLECT W(lambda.opt)
      #[JR] changed which() to which.min
      SX0_2=data.matrix(y_2[PreTreatPeriod,dstar==0])
      SX1_2=data.matrix(y_2[PreTreatPeriod,dstar==1])
      SY0_2=data.matrix(y_2[TestPeriod[i],dstar==0])
      SY1_2=data.matrix(y_2[TestPeriod[i],dstar==1])
      
      
      MSPE_post=mean(apply((SY1_2-t(SY0_2)%*%(Wsol_perm))^2,2,mean)) # instead of SY0
      MSPE_pre=mean(apply((SX1_2-SX0_2%*%(Wsol_perm))^2,2,mean))
      ratio_star=MSPE_post/MSPE_pre
      ATT_star=mean(apply((SY1_2-t(SY0_2)%*%(Wsol_perm)),1,mean)) # instead of SY0
      return(ratio_star)
    }
    
    perm.test=function(d,y,V,lambda.perm.set,R=1000){
      PX0_2=data.matrix(y_2[PreTreatPeriod,d==0])
      PX1_2=data.matrix(y_2[PreTreatPeriod,d==1])
      PY0_2=data.matrix(y_2[TestPeriod[i],d==0])
      PY1_2=data.matrix(y_2[TestPeriod[i],d==1]) #use Psol for the following
      MSPE_po_A=mean(apply((PY1_2-t(PY0_2)%*%(Psol))^2,2,mean)) #instead of PY0
      MSPE_pr_A=mean(apply((PX1_2-PX0_2%*%(Psol))^2,2,mean))
      ratio=MSPE_po_A/MSPE_pr_A
      ATT=mean(apply((PY1_2-t(PY0_2)%*%(Psol)),1,mean)) #instead of PY0
      
      theta.reshuffled =replicate(R,permutation.iter.C(d,y,V,lambda.perm.set),simplify = "vector")
      
      #compute p-value
      p.val=mean(abs(theta.reshuffled)>=abs(ratio))
      #print(paste("p_value:",p.val))
      return(list(p.val=p.val,ATT_psc=ATT, psc_i=PY1_2-ATT)) # PY1_2: actual outcome scale, matching Y0_hat_psc below
    }
    set.seed(1234)
    psc=perm.test(d,y_2,V,lambda.perm.set,1000) # use y_2 for the original outcome variables; ditto for all three calls below
                  # however, this does not change the results because the data sets built "IN" the permutations are used
    
    # Create synthetic series 
    psc_temp=c((data.full%*%Psol)[PreTreat])
    Y0_hat_psc=c(psc_temp,psc$psc_i)
    
    
    ## 4.2 Non-Penalized Synthetic Control
    #NPSC
    permutation.iter.C=function(d,y,V,lambda.perm.set){
      dstar=sample(d)
      SX0=data.matrix(y[PreTreatPeriod,dstar==0])
      SX1=data.matrix(y[PreTreatPeriod,dstar==1])
      SSY0=data.matrix(y[event,dstar==0])
      SSY1=data.matrix(y[event,dstar==1])
      SY0=data.matrix(y[TestPeriod[i],dstar==0])
      SY1=data.matrix(y[TestPeriod[i],dstar==1])
      
      temp=cbind(SX1,SX0)
      V = diag(1/diag(var(t(temp)))) # Reweight by inverse of variance
      ### SELECTION OF LAMBDA OPT FOR THIS ITERATION ### 
      lambda.perm.set = c(seq(0,0.1,.01),seq(.2,1.5,.1)) # sequence of lambdas to test
      estval=regsynthpath(SX0,SX1,SSY0,SSY1,V,lambda.perm.set,tol=1e-6)
      MSPE = vector(length=length(lambda.perm.set))
      for(k in 1:length(lambda.perm.set)){
        MSPE[k] = mean(apply((y[ValPeriod,dstar==1] - y[ValPeriod,dstar==0]%*%(estval$Wsol[k,,]))^2,2,mean))
      } # [Jay Ryu] took out the t() before estval$Wsol (single treated unit); do the same in all the relevant places.
      
      Wsol_perm = matrix(estval$Wsol[1,,],nrow=length(NPsol),ncol=1) # COLLECT W(lambda=0), the non-penalized weights;
      # in some previous looping code, estval$Wsol[which.min(MSPE==min(MSPE)),,] was used; caution needed
      #[JR] changed which() to which.min
      SX0_2=data.matrix(y_2[PreTreatPeriod,dstar==0])
      SX1_2=data.matrix(y_2[PreTreatPeriod,dstar==1])
      SY0_2=data.matrix(y_2[TestPeriod[i],dstar==0])
      SY1_2=data.matrix(y_2[TestPeriod[i],dstar==1])
      
      MSPE_post=mean(apply((SY1_2-t(SY0_2)%*%(Wsol_perm))^2,2,mean)) # instead of SY0
      MSPE_pre=mean(apply((SX1_2-SX0_2%*%(Wsol_perm))^2,2,mean))
      ratio_star=MSPE_post/MSPE_pre
      ATT_star=mean(apply((SY1_2-t(SY0_2)%*%(Wsol_perm)),1,mean)) # instead of SY0
      return(ratio_star)
    }
    
    perm.test=function(d,y,V,lambda.perm.set,R=1000){
      PX0_2=data.matrix(y_2[PreTreatPeriod,d==0])
      PX1_2=data.matrix(y_2[PreTreatPeriod,d==1])
      PY0_2=data.matrix(y_2[TestPeriod[i],d==0])
      PY1_2=data.matrix(y_2[TestPeriod[i],d==1]) #use NPsol for the following
      MSPE_po_A=mean(apply((PY1_2-t(PY0_2)%*%(NPsol))^2,2,mean)) #instead of PY0
      MSPE_pr_A=mean(apply((PX1_2-PX0_2%*%(NPsol))^2,2,mean))
      ratio=MSPE_po_A/MSPE_pr_A
      ATT=mean(apply((PY1_2-t(PY0_2)%*%(NPsol)),1,mean)) #instead of PY0
      
      theta.reshuffled =replicate(R,permutation.iter.C(d,y,V,lambda.perm.set),simplify = "vector")
      
      #compute p-value
      p.val=mean(abs(theta.reshuffled)>=abs(ratio))
      # print(paste("p_value:",p.val))
      return(list(p.val=p.val,ATT_npsc=ATT,npsc_i=PY1_2-ATT)) # PY1_2: actual outcome scale, matching Y0_hat_npsc below
    }
    set.seed(1234)
    npsc=perm.test(d,y_2,V,lambda.perm.set,1000)
    
    # Create synthetic series 
    npsc_temp=c((data.full%*%NPsol)[PreTreat])
    Y0_hat_npsc=c(npsc_temp,npsc$npsc_i)
    
    ########### The bias correction below now uses an X covariate, so I apply it to pre-T periods as well as post-T periods;
    ########### different from "PSC_Ann_GDP.R", which uses pre-T outcomes for BC; thus I did not apply the latter to pre-T periods
    ## 4.3 Penalized Synthetic Control: BCM
    #PSC_BC
    ##############################################################################################
    ####### If an error occurs ("Wsol_perm_BC is not found"), run each line below separately
    ####### (line by line); then rerun the entire loop from the beginning
    ##############################################################################################
    permutation.iter.C_BC=function(d,y,V,lambda.perm.set){
      dstar=sample(d)
      SX0=data.matrix(y[PreTreatPeriod,dstar==0])
      SX1=data.matrix(y[PreTreatPeriod,dstar==1])
      SSY0=data.matrix(y[event,dstar==0])
      SSY1=data.matrix(y[event,dstar==1])
      SY0=data.matrix(y[TestPeriod[i],dstar==0])
      SY1=data.matrix(y[TestPeriod[i],dstar==1])
      SX=data.matrix(y[PreTreat,]) #PreTreat to be clearer
      SY=data.matrix(y[TestPeriod,])
      COV0=data.matrix(covariate[PreTreat,dstar==0]) ## covariate used; dim(covariate) should be p * n (=n1+n0)
      COV1=data.matrix(covariate[PreTreat,dstar==1])
      
      temp=cbind(SX1,SX0)
      V = diag(1/diag(var(t(temp)))) # Reweight by inverse of variance
      
      ### SELECTION OF LAMBDA OPT FOR THIS ITERATION ### 
      lambda.perm.set = c(seq(0,0.1,.01),seq(.2,1.5,.1)) # sequence of lambdas to test
      estval=regsynthpath(SX0,SX1,SSY0,SSY1,V,lambda.perm.set,tol=1e-6)
      MSPE = vector(length=length(lambda.perm.set))
      for(k in 1:length(lambda.perm.set)){
        MSPE[k] = mean(apply((y[ValPeriod,dstar==1] - y[ValPeriod,dstar==0]%*%(estval$Wsol[k,,]))^2,2,mean))
      } # [Jay Ryu] took out the t() before estval$Wsol (single treated unit); do the same in all the relevant places.
      
      min_indices=which(MSPE==min(MSPE)) # using new code lines
      Wsol_perm_BC = matrix(estval$Wsol[min_indices[1],,],nrow=dim(Psol)[1],ncol=1) # COLLECT W(lambda.opt)
      #[JR] changed which() to which.min
      SX0_2=data.matrix(y_2[PreTreatPeriod,dstar==0])
      SX1_2=data.matrix(y_2[PreTreatPeriod,dstar==1])
      SY0_2=data.matrix(y_2[TestPeriod[i],dstar==0])
      SY1_2=data.matrix(y_2[TestPeriod[i],dstar==1])
      SY_2=data.matrix(y_2[TestPeriod,])
      
      tau_post=SY1_2-t(SY0_2)%*%(Wsol_perm_BC)
      tau_post_BC=tau_post - bias(COV0,COV1, SY_2, dstar, Wsol_perm_BC)[i] # instead of SX0, SX1
      tau_pre=SX1_2-SX0_2%*%(Wsol_perm_BC)
      #tau_pre_BC=tau_pre - bias(COV0,COV1,SX,dstar,Wsol_perm_BC) # instead of SX0, SX1, but still not using this line 
      # b/c bias correction includes backforecasting (using newer periods to predict older periods when used in time series)
      tau_pre_BC=tau_pre
      MSPE_post_BC=mean(apply((tau_post_BC)^2,2,mean))
      MSPE_pre_BC=mean(apply((tau_pre_BC)^2,2,mean))
      
      ratio_star_BC= MSPE_post_BC / MSPE_pre_BC
      phiP_perm = apply((SY1_2-t(SY0_2)%*%(Wsol_perm_BC)),1,mean) # [Jay Ryu] took out t in front of (Psol) for a single treated unit
      ATT_star_BC = mean(phiP_perm - apply(bias(COV0,COV1,SY_2,dstar,Wsol_perm_BC),1,mean)) # instead of SX0, SX1
      return(ratio_star_BC)
    }
    
    perm.test_BC=function(d,y,V,lambda.perm.set,R=1000){
      PX0_2=data.matrix(y_2[PreTreatPeriod,d==0])
      PX1_2=data.matrix(y_2[PreTreatPeriod,d==1])
      PY0_2=data.matrix(y_2[TestPeriod[i],d==0])
      PY1_2=data.matrix(y_2[TestPeriod[i],d==1]) 
      PX_2=data.matrix(y_2[PreTreat,]) # PreTreat to be clearer
      PY_2=data.matrix(y_2[TestPeriod,])
      PCOV0=data.matrix(covariate[PreTreat,d==0]) ## covariate used; dim(covariate) should be p * n (=n1+n0)
      PCOV1=data.matrix(covariate[PreTreat,d==1])
      
      tau_A_po=PY1_2-t(PY0_2)%*%(Psol)
      tau_A_po_BC=tau_A_po - bias(PCOV0, PCOV1, PY_2, d, Psol)[i] # instead of PX0, PX1
      tau_A_pr=PX1_2-PX0_2%*%(Psol) # PX1_2: pre-treatment outcomes on the same (actual outcome) scale as PX0_2
      #tau_A_pr_BC=tau_A_pr - bias(PCOV0, PCOV1,PX,d,Psol) # instead of SX0, SX1, but still not using this line 
      # b/c bias correction includes backforecasting (using newer periods to predict older periods when used in time series)
      tau_A_pr_BC=tau_A_pr
      MSPE_po_A_BC=mean(apply((tau_A_po_BC)^2,2,mean))
      MSPE_pr_A_BC=mean(apply((tau_A_pr_BC)^2,2,mean))
      ratio_BC=MSPE_po_A_BC/MSPE_pr_A_BC
      phiP_perm_A = apply((PY1_2-t(PY0_2)%*%(Psol)),1,mean) # [Jay Ryu] took out t in front of (Psol) for a single treated unit
      #ATT_BC = mean(phiP_perm_A - apply(bias(PCOV0, PCOV1, PY, d, Psol),1,mean)) # instead of PX0, PX1
      ATT_BC = tau_A_po_BC
      
      theta.reshuffled_BC =replicate(R,permutation.iter.C_BC(d,y,V,lambda.perm.set),simplify = "vector")
      
      #compute p-value
      p.val=mean(abs(theta.reshuffled_BC)>=abs(ratio_BC))
      #print(paste("p_value_BC:",p.val))
      return(list(p.val_BC=p.val,ATT_psc_BC=ATT_BC,tau_A_po=tau_A_po,tau_A_po_BC=tau_A_po_BC))
    }
    set.seed(1234)
    psc_BC=perm.test_BC(d,y_2,V,lambda.perm.set,1000)
    # Create synthetic series 
    Y0_hat_psc2=data.full%*%Psol
    Y0_hat_psc_bc=c(Y0_hat_psc2[PreTreat],(Y0_hat_psc2[TestPeriod[i]] + (psc_BC$tau_A_po-psc_BC$tau_A_po_BC))) #bias correction only for post-treatment periods; TestPeriod[i] matches the npsc version below
    
    ### 4.4 Non-Penalized SC: BCM
    #NPSC_BC
    permutation.iter.C_BC=function(d,y,V,lambda.perm.set){
      dstar=sample(d)
      SX0=data.matrix(y[PreTreatPeriod,dstar==0])
      SX1=data.matrix(y[PreTreatPeriod,dstar==1])
      SSY0=data.matrix(y[event,dstar==0])
      SSY1=data.matrix(y[event,dstar==1])
      SY0=data.matrix(y[TestPeriod[i],dstar==0])
      SY1=data.matrix(y[TestPeriod[i],dstar==1])
      SX=data.matrix(y[PreTreat,]) #PreTreat to be clearer
      SY=data.matrix(y[TestPeriod,])
      COV0=data.matrix(covariate[PreTreat,dstar==0]) ## covariate used; dim(covariate) should be p * n (=n1+n0)
      COV1=data.matrix(covariate[PreTreat,dstar==1])
      
      temp=cbind(SX1,SX0)
      V = diag(1/diag(var(t(temp)))) # Reweight by inverse of variance
      
      ### SELECTION OF LAMBDA OPT FOR THIS ITERATION ### 
      lambda.perm.set = c(seq(0,0.1,.01),seq(.2,1.5,.1)) # sequence of lambdas to test
      estval=regsynthpath(SX0,SX1,SSY0,SSY1,V,lambda.perm.set,tol=1e-6)
      MSPE = vector(length=length(lambda.perm.set))
      for(k in 1:length(lambda.perm.set)){
        MSPE[k] = mean(apply((y[ValPeriod,dstar==1] - y[ValPeriod,dstar==0]%*%(estval$Wsol[k,,]))^2,2,mean))
      } # [Jay Ryu] took out the t() before estval$Wsol (single treated unit); do the same in all the relevant places.
      
      Wsol_perm_BC = matrix(estval$Wsol[1,,],nrow=length(NPsol),ncol=1) # COLLECT W(lambda=0), the non-penalized weights;
      # in some previous looping code, estval$Wsol[which.min(MSPE==min(MSPE)),,] was used; caution needed
      #[JR] changed which() to which.min
      SX0_2=data.matrix(y_2[PreTreatPeriod,dstar==0])
      SX1_2=data.matrix(y_2[PreTreatPeriod,dstar==1])
      SY0_2=data.matrix(y_2[TestPeriod[i],dstar==0])
      SY1_2=data.matrix(y_2[TestPeriod[i],dstar==1])
      SY_2=data.matrix(y_2[TestPeriod,])
      
      tau_post=SY1_2-t(SY0_2)%*%(Wsol_perm_BC)
      tau_post_BC=tau_post - bias(COV0,COV1, SY_2, dstar, Wsol_perm_BC)[i] # instead of SX0, SX1
      tau_pre=SX1_2-SX0_2%*%(Wsol_perm_BC)
      #tau_pre_BC=tau_pre - bias(COV0,COV1,SX,dstar,Wsol_perm_BC) # instead of SX0, SX1, but still not using this line 
      # b/c bias correction includes backforecasting (using newer periods to predict older periods when used in time series)
      tau_pre_BC=tau_pre
      MSPE_post_BC=mean(apply((tau_post_BC)^2,2,mean))
      MSPE_pre_BC=mean(apply((tau_pre_BC)^2,2,mean))
      
      ratio_star_BC= MSPE_post_BC / MSPE_pre_BC
      phiP_perm = apply((SY1_2-t(SY0_2)%*%(Wsol_perm_BC)),1,mean) # [Jay Ryu] took out t in front of (Psol) for a single treated unit
      ATT_star_BC = mean(phiP_perm - apply(bias(COV0,COV1,SY_2,dstar,Wsol_perm_BC),1,mean)) # instead of SX0, SX1
      return(ratio_star_BC)
    }
    perm.test_BC=function(d,y,V,lambda.perm.set,R=1000){
      PX0_2=data.matrix(y_2[PreTreatPeriod,d==0])
      PX1_2=data.matrix(y_2[PreTreatPeriod,d==1])
      PY0_2=data.matrix(y_2[TestPeriod[i],d==0])
      PY1_2=data.matrix(y_2[TestPeriod[i],d==1]) 
      PX_2=data.matrix(y_2[PreTreat,]) # PreTreat to be clearer
      PY_2=data.matrix(y_2[TestPeriod,])
      PCOV0=data.matrix(covariate[PreTreat,d==0]) ## covariate used; dim(covariate) should be p * n (=n1+n0)
      PCOV1=data.matrix(covariate[PreTreat,d==1])
      
      tau_A_po=PY1_2-t(PY0_2)%*%(NPsol)
      tau_A_po_BC=tau_A_po - bias(PCOV0, PCOV1, PY_2, d, NPsol)[i] # instead of PX0, PX1
      tau_A_pr=PX1_2-PX0_2%*%(NPsol) # PX1_2: pre-treatment outcomes on the same (actual outcome) scale as PX0_2
      #tau_A_pr_BC=tau_A_pr - bias(PCOV0, PCOV1,PX,d,Psol) # instead of SX0, SX1, but still not using this line 
      # b/c bias correction includes backforecasting (using newer periods to predict older periods when used in time series)
      tau_A_pr_BC=tau_A_pr
      MSPE_po_A_BC=mean(apply((tau_A_po_BC)^2,2,mean))
      MSPE_pr_A_BC=mean(apply((tau_A_pr_BC)^2,2,mean))
      ratio_BC=MSPE_po_A_BC/MSPE_pr_A_BC
      phiP_perm_A = apply((PY1_2-t(PY0_2)%*%(NPsol)),1,mean) # [Jay Ryu] took out t in front of (Psol) for a single treated unit
      #ATT_BC = mean(phiP_perm_A - apply(bias(PCOV0, PCOV1, PY, d, Psol),1,mean)) # instead of PX0, PX1
      ATT_BC = tau_A_po_BC
      
      theta.reshuffled_BC =replicate(R,permutation.iter.C_BC(d,y,V,lambda.perm.set),simplify = "vector")
      
      #compute p-value
      p.val=mean(abs(theta.reshuffled_BC)>=abs(ratio_BC))
      #print(paste("p_value_BC:",p.val))
      return(list(p.val_BC=p.val,ATT_npsc_BC=ATT_BC,tau_A_po=tau_A_po,tau_A_po_BC=tau_A_po_BC))
    }
    set.seed(1234)
    npsc_BC=perm.test_BC(d,y_2,V,lambda.perm.set,1000)
    
    # Create synthetic series 
    Y0_hat_npsc2=data.full%*%NPsol
    #setdiff(Y0_hat_npsc,Y0_hat_psc)
    Y0_hat_npsc_bc=c(Y0_hat_npsc2[PreTreat],(Y0_hat_npsc2[TestPeriod[i]] + (npsc_BC$tau_A_po-npsc_BC$tau_A_po_BC) )) #bias correction only for post-treatment periods
    
    data_temp=rbind(d8_2[PreTreat,],d8_2[TestPeriod[i],])
    #dim(data_temp);length(Y0_hat_psc);length(Y0_hat_npsc);length(Y0_hat_psc_bc);length(Y0_hat_npsc_bc)
    data.final=cbind(data_temp, Y0_hat_psc, Y0_hat_npsc,Y0_hat_psc_bc, Y0_hat_npsc_bc)
    rmse.pre_T=data.final[IT_dum==0,]
    rmse_psc=rmse(rmse.pre_T[,t.unit],rmse.pre_T[,"Y0_hat_psc"])
    mape_psc=mean(abs((rmse.pre_T[,t.unit]-rmse.pre_T[,"Y0_hat_psc"]))) # labeled MAPE but computed as a mean absolute error (no division by actual values)
    sd_psc=sd(rmse.pre_T[,t.unit])
    mape_psc_sd=mape_psc/sd_psc
    rmse_npsc=rmse(rmse.pre_T[,t.unit],rmse.pre_T[,"Y0_hat_npsc"])
    mape_npsc=mean(abs((rmse.pre_T[,t.unit]-rmse.pre_T[,"Y0_hat_npsc"])))
    sd_npsc=sd(rmse.pre_T[,t.unit])
    mape_npsc_sd=mape_npsc/sd_npsc
    
    rmse_psc_bc=rmse(rmse.pre_T[,t.unit],rmse.pre_T[,"Y0_hat_psc_bc"])
    mape_psc_bc=mean(abs((rmse.pre_T[,t.unit]-rmse.pre_T[,"Y0_hat_psc_bc"])))
    sd_psc_bc=sd(rmse.pre_T[,t.unit])
    mape_psc_sd_bc=mape_psc_bc/sd_psc_bc
    rmse_npsc_bc=rmse(rmse.pre_T[,t.unit],rmse.pre_T[,"Y0_hat_npsc_bc"])
    mape_npsc_bc=mean(abs((rmse.pre_T[,t.unit]-rmse.pre_T[,"Y0_hat_npsc_bc"])))
    sd_npsc_bc=sd(rmse.pre_T[,t.unit])
    mape_npsc_sd_bc=mape_npsc_bc/sd_npsc_bc
    
    v.ind[,i+1]=c(psc$ATT_psc,psc$p.val,rmse_psc,mape_psc,sd_psc,mape_psc_sd,lambda.opt.MSPE,
                  npsc$ATT_npsc,npsc$p.val,rmse_npsc,mape_npsc,sd_npsc,mape_npsc_sd,0,
                  psc_BC$ATT_psc_BC,psc_BC$p.val,rmse_psc_bc,mape_psc_bc,sd_psc_bc,mape_psc_sd_bc,lambda.opt.MSPE,
                  npsc_BC$ATT_npsc_BC,npsc_BC$p.val,rmse_npsc_bc,mape_npsc_bc,sd_npsc_bc,mape_npsc_sd_bc,0
    )
    colnames(v.ind)[i+1]=d8$year[TestPeriod[i]]
    
  }
  stat.all[[j]]=list(v.ind)
  names(stat.all[[j]])=t.unit
}
##########################################looping ends
datalist=list() 
for (i in 1:length(stat.all)){
  dat=data.frame(stat.all[[i]][1])
  datalist[[i]]=dat
}
namelist=list()
for(i in 1:length(stat.all)){
  l_names=names(stat.all[[i]][1])
  namelist[[i]]=l_names
}
namelist2=as.vector(do.call(rbind,namelist))
names(datalist)=namelist2
datalist%>%writexl::write_xlsx(paste0(dv,'_',group,'.xlsx')) # paste0 keeps the filename free of spaces
