### Stat 427/627 (Baron). Lab 11a. ### Efficacy of the Pfizer-BioNTech COVID-19 Vaccine ### # The Phase III clinical trial of Pfizer vaccine included n=44000 people, # 50% took trt, 50% placebo. # Result: 8 vaccinated and 162 placebo participants contracted COVID-19. # A conclusion was made that vaccination reduces the risk of COVID by # the factor of 20. # Vaccine critics point to a small sample size of 170 for any meaningful # conclusions. Here, we explore what confidence statements we can deduce # from these data. # Construct a Bootstrap CI for p1/p2 = the ratio of probabilities # p1 = P( infection | placebo group ), p2 = P( infection | vaccine group ) V = c(rep(0,22000),rep(1,22000)); # 0 = placebo, 1 = treatment X = c(rep(0,21838),rep(1,162),rep(0,21992),rep(1,8)); # 1 = contracted covid XV = data.frame(X,V); pratio = function(xv,Z){ # function of data xv and a subsample Z x = xv[,1]; v = xv[,2]; Pplacebo = sum(x[Z]==1 & v[Z]==0)/sum(v[Z]==0); Pvaccine = sum(x[Z]==1 & v[Z]==1)/sum(v[Z]==1); return( Pplacebo/Pvaccine ) } pratio(XV,) # The ratio is estimated as 20.25. We need to supply it with the margin of error # and a confidence level. That is, use Bootstrap to construct a confidence interval. library(boot) b = boot( XV, pratio, R=10000 ) # Apply bootstrap quantile(b$t,c(0.025,0.975)) # This is a 95% confidence interval for p1/p2 # 2.5% 97.5% # Thus, we can claim with 95% confidence that the ratio of # 11.34329 54.51769 # probabilities is between 11.3 and 54.5. But do we need an interval, # or should we just find a lower bound? quantile(b$t,0.05) # Try a one-sided confidence interval. # 5% # We can now claim, also with 95% confidence, that vaccination # 12.29509 # reduces the chance of COVID-19 infection by at least a factor of 12. ################################################################################ ### Official efficacy of the vaccine ########################################### ### The efficacy of a vaccine is defined as the difference of risks between #### ### the vaccine group and the placebo group, relative to the placebo group. #### ################################################################################ ### To study the efficacy of Pfizer, we modify our function: efficacy = function(xv,Z){ # function of data xv and a subsample Z x = xv[,1]; v = xv[,2]; Pplacebo = sum(x[Z]==1 & v[Z]==0)/sum(v[Z]==0); Pvaccine = sum(x[Z]==1 & v[Z]==1)/sum(v[Z]==1); return( (Pplacebo - Pvaccine)/Pplacebo ) } pratio(XV,) # This gives the estimated efficacy which you see in all official reports # 0.9506173 # But we know that it is only based on the sample, so it has a margin of error. # So, we proceed by building a confidence interval b = boot( XV, efficacy, 10000 ) # Bootstrap! hist(b$t) # This is a histogram of bootstrap efficacy values, it's not very symmetric # I'm just checking normality, to see if we can settle with a PARAMETRIC # bootstrap confidence interval, which is the mean b$t, plus or minus 1.96 # standard deviations shapiro.test(b$t) # Shapiro-Wilk test is a rigorous test of normality Error in shapiro.test(b$t) : sample size must be between 3 and 5000 # The way it's built in R does not handle samples > 5000 shapiro.test(b$t[sample(10000,5000)]) # Ok, we select a random sample of n=5000 # Shapiro-Wilk normality test # # data: b$t[sample(10000, 5000)] # W = 0.98926, p-value < 2.2e-16 # Shapiro-Wilk test rejects normal distribution. # Hence, we construct a NONPARAMETRIC quantile( b$t, c(0.025,0.975) ) # bootstrap confidence interval # 2.5% 97.5% # 0.9105982 0.9816955 # This is a 95% confidence interval for efficacy quantile( b$t, c(0.005,0.995) ) 0.5% 99.5% 0.8947269 0.9882617 # This is a 99% confidence interval