%% This is a Monte Carlo exercise that demonstrates poor size control in a one-sample t-test
%% based on the conventional asymptotic distribution when the population distribution is non-normal
%% and the sample size is somewhat small. It also demonstrates the use of the bootstrap to reduce
%% the size distortion.

clear all
% Simulation design parameters (shared by both Monte Carlo loops below).
S=1000;  % number of Monte Carlo replications
n=50;    % sample size drawn in each replication
B=1000;  % number of bootstrap resamples per replication (semicolon suppresses the console echo)


%% Normal population distribution
% Results for this design are stored in column 1 of reject_conv, p_boot and reject_boot.
% Preallocate the result containers so they are not grown element-by-element inside the loop.
t = zeros(1,S);            % t statistic of each replication
reject_conv = zeros(S,1);  % 1 if the conventional test rejects, 0 otherwise
p_boot = zeros(S,1);       % bootstrap p-value of each replication
reject_boot = zeros(S,1);  % 1 if the bootstrap test rejects, 0 otherwise
for s=1:S
    % Draw a sample of size n from N(0,1); the null hypothesis (mean zero) is true.
    Y=normrnd(0,1,n,1);

    % One-sample t statistic for H0: mean = 0.
    t(s)=mean(Y)/(sqrt(var(Y)/n));

    % Conventional test: reject when |t| reaches the asymptotic 5% critical value.
    reject_conv(s,1)=1*(abs(t(s))>=1.96);

    % Draw B bootstrap samples from Y and compute the mean and standard deviation
    % of each one. The output is a B-by-2 matrix: column 1 = means, column 2 = standard deviations.
    stats = bootstrp(B,@(x)[mean(x) std(x)],Y);

    % t statistic of each bootstrap sample. Subtracting mean(Y) is crucial: it
    % recenters the bootstrap statistic so that the null holds in the bootstrap world.
    t_boot=(stats(:,1)-mean(Y))./(stats(:,2)/sqrt(n));

    % Bootstrap p-value: fraction of the B bootstrap t statistics that lie
    % strictly outside the interval [-|t|, |t|].
    p_boot(s,1)=mean(t_boot<-abs(t(s)))+mean(t_boot>abs(t(s)));

    % Reject the null if the p-value is no larger than the nominal level, .05.
    reject_boot(s,1)=1*(p_boot(s,1)<=0.05);
end

%% Log normal population distribution
% Results for this design are stored in column 2 of reject_conv, p_boot and reject_boot.
for s=1:S
    % Draw a sample of size n from a standard log-normal and subtract exp(0.5),
    % the mean of exp(Z) for Z ~ N(0,1), so that the null (mean zero) is true.
    Y=exp(normrnd(0,1,n,1))-exp(0.5);

    % One-sample t statistic for H0: mean = 0.
    t(s)=mean(Y)/(sqrt(var(Y)/n));

    % Conventional test: reject when |t| reaches the asymptotic 5% critical value.
    reject_conv(s,2)=1*(abs(t(s))>=1.96);

    % Draw B bootstrap samples from Y and compute the mean and standard deviation
    % of each one. The output is a B-by-2 matrix: column 1 = means, column 2 = standard deviations.
    stats = bootstrp(B,@(x)[mean(x) std(x)],Y);

    % t statistic of each bootstrap sample. Subtracting mean(Y) is crucial: it
    % recenters the bootstrap statistic so that the null holds in the bootstrap world.
    t_boot=(stats(:,1)-mean(Y))./(stats(:,2)/sqrt(n));

    % Bootstrap p-value: fraction of the B bootstrap t statistics that lie
    % strictly outside the interval [-|t|, |t|].
    p_boot(s,2)=mean(t_boot<-abs(t(s)))+mean(t_boot>abs(t(s)));

    % Reject the null if the p-value is no larger than the nominal level, .05.
    % Column 2 must be indexed explicitly: p_boot(s) is linear indexing and
    % would read column 1 (the normal-design p-values) instead.
    reject_boot(s,2)=1*(p_boot(s,2)<=0.05);
end


% Rejection probabilities (empirical size at the nominal 5% level).
% Each result is a row vector with one entry per column of the indicator matrix
% (column 1 = normal design, column 2 = log-normal design).
% The average is taken explicitly along dimension 1 (over replications) so that
% a single-row indicator matrix is not collapsed across designs.
% Named variables are left unsuppressed so the displayed output is labelled.
size_conventional = mean(reject_conv,1)
size_bootstrap = mean(reject_boot,1)