/* The following example of SAS code for meta-analysis
uses a modified version of the dataset and analysis used for
a meta-analysis published by Hoeksema & Forde (2008).
The goal of the meta-analysis was to test for local adaptation of parasites to hosts.
Thus, the "effect size" on which the analysis focused was calculated
by comparing average parasite infectivity in sympatric
pairings (host and parasite from the same population) versus allopatric
pairings (host and parasite from different populations).
The effects of multiple independent moderator variables on the effect size
were explored using mixed-factor multiple regression models, a type of analysis
that is referred to as "meta-regression" in some fields. */
/* Import a dataset from an Excel file. Note, for PROC IMPORT to work correctly, columns of
numeric continuous data must not have blanks in the first 8 rows; otherwise, it
will treat those columns as character data. */
PROC IMPORT OUT= WORK.parasites
DATAFILE=
"C:\Users\Jason\Desktop\data.xls"
DBMS=Excel REPLACE;
SHEET="data";
GETNAMES=YES;
MIXED=YES;
SCANTEXT=YES;
USEDATE=YES;
SCANTIME=YES;
RUN;
quit;
data parasites;
set parasites;
/*Calculate effect size as log response ratio of mean infectivity in sympatric pairings
divided by mean infectivity of allopatric pairings. See Hedges et al. (1999) for an
analysis of the usefulness of response ratios as effect sizes in meta-analyses. */
lnRRinf=(log(syminf/alo_inf));
/* Calculate the estimated within-study variance associated with each effect size
estimate, and call it 'est' so that it is recognized by the PARMS statement
in PROC MIXED below. See Hedges et al. (1999) for the formula for the variance
of the log response ratio. */
est=(((syminfsd)**2)/((syminfn)*((syminf)**2)))+(((aloinfsd)**2)/((aloinfn)*((alo_inf)**2)));
studyid = _n_; /* insert a column with a series of integers, each associated with a
different study */
run;
/*
Higgins & Thompson (2004) show that Type I error rates are substantial when
meta-regression is conducted using fixed-effect models, and that Type I error rates
are best controlled by using a mixed-effect model (with a random between-studies
variance component estimated by maximum likelihood), and using randomization
tests to obtain p-values. A mixed-effects maximum likelihood model, modified from code
suggested by van Houwelingen et al. (2002, Sect 5), is used to conduct the meta-analysis, and
SAS macros are used to facilitate the calculation of p-values using randomization tests.
*/
/* These next few steps are used to create a data set that contains
a starting value of 0.05 for estimation of the random between-studies variance component
as well as a list of the known within-study variances. */
data betvar;
infile cards;
input est;
cards;
0.05
;
run;
data vars;
set parasites;
keep est;
run;
proc append base=betvar data=vars;
run;
proc sql noprint;
select count(*)
into :nobs
from betvar;
quit;
/*This PROC MIXED model obtains an estimate of between-studies
variance component, and overall weighted mean effect size, using maximum likelihood
estimation of parameters. */
proc mixed cl method=ml data=parasites ;
parms / parmsdata=betvar eqcons=2 to &nobs;
/* the parms statement provides proc mixed with list of known within-study variances,
along with a starting value of 0.05 for the random between-studies variance component,
which is an unknown variance component to be estimated. */
class studyid;
model lnRRinf= / s cl; /* Intercept-only model */
random int / subject=studyid; /* Specifies that between-studies variance is a random
effect to be estimated. */
repeated / group=studyid; /*Specifies that each study has its own unique variance. */
run;
/* Mixed-effects meta-regression model, testing the effects of three independent
moderator variables, hosttype (plant or animal), genflwsim (gene flow rate similarity
between host and parasite), and tax_sim (taxonomic closeness between host and
parasite) on the effect size. */
proc mixed cl method=ml data=parasites ;
parms / parmsdata=betvar eqcons=2 to &nobs;
class studyid hosttype genflwsim ;
model lnRRinf = hosttype genflwsim tax_sim / s cl covb ddf=1000,1000,1000 solution;
random int/ subject=studyid s;
repeated / group=studyid;
run;
quit;
/*Below are two macros used for conducting randomization tests to obtain p-values.
Macros were modified from code provided by Cassel (2002). */
/* In macro rand_gen, which is used to create a set of randomized datasets
for use in randomization tests, definitions of declared macro variables are as follows:
indata = name of input SAS dataset
outdata = name you want to assign to the randomized output dataset
depvar = the dependent effect-size variable
var = the variance estimate that should be paired with each effect size estimate; this
should usually be named 'est'
numreps = the number of iterations for the randomization test; should be at least 1000
and ideally 10000
seed = the seed for the random number generator; zero gives a different seed every time
while a non-zero integer will give the same set of random numbers each time
*/
%macro rand_gen(indata=,outdata=,depvar=,var=,numreps=,seed=);
/* Obtain size of input dataset into macro
variable &NUMRECS */
proc sql noprint;
select count(*) into :numrecs from
&INDATA;
quit;
/* Generate &NUMREPS random numbers for each
record, so records can be randomly
sorted within each replicate */
data __temp_1;
retain seed &SEED ; drop seed;
set &INDATA;
do replicate = 1 to &NUMREPS;
call ranuni(seed,rand_dep);
output;
end;
run;
proc sort data=__temp_1;
by replicate rand_dep;
run;
/* Now append the new re-orderings to the
original dataset. Label the original
as Replicate=0, so the %RAND_ANL macro
will be able to pick out the correct
test statistic. Then use the ordering of
__counter within each replicate to
write the original values of &DEPVAR and &VAR,
thus creating a randomization of the
dependent variable and its corresponding variance in every replicate. */
data &OUTDATA ;
array deps{ &NUMRECS } $ _temporary_ ;
array vars{ &NUMRECS } $ _temporary_ ;
set &INDATA(in=in_orig)
__temp_1(drop=rand_dep);
if in_orig then do;
replicate=0;
deps{_n_} = &DEPVAR ;
end;
else
&DEPVAR = deps{ 1+ mod(_n_,&NUMRECS) };
if in_orig then do;
replicate=0;
vars{_n_} = &VAR ;
end;
else
&VAR = vars{ 1+ mod(_n_,&NUMRECS) };
run;
%mend rand_gen;
/*
In macro rand_anl, which is used to calculate randomization test p-values,
definitions of declared macro variables are as follows:
randdata = name of dataset produced by the proc Mixed analysis of the set of
randomized datasets
where = label of the effect of interest
teststat = test statistic used for calculating p-value
testlabel = words used to describe the type of randomization test conducted
*/
%macro rand_anl(randdata=,where=,teststat=,testlabel=);
data _null_;
retain observed numsig numtot 0;
set &RANDDATA end=endofile;
%if "&WHERE" ne ""
%then where &WHERE %str(;) ;
if Replicate=0 then observed = &TESTSTAT ;
else do;
numtot+1;
numsig + ( &TESTSTAT >= observed ); /* Use >= when &TESTSTAT is a test statistic such as SS or F */
end; /* but use <= when &TESTSTAT is the p-value itself */
/* Also, when using p-value as the test stat, should round all p-values, or multiply them */
/* by a large number, so that numerical operators perform accurately in SAS */
/* e.g. very small p-values can sometimes be inaccurately compared */
if endofile then do;
ratio = numsig/numtot;
put "Randomization test for &TESTLABEL "
%if "&WHERE" ne "" %then "where &WHERE";
" has significance level of "
ratio 12.10;
end;
run;
%mend rand_anl;
/* invoke macro rand_gen to obtain set of randomized datasets for randomization tests.
Values of macro variables need to be declared correctly here.
*/
%rand_gen(indata=parasites,outdata=outrand,depvar=lnRRinf,var=est,numreps=1000,seed=0);
/* sort output from rand_gen macro */
proc sort data=outrand;
by replicate studyid;
run;
/* create dataset called randvars for use in parms statement, during randomization
iterations of mixed model below Randars is like betar reated aboe but with a opy for
eery randoied repliate produed by the rand_gen aro */
data randvars;
set outrand;
where studyid = 1;
est=0.05;
order=0;
keep order replicate est;
run;
data startvars;
set outrand;
order = _n_;
keep order replicate est;
run;
proc append base=randvars data=startvars;
run;
proc sort data=randvars;
by replicate order;
run;
data randvars;
set randvars;
drop order;
run;
/*Conduct mixed-model meta-regression for each of the randomized datasets produced by the
rand_gen macro, producing a dataset called "Tests" that has test statistics from each of
these analyses. */
ods output Tests3=Tests;
ods listing close;
options nonotes;
proc mixed cl method=ml data=outrand; /* use output dataset, outrand, from macro rand_gen */
by replicate; /* asks SAS to perform the analysis for each of the randomized datasets separately */
ods exclude parmsearch; /* Prevents SAS from printing the parameter search history */
ods exclude covparms; /* Prevents SAS from printing the long list of covariance parameters */
parms / parmsdata=randvars eqcons=2 to &nobs;
class studyid hosttype genflwsim ;
model lnRRinf = hosttype genflwsim tax_sim/ s cl covb ddf=1000,1000,1000;
random int / subject=studyid s;
repeated / group=studyid;
run;
ods output close;
ods listing;
options notes;
/* Invoke rand_anl macro to calculate randomization test p-values using the
test statistics generated from proc mixed in the previous step. A separate
invocation of the macro is needed to obtain p-values for each of the
moderator variables. Randomization test p-values are printed in the SAS Log
window. */
%rand_anl(randdata=Tests,where=Effect='HOSTTYPE',
teststat=FValue,testlabel= test of significance );
%rand_anl(randdata=Tests,where=Effect='GENFLWSIM',
teststat=FValue,testlabel=test of significance );
%rand_anl(randdata=Tests,where=Effect='TAX_SIM',
teststat=FValue,testlabel=test of significance );
quit;
/*
REFERENCES
Cassell, D. L. 2002. A randomization test wrapper for SAS PROCs (paper 251-27)
in SAS User Group International (SUGI) 27.
Hedges, L. V., J. Gurevitch, and P. S. Curtis. 1999. The meta-analysis of
response ratios in experimental ecology. Ecology 80:1150-1156.
Higgins, J. P. T. and S. G. Thompson. 2004. Controlling the risk of spurious
findings from meta-regression. Statistics in Medicine 23:1663-1682.
Hoeksema, J. D. and S. E. Forde. 2008. A meta-analysis of factors affecting
local adaptation between interacting species. American Naturalist 171:275-290.
van Houwelingen, H. C., L. R. Arends, and T. Stijnen. 2002. Advanced methods
in meta-analysis: Multivariate approach and meta-regression.
Statistics in Medicine 21:589-624.
*/