SAS Raking Macro
SAS Raking Macro, from David Izrael, etc.
rakinge.sas
—
Plain Text,
13Kb
File contents
/**************************************************************************/
/* Macro: RAKINGE */
/* */
/* Developed by David Izrael, Abt Associates, June 1999 */
/* */
/* Objective: To compute weights for a sample to make it agree */
/* with independent control totals on a number of */
/* characteristics */
/* */
/* Note from author: This macro is in general SAS-replica of raking */
/* program developed in 1960's in Fortran by */
/* MARKEMATH inc. */
/* */
/* Unlike the original Fortran program this SASmacro uses Mike Battaglia's*/
/* convergence criterion which states that raking can continue */
/* until convergence of less than 1 is obtained */
/* between all of the control totals and the adjusted weighted totals. */
/* 1 was set as a default but convergence criterion may be changed */
/* by user through respective macro parameter (TRMPREC) */
/* */
/* Requirements: Marginal proportions or totals must be passed to RAKINGE */
/* in working */
/* data sets and must contain raking variable and percent */
/* (PERCENT) or raking variable and marginal total */
/* (MRGTOTAL) */
/* Control Total must be obtained and passed to macro either*/
/* by direct indication of parameter (CNTOTAL) or through */
/* macro variable created outside macro RAKING */
/* */
/* Notes: There is no limit on number of raking variables, however */
/* if number of raking variables is very large the running time may*/
/* become unpredictably long. */
/* */
/* There is no limit of number of categories of raking variables */
/* */
/* From experience of authors of Fortran raking procedure and */
/* author of current macro even on poorest of samples the algorithm*/
/* converges in less than 10 passes (iterations). Default parameter*/
/* (NUMITER)is set to 15 and can be changed by user. */
/* */
/* Listing of macro RAKING contains marginal totals, adjusted */
/* totals and their difference after each pass of each level of */
/* raking variable as well as termination diagnostics */
/* */
/* Recommendation: for speeding up raking process input data set should */
/* contain only raking variables, input weight being */
/* adjusted */
/* and some key variables needed for further merging */
/**************************************************************************/
%macro rakinge (inds=INPUTDATASETNAME,
outds=OUTPUTDATASETNAME,
inwt=, /* if unweighted sample, weight =1 will be assigned by macro */
freqlist=,
outwt=NEW_PWEIGHT_VARIABLE_NAME,
byvar=,
varlist=LIST OF VARIABLES IN RAKING MODEL,
numvar=4,
cntotal=, /* any number here, 100 is most natural */
trmprec=1,
trmpct=0.001, /* macro will terminate based on this criterion (WAS 0.001) */
numiter=50, /* was 50 */
dircont=work,
prdiag=Y);
/* checking on existence of required parameters */
%macro reqpar (param);
%if (%bquote(&&¶m) eq ) %then %do;
%put **** Program terminated: Macro parameter %upcase(&PARAM) missing ****;
/*endsas;*/
%end;
%mend;
%reqpar (inds);
%reqpar (outds);
*****%reqpar (inwt);
%reqpar (outwt);
%reqpar (varlist);
%reqpar (numvar);
%reqpar (trmprec);
%reqpar (numiter);
/* checking on number of raking variables */
%if (%upcase(%scan(&varlist,&numvar)) eq ) or
(%upcase(%scan(&varlist,%eval(&numvar+1))) ne )
%then %do;
%put **** Program terminated: Number of variables in the VARLIST ****;
%put **** does not match NUMVAR ****;
/*endsas;*/
%end;
data __i0;
set &inds;
%if (&inwt ne ) %then %do;
weight=&inwt;
%end;
%else %do;
weight=1;
%end;
%do i=1 %to &numiter; /* loop on iteration */
%let sumterm = 0; /* set cumulative sum of */
/* termination flags */
/* checking on number of */
/* raking variables */
%do j=1 %to &numvar; /* loop on raking variable */
%let varrake= %upcase(%scan(&varlist,&j)); /* retrieve raking variable*/
/* from list */
%if (&freqlist ne ) %then
%let dsfreq=%scan(&freqlist,&j);
%else
%let dsfreq=&varrake; /* if list of ds with marg */
/* freq is empty then their */
/* name are same as names */
/* of raking variables */
proc sort data=__i0;
by &varrake;
run;
proc summary nway data=__i0 ; /* calc adjusted marginal total -sum&j */
class &varrake;
var weight;
output out=__i1(drop=_type_ _freq_) sum=sum&j;
run;
data __i0;
merge __i0(in=_1) __i1 &dsfreq(in=_2); /* merge with ds with marginal proportions*/
by &varrake;
%if &i=1 %then %do; /* all checking are done in 1st iteration only*/
if (_1 and ^_2) or (_2 and ^_1) then do;call symput('match','1');stop;end; else
call symput('match','2');
if mrgtotal ne . then call symput ('mrg','1');else call symput('mrg','2');
if percent ne . then call symput ('pct','1');else call symput('pct','2');
%end;
run;
%if &i=1 %then %do; /* all checking are done on first iteration */
%if &match=1 %then %do;
%put
**** Program terminated: levels of variable &varrake do not match ****;
%put
**** in sample and marginal totals data sets ****;
/*endsas;*/
%end;
%if &pct = 1 and (&cntotal eq .) %then %do;
%put
**** Program terminated: PERCENT is not missing and CNTOTAL is missing ****;
%put
**** for raking variable &varrake ****;
/*endsas;*/
%end;
%else
%if &pct=2 and &mrg=2 %then %do;
%put **** Program terminated: Both PERCENT and MRGTOTAL are missing ****;
/*endsas;*/
%end;
%end;
data __i0;
set __i0;
%if (&cntotal ne ) %then %do;
if mrgtotal ne . then /* case of marginal totals*/
cntmarg=mrgtotal;
else
if percent ne . then /* case of marginal freqs */
cntmarg=&cntotal.*percent/100;
%end;
%else %do;
if mrgtotal ne . then /* case of marginal totals*/
cntmarg=mrgtotal;
%end;
weight=weight*cntmarg/sum&j; /* actual raking, weight adjustment */
drop percent mrgtotal;
run;
data __i2(keep=&varrake sum&j cntmarg differ) _forgraph(keep=Iteration &varrake differ);
retain &varrake sum&j cntmarg differ;
set __i0;
by &varrake;
if first.&varrake;
differ=cntmarg-sum&j;
Iteration=&i;
run;
/*** create % for printing ****/
proc summary data=__i2 nway noprint;
var sum&j cntmarg;
output out=__outs(drop=_: ) sum= sumsum summarg;
run;
data __i2(drop=sumsum summarg);
set __i2;
if _n_ =1 then set __outs;
%if (&trmpct ne ) %then %do;
_pctsum=100*sum&j/sumsum;
_pctmrg=100*cntmarg/summarg;
%end;
%else %do;
_pctsum=100*sum&j/sumsum;
_pctmrg=100*cntmarg/summarg;
%end;
_diffpct=_pctsum-_pctmrg;
run;
%if &i=1 %then %do;
proc delete data= &dircont.._table_&varrake._&byvar; run;
%end;
proc append base=&dircont.._table_&varrake._&byvar data=_forgraph; /** accumulating tables with variables and differences **/
run;
%if %upcase(&prdiag)=Y %then %do;
proc print label noobs data=__i2;
%if (&byvar ne) %then %do;
title3 "Raking %upcase(&byvar) - &s by &varrake, iteration - &i ";
%end; %else %do;
title3 "Raking by &varrake, iteration - &i ";
%end;
sum sum&j cntmarg _pctsum _pctmrg;
label sum&j ='Calculated margin'
differ='Difference'
cntmarg='Marginal Control Total'
_pctsum='Calculated %'
_pctmrg='Marginal Control %'
_diffpct= 'Difference in %';
;
format _pctsum _pctmrg _diffpct 6.3;
run;
%end;
data __i2;
set __i2 end=eof;
retain comm 0;
%if (&trmpct ne ) %then %do;
if abs( _diffpct)>&trmpct then comm=1; /* termination test for proportions */
%end;
%else %do;
if abs( differ)>&trmprec then comm=1; /* termination test for totals */
%end;
if eof and comm=1 then
call symput("term&j",'2'); /* continue iterations */
else
if eof then call symput("term&j",'1'); /* terminate */
run;
%let sumterm=%eval(&sumterm+&&term&j); /* cumulation of termination
flags */
%end;
data __i0;
set __i0;
drop %do m=1 %to &numvar; sum&m %end;; /* drop all sums for next loop*/
%if (&byvar ne ) %then
%put %upcase(&byvar)=&s iteration=&i numvar=&numvar sumterm=&sumterm; /* diagnostics for LOG */
%else
%put numvar=&numvar sumterm=&sumterm;
%if &sumterm=&numvar or &i=&numiter %then %do; /* termination test */
%if (&byvar ne ) %then
%put **** Terminated %upcase(&byvar) &s at &i-th iteration;
%else
%put ***** Task terminated at &i-th iteration ****;
title3 ' ';
%if &sumterm ne &numvar %then %do;
%do j=1 %to &numvar;
%let curr = %scan (&varlist,&j,%str( ) );
/** creates data sets for further graphing and analyses of convergence **/
proc sql;
create table _forgraph as
select iteration, &curr, log10(abs(differ)) as log_diff
from &dircont.._table_&curr._&byvar a
where abs(differ) =
(select max(abs(differ)) from &dircont.._table_&curr._&byvar b
where a.iteration =b.iteration)
order by iteration;
quit;
data _forgraph(drop=&curr);
set _forgraph;
variable=&j;
proc append base=_base data=_forgraph;
run;
%end;
/*** module for calculation of predicted number of iterations ***/
proc sort data=_base out=_out;
by descending iteration descending log_diff;
data _out;
set _out;
if _n_=1;
proc sort data=_base out=_out1;
by variable descending iteration;
data _out2;
merge _out1 _out(in=_1 rename=(iteration=last_iteration));
by variable;
if _1;
data _null_;
set _out2;
by descending iteration;
retain nach;
if iteration=last_iteration then nach=log_diff;
if iteration=last_iteration-1 then do; mytan=1/(log_diff-nach);
add_iter=ceil(nach*mytan);
tota_iter=last_iteration+add_iter;
call symput('sug',tota_iter);
end;
%end;
data _null_; /* diagnostic for listing */
set __i0;
if _n_=1;
file print ls=80 ps=59;
put ' ';
%if &sumterm=&numvar %then %do; /* convergence achieved */
%if (&trmpct ne ) %then %do;
%if (&byvar ne ) %then %do;
put "**** Program for %upcase(&byvar) &s terminated at iteration &i because all calculated percents";
%end; %else %do;
put "**** Program terminated at iteration &i because all calculated percents";
%end;
put "differ from Marginal Percents by less than &trmpct ";
run;
%end;
%else %do;
%if (&byvar ne ) %then %do;
put "**** Program for %upcase(&byvar) &s terminated at iteration &i because all calculated margins";
%end; %else %do;
put "**** Program terminated at iteration &i because all calculated margins";
%end;
put "differ from Marginal Control Totals by less than &trmprec ";
run;
%end;
%end;
%else %do; /* no convergence */
%if (&byvar ne ) %then %do;
put "**** Program for %upcase(&byvar) &s terminated at iteration &i";
%end; %else %do;
put "**** Program terminated at iteration &i ";
%end;
put "**** No convergence achieved. Try NUMITER = %left(&sug)";
run;
%end;
data &outds(drop=cntmarg) ; /* create output data set */
set __i0;
rename weight=&outwt;
%let i=&numiter; /* for exiting i loop in case of early termination */
%end;
%end;
/* cleaning work data sets */
proc datasets library=work;
delete __i0 __i1 __i2 __i22 __outs;
run;
%mend;
%rakinge;
quit;

