Personal tools
You are here: Home Statistical and Analytical Methods Weighting SAS Raking Macro

SAS Raking Macro

— filed under:

SAS Raking Macro, from David Izrael, etc.

rakinge.sas — Plain Text, 13Kb

File contents

/**************************************************************************/
/*                           Macro: RAKINGE                               */
/*                                                                        */
/* Developed by David Izrael, Abt Associates, June 1999                   */
/*                                                                        */
/* Objective: To compute weights for a sample to make it agree            */
/*            with independent control totals on a number of              */
/*            characteristics                                             */
/*                                                                        */
/* Note from author: This macro is in general SAS-replica of raking       */
/*                   program developed in 1960's in Fortran by            */
/*                   MARKEMATH inc.                                       */ 
/*                                                                        */
/* Unlike the original Fortran program this SASmacro uses Mike Battaglia's*/         
/* convergence criterion which states that raking can continue            */                                 
/* until convergence of less than 1 is obtained                           */
/* between all of the control totals and the adjusted weighted totals.    */
/* 1 was set as a default but convergence criterion may be changed        */
/* by user through respective macro parameter (TRMPREC)                   */
/*                                                                        */
/* Requirements: Marginal proportions or totals must be passed to RAKINGE */
/*               in working                                               */
/*               data sets  and must contain raking variable and percent  */
/*               (PERCENT) or raking variable and marginal total          */
/*               (MRGTOTAL)                                               */
/*               Control Total must be obtained and passed to macro either*/
/*               by direct indication of parameter (CNTOTAL) or through   */
/*               macro variable created outside macro RAKING              */
/*                                                                        */
/* Notes: There is no limit on number of raking variables, however        */ 
/*        if number of raking variables is very large the running time may*/
/*        become unpredictably long.                                      */
/*                                                                        */
/*        There is no limit of number of categories of raking variables   */
/*                                                                        */
/*        From experience of authors of Fortran  raking procedure and     */
/*        author of current macro even on poorest of samples the algorithm*/
/*        converges in less than 10 passes (iterations). Default parameter*/
/*        (NUMITER)is set to 15 and can be changed by user.               */         
/*                                                                        */
/*        Listing of macro RAKING contains marginal totals, adjusted      */
/*        totals and their difference after each pass of each level of    */
/*        raking variable as well as termination diagnostics              */
/*                                                                        */
/* Recommendation: for speeding up raking process input data set should   */
/*                 contain only raking variables, input weight being      */ 
/*                 adjusted                                               */
/*                 and some key variables needed for further merging      */
/**************************************************************************/   


%macro rakinge (inds=INPUTDATASETNAME,
outds=OUTPUTDATASETNAME,
inwt=,       /* if unweighted sample, weight =1 will be assigned by macro */     
freqlist=,
outwt=NEW_PWEIGHT_VARIABLE_NAME,
byvar=,
varlist=LIST OF VARIABLES IN RAKING MODEL,
numvar=4,
cntotal=,  /* any number here, 100 is most natural */
trmprec=1,
trmpct=0.001, /* macro will terminate based on this criterion (WAS 0.001) */ 
numiter=50,	/* was 50 */
dircont=work,
prdiag=Y);


/*  checking on existence of required parameters */

%macro reqpar (param);

%if (%bquote(&&&param) eq ) %then %do;
     %put **** Program terminated: Macro parameter %upcase(&PARAM) missing ****;
     /*endsas;*/
                          %end;
%mend;


%reqpar (inds);
%reqpar (outds);
*****%reqpar (inwt);
%reqpar (outwt);
%reqpar (varlist);
%reqpar (numvar);
%reqpar (trmprec);
%reqpar (numiter);

/*  checking on number of raking variables */ 

%if  (%upcase(%scan(&varlist,&numvar)) eq ) or
     (%upcase(%scan(&varlist,%eval(&numvar+1))) ne ) 
%then %do;
     %put **** Program terminated: Number of variables in the VARLIST ****;
     %put **** does not match NUMVAR ****;
     /*endsas;*/
                          %end;

data __i0;
set &inds;
%if (&inwt ne ) %then %do;
weight=&inwt;
%end;
%else %do;
weight=1;
%end;


%do i=1 %to &numiter;                 /*  loop on iteration      */
   
  %let sumterm = 0;                   /* set cumulative sum of   */ 
                                      /* termination flags       */
                                    
                                      /* checking on number of   */
                                      /* raking variables        */
   
%do j=1 %to &numvar;                /* loop on raking variable */

  %let varrake=  %upcase(%scan(&varlist,&j)); /* retrieve raking variable*/
                                             /* from list               */ 
   %if (&freqlist ne ) %then 
                 %let dsfreq=%scan(&freqlist,&j); 
            %else
                 %let dsfreq=&varrake;       /* if list of ds with marg  */
                                             /* freq is empty then their */
                                             /* name are same as names   */            
                                             /* of raking variables      */
proc sort data=__i0;
by &varrake;
run;

proc summary nway data=__i0 ;       /* calc adjusted marginal total -sum&j */
class &varrake;
var weight;
output out=__i1(drop=_type_ _freq_) sum=sum&j;
run;


data __i0;
merge __i0(in=_1) __i1 &dsfreq(in=_2);      /* merge with ds with marginal proportions*/
by &varrake;

%if &i=1 %then %do;  /* all checking are done in 1st iteration only*/

if (_1 and ^_2) or (_2 and ^_1) then do;call symput('match','1');stop;end; else
                                     call symput('match','2');
if mrgtotal ne . then call symput ('mrg','1');else call symput('mrg','2');
if percent  ne . then call symput ('pct','1');else call symput('pct','2');

%end;
run;

%if &i=1 %then %do;            /* all checking are done on first iteration */
%if &match=1 %then %do;
%put 
**** Program terminated: levels of variable &varrake do not match ****;
%put
**** in sample and marginal totals data sets ****;

/*endsas;*/
%end; 

%if &pct = 1 and (&cntotal eq .) %then %do;
%put 
**** Program terminated: PERCENT is not missing and CNTOTAL is missing ****;
%put
**** for raking variable &varrake ****;

/*endsas;*/

%end; 
%else
%if &pct=2 and &mrg=2 %then %do;
  %put **** Program terminated: Both PERCENT and MRGTOTAL are missing ****;
/*endsas;*/
%end;
%end;

data __i0;
set __i0;

%if (&cntotal ne ) %then %do;
if mrgtotal ne . then                   /*  case of marginal totals*/     
cntmarg=mrgtotal;
else
if percent ne . then                     /*  case of marginal freqs */
cntmarg=&cntotal.*percent/100;
%end; 
%else %do;
if mrgtotal ne . then                   /*  case of marginal totals*/     
cntmarg=mrgtotal;
%end;

weight=weight*cntmarg/sum&j;     /* actual raking, weight adjustment */ 
drop percent mrgtotal;
run;


data __i2(keep=&varrake sum&j cntmarg differ) _forgraph(keep=Iteration &varrake differ);
retain &varrake sum&j cntmarg differ;
set __i0;
by &varrake;
if first.&varrake;
differ=cntmarg-sum&j;
Iteration=&i;
run;

/*** create % for printing ****/

proc summary data=__i2 nway noprint;
var sum&j cntmarg;
output out=__outs(drop=_: ) sum= sumsum summarg;
run;


data __i2(drop=sumsum summarg);
set __i2;
   if _n_ =1 then set __outs;

%if (&trmpct ne ) %then %do;
_pctsum=100*sum&j/sumsum;
_pctmrg=100*cntmarg/summarg;

%end;

%else %do;
_pctsum=100*sum&j/sumsum;
_pctmrg=100*cntmarg/summarg;

%end;


_diffpct=_pctsum-_pctmrg;
run;

%if &i=1 %then %do;

proc delete data= &dircont.._table_&varrake._&byvar; run;

%end;

proc append base=&dircont.._table_&varrake._&byvar data=_forgraph; /** accumulating tables with variables and differences **/
run; 
 

%if %upcase(&prdiag)=Y %then %do;

proc print label noobs data=__i2;                      

%if  (&byvar ne) %then %do; 
title3 "Raking %upcase(&byvar) - &s by &varrake, iteration - &i ";
                        %end; %else %do;
title3 "Raking  by &varrake, iteration - &i ";
%end;
sum sum&j cntmarg _pctsum _pctmrg;

label sum&j ='Calculated margin'
      differ='Difference'
      cntmarg='Marginal Control Total'
      _pctsum='Calculated %'
      _pctmrg='Marginal Control %'
      _diffpct= 'Difference in %';   

; 

format _pctsum _pctmrg _diffpct 6.3;
run;

%end;

data __i2;
set __i2 end=eof;
retain comm 0;

%if (&trmpct ne ) %then %do;
if  abs( _diffpct)>&trmpct then comm=1;     /*   termination test for proportions   */
%end;

%else %do;
if  abs( differ)>&trmprec then comm=1;     /*   termination test for totals   */
%end;


if eof and comm=1 then 
        call symput("term&j",'2');         /*   continue iterations */
else
if eof then call symput("term&j",'1');     /*        terminate      */
run;
%let sumterm=%eval(&sumterm+&&term&j);     /*   cumulation of termination 
                                              flags                 */
%end;

data __i0;
set __i0;
drop %do m=1 %to &numvar;  sum&m %end;;    /* drop all sums for next loop*/

%if (&byvar ne ) %then
%put %upcase(&byvar)=&s iteration=&i numvar=&numvar sumterm=&sumterm;   /* diagnostics for LOG */    
%else
%put  numvar=&numvar sumterm=&sumterm;

%if &sumterm=&numvar or &i=&numiter %then %do;                 /*  termination test */ 
  %if (&byvar ne ) %then
   %put **** Terminated %upcase(&byvar) &s at &i-th iteration;
                    %else 
   %put ***** Task terminated at &i-th iteration ****;

 title3 ' ';

%if &sumterm ne &numvar %then %do;

%do j=1 %to &numvar;

%let curr = %scan (&varlist,&j,%str( ) );

/** creates data sets for further graphing and analyses of convergence **/

proc sql;
create table _forgraph as
select iteration, &curr, log10(abs(differ)) as log_diff 
from &dircont.._table_&curr._&byvar a 
where abs(differ) = 
(select max(abs(differ)) from  &dircont.._table_&curr._&byvar b
where a.iteration =b.iteration)

order by iteration;
quit;

data _forgraph(drop=&curr);
set _forgraph;
variable=&j;

proc append base=_base data=_forgraph;
run;

%end;

/*** module for calculation of predicted number of iterations ***/

proc sort data=_base out=_out;
by descending iteration descending log_diff;

data _out;
set _out;
if _n_=1;

proc sort data=_base out=_out1;
by variable descending iteration;

data _out2;
merge _out1 _out(in=_1 rename=(iteration=last_iteration));
by variable;
if _1;


data _null_;
set _out2;
by descending iteration;
retain nach;
if iteration=last_iteration then nach=log_diff;
if iteration=last_iteration-1 then do; mytan=1/(log_diff-nach); 
                                       add_iter=ceil(nach*mytan);
                                       tota_iter=last_iteration+add_iter;
                                       call symput('sug',tota_iter);
                            end; 


%end;


data _null_;                                    /* diagnostic for listing */
set __i0;
if _n_=1;
    file print ls=80 ps=59;
put ' ';
%if &sumterm=&numvar %then %do;             /* convergence achieved */      

%if (&trmpct ne ) %then %do;

%if (&byvar ne ) %then %do;  
put "**** Program for %upcase(&byvar) &s terminated at iteration &i because all calculated percents";
                        %end; %else %do;
put "**** Program terminated at iteration &i because all calculated percents";
                                    %end;
put "differ from Marginal Percents by less than &trmpct ";
run;

%end;
%else %do;
%if (&byvar ne ) %then %do;  
put "**** Program for %upcase(&byvar) &s terminated at iteration &i because all calculated margins";
                        %end; %else %do;
put "**** Program terminated at iteration &i because all calculated margins";
                                    %end;
put "differ from Marginal Control Totals by less than &trmprec ";
run;
%end;



%end; 

%else %do;                                  /* no convergence */                  

%if (&byvar ne ) %then %do;  
put "**** Program for %upcase(&byvar) &s terminated at iteration &i";
                        %end; %else %do;
put "**** Program terminated at iteration &i ";
                                    %end;
put "**** No convergence achieved. Try NUMITER = %left(&sug)";
run;

%end;


data &outds(drop=cntmarg) ;                /*   create output data set */
set __i0;
rename weight=&outwt;

%let i=&numiter; /*   for exiting i loop in case of early termination */

    %end;
%end;
               /*    cleaning work data sets   */

                  proc datasets library=work;
                    delete __i0 __i1 __i2 __i22 __outs;
                  run;    

%mend;



%rakinge;


quit;
Document Actions

Copyright ©2009, The Pennsylvania State University | Privacy and Legal Statements
Contact the Help Site Administrator | Last modified Dec 03, 2008 | Weblion Partner