*This program provides SAS sample code to use when analyzing Community Health Survey data    *
*for users who have access to SAS but not SUDAAN.                           *

For more information, please contact:
NYC Department of Health & Mental Hygiene
Bureau of Epidemiology Services
EpiDatarequest@health.nyc.gov
********************************************************************;

/*Call in CHS 2017. Working data set is called chs2017*/
/*There are 10005 observations and 133 variables in the dataset*/
/*Replace X below with the path to the folder where the dataset and format programs are stored*/
/*intdat   : library from which chs2017_public is read later in this program*/
/*formatin : fileref pointing at the format-statements program*/
/*NOTE(review): formatin is not referenced again in the visible code; presumably it was meant to be*/
/*pulled into the data step that builds chs2017 - confirm before relying on formatted output*/
/*The %include line runs formats_chs2017_public.sas (presumably the format definitions - confirm)*/

libname intdat    'X';
filename formatin 'X\formatstatements_chs2017_public.sas';
%include          'X\formats_chs2017_public.sas';

/* Copy the public-use file from the intdat library into the working
   dataset chs2017, order it by strata, and list its contents. */
DATA chs2017;
    SET intdat.chs2017_public;
RUN;

PROC SORT DATA=chs2017;
    BY strata;
RUN;

PROC CONTENTS DATA=chs2017;
RUN;

/* Weighted N (weighted counts) for sex by smoker, using wt18_dual as the weight.
   Use this step for the weighted counts only. Do not report its row or column
   percentages as prevalence estimates: the step applies the weight but has no
   STRATA statement. The survey procedures later in this program produce the
   prevalence estimates.
   Weighted N estimates are typically rounded to the nearest 1,000. */
PROC FREQ DATA=chs2017;
    WEIGHT wt18_dual;
    TABLES sex*smoker;
RUN;

/* Crude prevalence estimate: one-way weighted table of smoker with
   logit-type confidence limits, stratified by strata and weighted by wt18_dual.
   The one-way ODS table is captured as crd_sas1. */
PROC SORT DATA=chs2017;
    BY strata;
RUN;

PROC SURVEYFREQ DATA=chs2017 VARMETHOD=taylor NOMCAR;
    WEIGHT wt18_dual;
    STRATA strata;
    /* NOTE(review): EXPECTED and ROW are two-way table options; they are kept
       here unchanged but probably have no effect on a one-way table - confirm */
    TABLE smoker / CL(TYPE=logit) ROW EXPECTED;
    ODS OUTPUT OneWay=crd_sas1;
RUN;
QUIT;

/* Print one row per smoker level; rows where smoker is missing
   (presumably the Total row of the ODS table) are filtered out. */
PROC PRINT DATA=crd_sas1(WHERE=(smoker ne .));
    VAR smoker WgtFreq Percent LowerCL UpperCL;
RUN;

/* Crude subgroup prevalence estimate: smoker status within each level of sex.
   The two-way table sex*smoker is requested with the ROW option, so the
   CrossTabs ODS table (saved as crd_sas2) holds two sets of percentages:
     Percent    / LowerCL    / UpperCL     - each cell as a share of ALL respondents
     RowPercent / RowLowerCL / RowUpperCL  - each smoker level as a share of its sex row
   The subgroup prevalence is the row percentage, so those columns are printed
   first. The overall cell percentages are still printed after them so that
   existing users of this listing keep the columns they had before. */
proc sort data=chs2017; by strata; run;
proc surveyfreq data=chs2017 NOMCAR varmethod=taylor;
	strata strata;
	weight wt18_dual;
	table sex*smoker/ expected row cl(type=logit); 
	ods output  CrossTabs = crd_sas2;
run;  quit;

/* Rows with a missing sex or smoker value (the total rows of the table) are
   dropped so that only the sex-by-smoker cells are listed. */
proc print data = crd_sas2;
	where smoker ne . and sex ne .;
	var smoker sex WgtFreq RowPercent RowLowerCL RowUpperCL Percent LowerCL UpperCL;
run;

/* Prepare variables for creating age adjusted prevalence estimates.
   For each smoker level k in 1-3, smokerk is 100 when smoker equals k,
   0 when smoker is one of the other two levels, and missing for any other
   value of smoker. Later steps divide the model estimates by 100, so the
   0/100 coding keeps those estimates on a percentage scale. */
DATA temp;
    SET chs2017;
    SELECT (smoker);
        WHEN (1) DO;
            smoker1 = 100;
            smoker2 = 0;
            smoker3 = 0;
        END;
        WHEN (2) DO;
            smoker1 = 0;
            smoker2 = 100;
            smoker3 = 0;
        END;
        WHEN (3) DO;
            smoker1 = 0;
            smoker2 = 0;
            smoker3 = 100;
        END;
        OTHERWISE DO;
            smoker1 = .;
            smoker2 = .;
            smoker3 = .;
        END;
    END;
RUN;

/* Check the recode against the original variable. */
PROC FREQ DATA=temp;
    TABLES smoker smoker1-smoker3;
RUN;

/* NOTE(review): age_out is created here with no SET statement and is not
   referenced by any later step visible in this program - confirm whether it
   is still needed. */
DATA age_out;
RUN;

/* Age adjusted prevalence estimate for smoker = 2.
   smoker2 (0/100 indicator) is regressed on agegroup, and the ESTIMATE
   statement combines the intercept with the agegroup effects using four
   fixed weights that sum to 1. The weights are presumably the 2000 U.S.
   standard population proportions for the four agegroup levels, per the
   reference at the end of this program - confirm the level order.
   The resulting estimate table is saved as adj_sas2. */
PROC SORT DATA=temp;
    BY strata;
RUN;

PROC SURVEYREG DATA=temp VARMETHOD=taylor NOMCAR;
    WEIGHT wt18_dual;
    STRATA strata;
    CLASS agegroup;
    MODEL smoker2 = agegroup / SOLUTION;
    ESTIMATE "Total" intercept 1 agegroup 0.128810 0.401725 0.299194 0.170271;
    ODS OUTPUT Surveyreg.DependentVariable.Estimates=adj_sas2;
RUN;
QUIT;

/* Build a 95% confidence interval for the age adjusted estimate in adj_sas2.
   The interval is computed on the logit scale and transformed back to a
   percentage. Estimate, StdErr and DF are not created in this step, so they
   are expected to come from adj_sas2. */
data age_sas;
set adj_sas2;
/* Label the row with the smoker level that was modeled (smoker2) */
smoker = 2;
/* Two-sided 95% interval: p is the upper quantile level 0.975 */
alpha=0.05;
p=1-alpha/2;
/* SAS variable names are case-insensitive, so this assigns DF to itself */
df=DF;
/* t critical value for quantile level p with df degrees of freedom */
CritVal = TINV(p,df);
/* Rescale the estimate and its standard error from the 0-100 scale to 0-1 */
Percent=Estimate/100;
PerStdErr=StdErr/100;

/* Logit of the proportion, and its standard error: SE / (p * (1 - p)).
   If Percent is not strictly between 0 and 1 the LOG calls yield missing values. */
fp=log(Percent)-log(1-Percent);
s_fp=PerStdErr/((Percent)*(1-Percent));

/* Lower and upper limits on the logit scale */
L_f=fp-CritVal*s_fp;
U_f=fp+CritVal*s_fp;

/* Back-transform the limits to the 0-100 percentage scale */
low=(exp(L_f)/(1+exp(L_f)))*100;
up=(exp(U_f)/(1+exp(U_f)))*100;
run;
/* List the estimate with its lower and upper confidence limits */
proc print data=age_sas; 
var smoker Estimate low up;
run;

/* Age adjusted subgroup prevalence estimate.
   Same model and ESTIMATE weights as the overall age adjusted step
   (smoker2 on agegroup), with a DOMAIN statement so that the analysis is
   also carried out within each level of sex. The estimate tables are saved
   together as age_domain_sas2. */
PROC SURVEYREG DATA=temp VARMETHOD=taylor NOMCAR;
    WEIGHT wt18_dual;
    STRATA strata;
    DOMAIN sex;
    CLASS agegroup;
    MODEL smoker2 = agegroup / SOLUTION;
    ESTIMATE "Total" intercept 1 agegroup 0.128810 0.401725 0.299194 0.170271;
    ODS OUTPUT Estimates=age_domain_sas2;
RUN;
QUIT;

/* Build 95% confidence intervals for the age adjusted estimates in
   age_domain_sas2 (one row per estimate table captured from the domain
   analysis). Each interval is computed on the logit scale and transformed
   back to a percentage. Estimate, StdErr and DF are not created in this
   step, so they are expected to come from age_domain_sas2. */
data age_sas2;
set age_domain_sas2;
/* Label the rows with the smoker level that was modeled (smoker2) */
smoker = 2;
/* Two-sided 95% interval: p is the upper quantile level 0.975 */
alpha=0.05;
p=1-alpha/2;
/* SAS variable names are case-insensitive, so this assigns DF to itself */
df=DF;
/* t critical value for quantile level p with df degrees of freedom */
CritVal = TINV(p,df);
/* Rescale the estimate and its standard error from the 0-100 scale to 0-1 */
Percent=Estimate/100;
PerStdErr=StdErr/100;

/* Logit of the proportion, and its standard error: SE / (p * (1 - p)).
   If Percent is not strictly between 0 and 1 the LOG calls yield missing values. */
fp=log(Percent)-log(1-Percent);
s_fp=PerStdErr/((Percent)*(1-Percent));

/* Lower and upper limits on the logit scale */
L_f=fp-CritVal*s_fp;
U_f=fp+CritVal*s_fp;

/* Back-transform the limits to the 0-100 percentage scale */
low=(exp(L_f)/(1+exp(L_f)))*100;
up=(exp(U_f)/(1+exp(U_f)))*100;
run;

/* List each estimate with its confidence limits.
   NOTE(review): assumes the ODS Estimates table carries a column named domain - confirm */
proc print data=age_sas2; 
var smoker domain Estimate low up;
run;

/*Age Adjustment using the 2000 Projected U.S. Population 
For a discussion of age adjustment and links to articles, sample code, and data, see:
http://www.cdc.gov/nchs/data/statnt/statnt20.pdf */