/*********************************************************************
 This program provides SAS sample code to use when analyzing
 Community Health Survey (CHS) data, for users who have access to SAS
 but not SUDAAN.

 For more information, please contact:
   NYC Department of Health & Mental Hygiene
   Bureau of Epidemiology Services
   EpiDatarequest@health.nyc.gov

 Steps in this program:
   1. Load CHS 2017 and its formats.
   2. Weighted N for smoker by sex.
   3. Crude prevalence of smoker (overall, then within sex).
   4. Age-adjusted prevalence of smoker = 2 (overall, then by sex),
      with confidence limits computed on the logit scale.
*********************************************************************/

/* Call in CHS 2017. Working data set is called chs2017.              */
/* There are 10005 observations and 133 variables in the dataset.     */
/* Enter the pathway where the dataset and format programs are stored */
/* in place of each X below.                                          */
libname intdat 'X';

/* NOTE(review): the fileref formatin is not referenced anywhere in    */
/* the visible code, and it names a different file than the %include  */
/* below. Confirm which format program(s) should actually be run.     */
filename formatin 'X\formatstatements_chs2017_public.sas';
%include 'X\formats_chs2017_public.sas';

data chs2017;
    set intdat.chs2017_public;
run;

/* Sorted once here. The survey procedures below identify strata via  */
/* their STRATA statement and do not need the input to be re-sorted,  */
/* so the sort is not repeated before each procedure.                 */
proc sort data=chs2017;
    by strata;
run;

proc contents data=chs2017;
run;

/*------------------------------------------------------------------*/
/* Weighted N.                                                      */
/* Use this table only for the weighted frequencies. Take           */
/* prevalence estimates and confidence limits from the survey       */
/* procedures below, not from this table's row/column percentages.  */
/* Weighted N estimates are typically rounded to the nearest 1,000. */
/*------------------------------------------------------------------*/
proc freq data=chs2017;
    tables sex*(smoker);
    weight wt18_dual;
run;

/*------------------------------------------------------------------*/
/* Crude prevalence estimate.                                       */
/*------------------------------------------------------------------*/
proc surveyfreq data=chs2017 NOMCAR varmethod=taylor;
    strata strata;
    weight wt18_dual;
    table smoker / expected row cl(type=logit);
    ods output Oneway = crd_sas1;
run;

/* Drop the total row (smoker missing) from the printout. */
proc print data=crd_sas1;
    where smoker ne .;
    var smoker WgtFreq Percent LowerCL UpperCL;
run;

/*------------------------------------------------------------------*/
/* Crude subgroup prevalence estimate (smoker within sex).          */
/*------------------------------------------------------------------*/
proc surveyfreq data=chs2017 NOMCAR varmethod=taylor;
    strata strata;
    weight wt18_dual;
    table sex*smoker / expected row cl(type=logit);
    ods output CrossTabs = crd_sas2;
run;

/* Percent/LowerCL/UpperCL are cell percentages of the whole table.  */
/* The prevalence of each smoker level WITHIN a sex is the row       */
/* percentage requested by the ROW option, so RowPercent and its     */
/* limits are printed as well. Total rows (missing smoker or sex)    */
/* are dropped.                                                      */
proc print data=crd_sas2;
    where smoker ne . and sex ne .;
    var smoker sex WgtFreq Percent LowerCL UpperCL
        RowPercent RowLowerCL RowUpperCL;
run;

/*------------------------------------------------------------------*/
/* Prepare variables for creating age adjusted prevalence estimates.*/
/* Each smokerK is 100 when smoker = K, 0 for the other two valid   */
/* levels, and missing otherwise, so that its weighted mean is the  */
/* prevalence of level K expressed in percent.                      */
/*------------------------------------------------------------------*/
data temp;
    set chs2017;

    if smoker = 1 then smoker1 = 100;
    else if smoker in (2,3) then smoker1 = 0;
    else smoker1 = .;

    if smoker = 2 then smoker2 = 100;
    else if smoker in (1,3) then smoker2 = 0;
    else smoker2 = .;

    if smoker = 3 then smoker3 = 100;
    else if smoker in (1,2) then smoker3 = 0;
    else smoker3 = .;
run;

/* Check the recodes against the source variable. */
proc freq data=temp;
    table smoker smoker1 smoker2 smoker3;
run;

/* NOTE(review): age_out is created empty and is not referenced by    */
/* any later step visible in this program. Kept in case other code   */
/* relies on it; confirm whether it can be removed.                   */
data age_out;
run;

/*------------------------------------------------------------------*/
/* %logit_ci: add logit-transformed confidence limits to a          */
/* PROC SURVEYREG Estimates data set.                               */
/*                                                                  */
/*   in=     input data set holding Estimate, StdErr and DF         */
/*           (estimate and standard error on the 0-100 scale)       */
/*   out=    output data set                                        */
/*   smoker= value stored in the variable smoker as a row label     */
/*   alpha=  two-sided significance level (default 0.05)            */
/*                                                                  */
/* Adds: smoker, alpha, p, CritVal, Percent and PerStdErr (the      */
/* estimate and its standard error as proportions), fp, s_fp, L_f,  */
/* U_f (logit-scale estimate, standard error and limits), and       */
/* low / up (limits back-transformed to percent).                   */
/*------------------------------------------------------------------*/
%macro logit_ci(in=, out=, smoker=, alpha=0.05);
    data &out;
        set &in;
        smoker = &smoker;
        alpha = &alpha;
        p = 1 - alpha/2;
        CritVal = TINV(p, DF);
        Percent = Estimate/100;
        PerStdErr = StdErr/100;
        /* The logit is only defined strictly between 0 and 1. For a */
        /* missing or out-of-range estimate the limits stay missing  */
        /* instead of raising invalid-argument notes from LOG().     */
        if 0 < Percent < 1 then do;
            fp = log(Percent) - log(1 - Percent);
            s_fp = PerStdErr/((Percent)*(1 - Percent));
            L_f = fp - CritVal*s_fp;
            U_f = fp + CritVal*s_fp;
            low = (exp(L_f)/(1 + exp(L_f)))*100;
            up = (exp(U_f)/(1 + exp(U_f)))*100;
        end;
    run;
%mend logit_ci;

/*------------------------------------------------------------------*/
/* Age adjusted prevalence estimate for smoker = 2.                 */
/* The ESTIMATE coefficients on agegroup are the age-standard       */
/* weights (they sum to 1), one per agegroup level in CLASS order.  */
/*------------------------------------------------------------------*/
proc surveyreg data=temp NOMCAR varmethod=taylor;
    strata strata;
    class agegroup;
    weight wt18_dual;
    model smoker2 = agegroup / solution;
    estimate "Total" intercept 1 agegroup 0.128810 0.401725 0.299194 0.170271;
    /* Selected by table name, as in the subgroup step below. */
    ods output Estimates=adj_sas2;
run;

%logit_ci(in=adj_sas2, out=age_sas, smoker=2)

proc print data=age_sas;
    var smoker Estimate low up;
run;

/*------------------------------------------------------------------*/
/* Age adjusted subgroup prevalence estimate (by sex).              */
/*------------------------------------------------------------------*/
proc surveyreg data=temp NOMCAR varmethod=taylor;
    strata strata;
    class agegroup;
    weight wt18_dual;
    model smoker2 = agegroup / solution;
    domain sex;
    estimate "Total" intercept 1 agegroup 0.128810 0.401725 0.299194 0.170271;
    ods output Estimates=age_domain_sas2;
run;

%logit_ci(in=age_domain_sas2, out=age_sas2, smoker=2)

proc print data=age_sas2;
    var smoker domain Estimate low up;
run;

/* Age Adjustment using the 2000 Projected U.S. Population.
   For a discussion of age adjustment and links to articles, sample
   code, and data, see:
   http://www.cdc.gov/nchs/data/statnt/statnt20.pdf */