2015年12月17日 星期四

在R中處理複雜抽樣的統計方法(Dealing the statistics with complex sample in R)

    能夠處理複雜抽樣統計的軟件,大約有SUDAANSASSTATARSPSS,前4項都是需要編程的,SPSS則以菜單的形式操作。4個編程的軟件中,以其編程簡潔度來說,據聞首是STATA,其次是R5者中只有R是免費開源的。所以較理想的選擇應是R!
    R軟件內,處理複雜抽樣的包是”survey”,先要安裝和載入:

data(api) #survey包內的數據
survey包內附帶的api數據
str(apipop)
summary(apipop)
mean(apipop$api00) #若將複雜抽樣的數據當作隨機抽樣來處理結果: 664.7126
sum(apipop$enroll,na.rm = T) #結果: 3811472

#Specifying a complex survey design
dstrat<-svydesign(ids=~1, strata = ~stype, weights = ~pw, data = apistrat, fpc = ~fpc)
#定義複雜抽樣的數據
#ids/id 必定項, 取值0~1,
#strata 分層變項
#weights 加權變項
#fpc 有限總體的人數
summary(dstrat)
#Stratified Independent Sampling design
#svydesign(ids=~1, strata=~stype, weights=~pw, data= pistrat, fpc=~fpc)
#Probabilities:
#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#0.02262 0.02262 0.03587 0.04014 0.05339 0.06623 
#Stratum Sizes: 
#              E  H  M
#obs         100 50 50
#design.PSU 100 50 50
#actual.PSU 100 50 50
#Population stratum sizes (PSUs): 
#   E    H    M 
#4421  755 1018 
#Data variables:
# [1] "cds"      "stype"    "name"     "sname"    "snum"     "dname"   
# [7] "dnum"     "cname"    "cnum"     "flag"     "pcttest"  "api00"   
#[13] "api99"    "target"   "growth"   "sch.wide" "comp.imp" "both"    
#[19] "awards"   "meals"    "ell"      "yr.rnd"   "mobility" "acs.k3"  
#[25] "acs.46"   "acs.core" "pct.resp" "not.hsg"  "hsg"      "some.col"
#[31] "col.grad" "grad.sch" "avg.ed"   "full"     "emer"     "enroll"  
#[37] "api.stu"  "pw"       "fpc"     
svymean(~api00,dstrat,deff=T)
#          mean       SE   DEff
#api00 662.2874   9.4089 1.2045
#與上述當作隨機抽樣的結果相比較, 複雜抽樣的均數是662.2874, 是有分別的.
svytotal(~enroll,dstrat,deff=T) #計算總數
svyquantile(~api00,dstrat,c(0.25,0.5,0.75)) #計算四分位數
svyvar(~api00,dstrat) #計算方差
svyratio(~api.stu,~enroll,dstrat) #計算率

#One stage cluster sample
dclus1<-svydesign(id=~dnum, weights = ~pw, data = apiclus1, fpc=~fpc)
summary(dclus1)
svymean(~interaction(stype,comp.imp),dclus1) #交互項的平均值

#Two stage cluster sample
dclus2<-svydesign(id=~dnum+snum,fpc=~fpc1+fpc2,data = apiclus2)
summary(dclus2)

#Two stage 'with replacement'
dclus2wr<-svydesign(id=~dnum+snum,weights=~pw, data=apiclus2)
summary(dclus2wr)

參考文獻: (哈哈 =.=! 忘記了, 互聯網上的標準示範)

沒有留言:

張貼留言