#加載包
library(HSAUR2)
#加載數據集
data(household)

#查看數據結構
str(household)
## 'data.frame':    40 obs. of  5 variables:
##  $ housing: int  820 184 921 488 721 614 801 396 864 845 ...
##  $ food   : int  114 74 66 80 83 55 56 59 65 64 ...
##  $ goods  : int  183 6 1686 103 176 441 357 61 1618 1935 ...
##  $ service: int  154 20 455 115 104 193 214 80 352 414 ...
##  $ gender : Factor w/ 2 levels "female","male": 1 1 1 1 1 1 1 1 1 1 ...
head(household)
##   housing food goods service gender
## 1     820  114   183     154 female
## 2     184   74     6      20 female
## 3     921   66  1686     455 female
## 4     488   80   103     115 female
## 5     721   83   176     104 female
## 6     614   55   441     193 female


# A data frame with 40 observations on the following 5 variables.

# housing:expenditure on housing, including fuel and light.
# food:expenditure on foodstuffs, including alcohol and tobacco.
# goods:expenditure on other goods, including clothing, footwear and durable goods.
# service:expenditure on services, including transport and vehicles.
# gender:a factor with levels female and male

# The data are part of a data set collected from a survey of household expenditure and give the expenditure of 20 single men and 20 single women on four commodity groups.
# The units of expenditure are Hong Kong dollars,

#計算總支出
household$total <- household$housing + household$food+ household$goods + household$service

# add percent item for 4 variable (exclud gender)
household$h <- household$housing/household$total*100
household$f <- household$food/household$total*100
household$g <- household$goods/household$total*100
household$s <- household$service/household$total*100
head(household)
##   housing food goods service gender total        h         f         g
## 1     820  114   183     154 female  1271 64.51613  8.969315 14.398112
## 2     184   74     6      20 female   284 64.78873 26.056338  2.112676
## 3     921   66  1686     455 female  3128 29.44373  2.109974 53.900256
## 4     488   80   103     115 female   786 62.08651 10.178117 13.104326
## 5     721   83   176     104 female  1084 66.51292  7.656827 16.236162
## 6     614   55   441     193 female  1303 47.12203  4.221028 33.844973
##           s
## 1 12.116444
## 2  7.042254
## 3 14.546036
## 4 14.631043
## 5  9.594096
## 6 14.811972

# 為做條形圖整理數據
mdata <- matrix(c(household$h, household$f, household$g, household$s), 
             ncol=4, 
             dimnames=list(1:40, c("housing", "food", "goods", "service")))

#######################
# analysis
#########################
# 數據集中給出了調查對象的性別信息,
# 前二十個是女性,后二十個是男性

# 先看下總支出的散點圖
plot(household$total)

# plot(density(household$total[which(household$total<4000)]))

# 總支出可以分為兩大類別 以4000為分界線

#先來看下總支出在4000以上的高消費人群的特徵
# 4000以上的調查者編號
which(household$total>4000)
## [1] 25 30 31 35 36 37 40
# 25 30 31 35 36 37 40
#看下4000以上的具體數據
household[which(household$total>4000), ]
##    housing food goods service gender total        h         f        g
## 25    1585  781  2476    1740   male  6582 24.08083 11.865694 37.61775
## 30    1641  440  6471    2063   male 10615 15.45926  4.145078 60.96090
## 31    1180 1243   768     813   male  4004 29.47053 31.043956 19.18082
## 35    1981  869  1489    1032   male  5371 36.88326 16.179482 27.72296
## 36    1746  746  2662    1594   male  6748 25.87433 11.055127 39.44873
## 37    1865  915  5184    1767   male  9731 19.16555  9.402939 53.27304
## 40    1524  964  1739    1410   male  5637 27.03566 17.101295 30.84974
##           s
## 25 26.43573
## 30 19.43476
## 31 20.30470
## 35 19.21430
## 36 23.62181
## 37 18.15846
## 40 25.01330
summary(household[which(household$total>4000), ]$total)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4004    5504    6582    6955    8240   10615
# Min. 1st Qu.  Median    Mean 3rd Qu.   Max. 
# 4004   5504    6582    6955   8240  10620 
# 看下條形圖所反映的百分比情況
barplot(t(mdata[which(household$total>4000), ]), legend.text=TRUE, 
       args.legend=list(bty="n", horiz=TRUE), 
       xlab="單個調查者", ylab="百分比")

# 特徵1:都是男性調查者, 因為編號屬於后20位
# 特徵2:房屋支出占比較低,25%左右
# 特徵3:耐用品支出一般較高,37%左右
# 特徵4:服務支出較高,20%左右
# 特徵5:共7位調查者屬於高消費人群,占總調查人數的17.5%
# 7/40*100
# 標籤:吃穿用不愁、沒事買個奢侈品、找人服務下的高富帥們

#再來看下總支出在4000以下的人群
plot(household$total[which(household$total<4000)])

# plot(density(household$total[which(household$total<4000)]))
barplot(t(mdata[which(household$total<4000), ]), legend.text=TRUE, 
       args.legend=list(bty="n", horiz=TRUE), 
       xlab="單個調查者", ylab="百分比")

#部份人住房支出較高50%以上
#部份人食品支出較高30%以上
#部份人耐用品支出較高30%以上

# 4000以下的調查者編號 33個
which(household$total<4000)
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [24] 24 26 27 28 29 32 33 34 38 39
# 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18
# 19 20 21 22 23 24 26 27 28 29 32 33 34 38 39

#看下4000以下並且住房支出占比50%以上的調查者編號 13個
which(household$total<4000 & household$h>50)
##  [1]  1  2  4  5  7  8 11 13 14 15 16 18 20
# 1  2  4  5  7  8 11 13 14 15 16 18 20
#看下具體數據 
household[which(household$total<4000 & household$h>50), ]
##    housing food goods service gender total        h         f         g
## 1      820  114   183     154 female  1271 64.51613  8.969315 14.398112
## 2      184   74     6      20 female   284 64.78873 26.056338  2.112676
## 4      488   80   103     115 female   786 62.08651 10.178117 13.104326
## 5      721   83   176     104 female  1084 66.51292  7.656827 16.236162
## 7      801   56   357     214 female  1428 56.09244  3.921569 25.000000
## 8      396   59    61      80 female   596 66.44295  9.899329 10.234899
## 11     404   97    33      47 female   581 69.53528 16.695353  5.679862
## 13     457  103   136     108 female   804 56.84080 12.810945 16.915423
## 14    1029   71   244     189 female  1533 67.12329  4.631442 15.916504
## 15    1047   90   653     298 female  2088 50.14368  4.310345 31.273946
## 16     552   91   185     158 female   986 55.98377  9.229209 18.762677
## 18     495  114    65      74 female   748 66.17647 15.240642  8.689840
## 20    1090   59   313     177 female  1639 66.50397  3.599756 19.097010
##            s
## 1  12.116444
## 2   7.042254
## 4  14.631043
## 5   9.594096
## 7  14.985994
## 8  13.422819
## 11  8.089501
## 13 13.432836
## 14 12.328767
## 15 14.272031
## 16 16.024341
## 18  9.893048
## 20 10.799268
#看下該群體的總支出數據情況
summary(household[which(household$total<4000 & household$h>50), ]$total)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     284     748     986    1064    1428    2088
# Min. 1st Qu.  Median    Mean 3rd Qu.   Max. 
# 284    748     986   1064    1428    2088
# 特徵1:全是女性調查者
# 特徵2:房屋支出占比高,50%以上
# 特徵3:總支出總體上在2000以下, 中位數是986
# 特徵4:共13位調查者,占總調查人數的32.5%
# 13/40*100
# 標籤:紮進褲腰帶還房貸或房租的艱苦奮鬥省吃儉用的苦逼月光女同胞

#看下4000以下並且食品支出占比30%以上的調查者編號 12個
which(household$total<4000 & household$f>30)
##  [1] 21 22 23 24 26 27 29 32 33 34 38 39
# 21 22 23 24 26 27 29 32 33 34 38 39
#看下具體數據 
household[which(household$total<4000 & household$f>30), ]
##    housing food goods service gender total        h        f         g
## 21     497  591   153     291   male  1532 32.44125 38.57702  9.986945
## 22     839  942   302     365   male  2448 34.27288 38.48039 12.336601
## 23     798 1308   668     584   male  3358 23.76415 38.95176 19.892793
## 24     892  842   287     395   male  2416 36.92053 34.85099 11.879139
## 26     755  764   428     438   male  2385 31.65618 32.03354 17.945493
## 27     388  655   153     233   male  1429 27.15185 45.83625 10.706788
## 29     248  438    22      65   male   773 32.08279 56.66235  2.846054
## 32     619  684    99     204   male  1606 38.54296 42.59029  6.164384
## 33     253  422    15      48   male   738 34.28184 57.18157  2.032520
## 34     661  739    71     188   male  1659 39.84328 44.54491  4.279687
## 38     238  522    29      75   male   864 27.54630 60.41667  3.356481
## 39    1199 1095   261     344   male  2899 41.35909 37.77165  9.003105
##            s
## 21 18.994778
## 22 14.910131
## 23 17.391304
## 24 16.349338
## 26 18.364780
## 27 16.305108
## 29  8.408797
## 32 12.702366
## 33  6.504065
## 34 11.332128
## 38  8.680556
## 39 11.866161
#看下該群體的總支出數據情況
summary(household[which(household$total<4000 & household$f>30), ]$total)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     738    1288    1632    1842    2424    3358
# Min. 1st Qu.  Median    Mean 3rd Qu.   Max. 
# 738   1288    1632    1842   2424   3358 
# 特徵1:全是男性調查者
# 特徵2:食品支出占比高,30%以上
# 特徵3:總支出總體上在3500以下,中位數是1632
# 特徵4:共12位調查者,占總調查人數的30%
# 12/40*100
# 標籤:伴隨總支出的增加,終於可以在支付房貸或租金后
# 放心大膽的吃喝認為明天更美好的男同胞

#看下4000以下並且耐用品支出占比30%以上的調查者編號 7個
which(household$total<4000 & household$g>30)
## [1]  3  6  9 10 12 15 17
# 3  6  9 10 12 15 17
#看下具體數據 
household[which(household$total<4000 & household$g>30), ]
##    housing food goods service gender total        h        f        g
## 3      921   66  1686     455 female  3128 29.44373 2.109974 53.90026
## 6      614   55   441     193 female  1303 47.12203 4.221028 33.84497
## 9      864   65  1618     352 female  2899 29.80338 2.242152 55.81235
## 10     845   64  1935     414 female  3258 25.93616 1.964395 59.39227
## 12     781   47  1906     452 female  3186 24.51350 1.475204 59.82423
## 15    1047   90   653     298 female  2088 50.14368 4.310345 31.27395
## 17     718  104   583     304 female  1709 42.01287 6.085430 34.11352
##           s
## 3  14.54604
## 6  14.81197
## 9  12.14212
## 10 12.70718
## 12 14.18707
## 15 14.27203
## 17 17.78818
#看下該群體的總支出數據情況
summary(household[which(household$total<4000 & household$g>30), ]$total)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1303    1898    2899    2510    3157    3258
# Min. 1st Qu.  Median    Mean 3rd Qu.   Max. 
# 1303   1898    2899    2510   3157   3258 
# 特徵1:全是女性調查者
# 特徵2:衣服鞋子及耐用品支出占比高,30%~60%之間
# 特徵3:總支出總體上在3500以下,中位數是2899
# 特徵4:共7位調查者,占總調查人數的17.5%
# 7/40*100
# 標籤:伴隨總支出的增加,終於可以在支付房貸或租金后
# 放心大膽的瘋狂購物認為未來可以嚮往的女同胞

#看下總支出4000以下的三類人群的總支出大體位置
plot(household$total[which(household$total<4000 & household$h>50)], pch=1, ylim=c(0, 4000), 
    ylab="總支出", xlab="單個調查者")
points(household$total[which(household$total<4000 & household$f>30)], pch=2)
points(household$total[which(household$total<4000 & household$g>30)], pch=3)
legend("top", legend=c("housing", "food", "goods"), pch=1:3, bty="n")

下面開始YY,這個小數據集剛好是三足鼎立啊

鄭重聲明:此分析純屬虛構,數據也是習題而已

备注:转移自新浪博客,截至2021年11月,原阅读数61,评论0个。