版权声明:本套课程材料开源,使用和分享必须遵守「创作共用许可协议 CC BY-NC-SA」(来源引用-非商业用途使用-以相同方式共享)。
data[i, j, by] ——
只需要一对中括号,就能完成所有操作!
i:行操作j:列操作by:分组变量## 数据准备:大五人格问卷BFI(部分数据)
data = as.data.table(psych::bfi)
d = data[31:50, c("gender", "education", "age")]
d gender education age
<int> <int> <int>
1: 2 NA 17
2: 2 3 41
3: 1 5 23
4: 2 NA 17
5: 1 3 20
6: 1 3 23
7: 1 3 20
8: 1 3 21
9: 1 NA 30
10: 2 5 48
11: 2 3 40
12: 2 4 27
13: 1 1 18
14: 1 4 20
15: 2 5 24
16: 1 3 25
17: 1 2 22
18: 2 1 18
19: 2 1 43
20: 1 3 20
gender education age
<int> <int> <int>
Classes 'data.table' and 'data.frame': 20 obs. of 3 variables:
$ gender : int 2 2 1 2 1 1 1 1 1 2 ...
$ education: int NA 3 5 NA 3 3 3 3 NA 5 ...
$ age : int 17 41 23 17 20 23 20 21 30 48 ...
- attr(*, ".internal.selfref")=<externalptr>
gender education age Gender
<int> <int> <int> <fctr>
1: 2 NA 17 Female
2: 2 3 41 Female
3: 1 5 23 Male
4: 2 NA 17 Female
5: 1 3 20 Male
6: 1 3 23 Male
7: 1 3 20 Male
8: 1 3 21 Male
9: 1 NA 30 Male
10: 2 5 48 Female
11: 2 3 40 Female
12: 2 4 27 Female
13: 1 1 18 Male
14: 1 4 20 Male
15: 2 5 24 Female
16: 1 3 25 Male
17: 1 2 22 Male
18: 2 1 18 Female
19: 2 1 43 Female
20: 1 3 20 Male
gender education age Gender
<int> <int> <int> <fctr>
gender education age Gender
<int> <int> <int> <fctr>
1: 2 NA 17 Female
2: 2 3 41 Female
3: 1 5 23 Male
4: 2 NA 17 Female
5: 1 3 20 Male
6: 1 3 23 Male
7: 1 3 20 Male
8: 1 3 21 Male
9: 1 NA 30 Male
10: 2 5 48 Female
11: 2 3 40 Female
12: 2 4 27 Female
13: 1 1 18 Male
14: 1 4 20 Male
15: 2 5 24 Female
16: 1 3 25 Male
17: 1 2 22 Male
18: 2 1 18 Female
19: 2 1 43 Female
20: 1 3 20 Male
gender education age Gender
<int> <int> <int> <fctr>
## 方式 3: 多变量 let() 函数 (原地更新)
d[, let(
Gender = factor(gender, levels=1:2, labels=c("Male", "Female")),
Age = as.numeric(age),
Edu = as.factor(education)
)]
d gender education age Gender Age Edu
<int> <int> <int> <fctr> <num> <fctr>
1: 2 NA 17 Female 17 <NA>
2: 2 3 41 Female 41 3
3: 1 5 23 Male 23 5
4: 2 NA 17 Female 17 <NA>
5: 1 3 20 Male 20 3
6: 1 3 23 Male 23 3
7: 1 3 20 Male 20 3
8: 1 3 21 Male 21 3
9: 1 NA 30 Male 30 <NA>
10: 2 5 48 Female 48 5
11: 2 3 40 Female 40 3
12: 2 4 27 Female 27 4
13: 1 1 18 Male 18 1
14: 1 4 20 Male 20 4
15: 2 5 24 Female 24 5
16: 1 3 25 Male 25 3
17: 1 2 22 Male 22 2
18: 2 1 18 Female 18 1
19: 2 1 43 Female 43 1
20: 1 3 20 Male 20 3
gender education age Gender Age Edu
<int> <int> <int> <fctr> <num> <fctr>
Classes 'data.table' and 'data.frame': 20 obs. of 6 variables:
$ gender : int 2 2 1 2 1 1 1 1 1 2 ...
$ education: int NA 3 5 NA 3 3 3 3 NA 5 ...
$ age : int 17 41 23 17 20 23 20 21 30 48 ...
$ Gender : Factor w/ 2 levels "Male","Female": 2 2 1 2 1 1 1 1 1 2 ...
$ Age : num 17 41 23 17 20 23 20 21 30 48 ...
$ Edu : Factor w/ 5 levels "1","2","3","4",..: NA 3 5 NA 3 3 3 3 NA 5 ...
- attr(*, ".internal.selfref")=<externalptr>
(本小节进度:1/4)
gender education age Gender
<int> <int> <int> <fctr>
1: 2 NA 17 Female
2: 2 3 41 Female
3: 1 5 23 Male
4: 2 NA 17 Female
5: 1 3 20 Male
6: 1 3 23 Male
7: 1 3 20 Male
8: 1 3 21 Male
9: 1 NA 30 Male
10: 2 5 48 Female
11: 2 3 40 Female
12: 2 4 27 Female
13: 1 1 18 Male
14: 1 4 20 Male
15: 2 5 24 Female
16: 1 3 25 Male
17: 1 2 22 Male
18: 2 1 18 Female
19: 2 1 43 Female
20: 1 3 20 Male
gender education age Gender
<int> <int> <int> <fctr>
Gender
<fctr>
1: Female
2: Female
3: Male
4: Female
5: Male
6: Male
7: Male
8: Male
9: Male
10: Female
11: Female
12: Female
13: Male
14: Male
15: Female
16: Male
17: Male
18: Female
19: Female
20: Male
Gender
<fctr>
(本小节进度:2/4)
gender education age A1 A2 A3 A4 A5
<int> <int> <int> <int> <int> <int> <int> <int>
1: 2 NA 17 1 6 5 6 3
2: 2 3 41 2 5 6 6 6
3: 1 5 23 1 5 6 5 4
4: 2 NA 17 2 4 5 6 5
5: 1 3 20 4 4 4 4 4
6: 1 3 23 5 3 5 4 2
7: 1 3 20 1 6 4 6 4
8: 1 3 21 1 4 4 2 3
9: 1 NA 30 1 6 6 6 6
10: 2 5 48 1 5 4 3 5
11: 2 3 40 1 5 5 6 5
12: 2 4 27 5 4 3 6 4
13: 1 1 18 1 5 4 4 5
14: 1 4 20 5 6 4 3 5
15: 2 5 24 2 6 6 6 6
16: 1 3 25 1 6 6 6 6
17: 1 2 22 5 5 3 4 3
18: 2 1 18 2 6 4 5 5
19: 2 1 43 1 5 3 2 3
20: 1 3 20 1 6 6 6 6
gender education age A1 A2 A3 A4 A5
<int> <int> <int> <int> <int> <int> <int> <int>
Classes 'data.table' and 'data.frame': 20 obs. of 8 variables:
$ gender : int 2 2 1 2 1 1 1 1 1 2 ...
$ education: int NA 3 5 NA 3 3 3 3 NA 5 ...
$ age : int 17 41 23 17 20 23 20 21 30 48 ...
$ A1 : int 1 2 1 2 4 5 1 1 1 1 ...
$ A2 : int 6 5 5 4 4 3 6 4 6 5 ...
$ A3 : int 5 6 6 5 4 5 4 4 6 4 ...
$ A4 : int 6 6 5 6 4 4 6 2 6 3 ...
$ A5 : int 3 6 4 5 4 2 4 3 6 5 ...
- attr(*, ".internal.selfref")=<externalptr>
gender
<int>
1: 2
2: 2
3: 1
4: 2
5: 1
[1] 2 2 1 2 1
gender age
<int> <int>
1: 2 17
2: 2 41
3: 1 23
4: 2 17
5: 1 20
gender age
<int> <int>
1: 2 17
2: 2 41
3: 1 23
4: 2 17
5: 1 20
gender education age A1 A2 A3 A4 A5
<int> <int> <int> <int> <int> <int> <int> <int>
1: 2 NA 17 1 6 5 6 3
2: 2 3 41 2 5 6 6 6
3: 1 5 23 1 5 6 5 4
gender education age A1 A2 A3 A4 A5
<int> <int> <int> <int> <int> <int> <int> <int>
1: 2 NA 17 1 6 5 6 3
2: 2 3 41 2 5 6 6 6
3: 1 5 23 1 5 6 5 4
education age
<int> <int>
1: NA 17
2: 3 41
3: 5 23
4: NA 17
5: 3 20
6: 3 23
7: 3 20
8: 3 21
9: NA 30
10: 5 48
11: 3 40
12: 4 27
13: 1 18
14: 4 20
15: 5 24
16: 3 25
17: 2 22
18: 1 18
19: 1 43
20: 3 20
education age
<int> <int>
age A1 A2 A3
<int> <int> <int> <int>
1: 17 1 6 5
2: 41 2 5 6
3: 23 1 5 6
4: 17 2 4 5
5: 20 4 4 4
6: 23 5 3 5
7: 20 1 6 4
8: 21 1 4 4
9: 30 1 6 6
10: 48 1 5 4
11: 40 1 5 5
12: 27 5 4 3
13: 18 1 5 4
14: 20 5 6 4
15: 24 2 6 6
16: 25 1 6 6
17: 22 5 5 3
18: 18 2 6 4
19: 43 1 5 3
20: 20 1 6 6
age A1 A2 A3
<int> <int> <int> <int>
(本小节进度:3/4)
## 数据准备:大五人格问卷BFI(部分数据)
data = as.data.table(psych::bfi)
data[, let(
Gender = factor(gender, levels=1:2, labels=c("Male", "Female")),
Age = as.numeric(age),
Edu = as.factor(education)
)]
d = data[2535:2546, c("Gender", "Age", "Edu", paste0("A", 1:5))]
d Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Female 20 3 1 6 6 6 6
2: Female 56 5 3 5 3 5 5
3: Male 3 5 2 5 5 4 5
4: Male 65 2 5 4 2 6 4
5: Female 34 3 2 4 3 4 4
6: Female 29 4 1 6 5 3 5
7: Female 21 4 4 4 5 4 5
8: Female 32 2 3 5 4 NA 6
9: Male 26 2 2 6 5 4 4
10: Male 27 4 2 2 1 2 3
11: Female 45 4 1 5 5 6 6
12: Male 17 <NA> 2 4 4 3 4
Classes 'data.table' and 'data.frame': 12 obs. of 8 variables:
$ Gender: Factor w/ 2 levels "Male","Female": 2 2 1 1 2 2 2 2 1 1 ...
$ Age : num 20 56 3 65 34 29 21 32 26 27 ...
$ Edu : Factor w/ 5 levels "1","2","3","4",..: 3 5 5 2 3 4 4 2 2 4 ...
$ A1 : int 1 3 2 5 2 1 4 3 2 2 ...
$ A2 : int 6 5 5 4 4 6 4 5 6 2 ...
$ A3 : int 6 3 5 2 3 5 5 4 5 1 ...
$ A4 : int 6 5 4 6 4 3 4 NA 4 2 ...
$ A5 : int 6 5 5 4 4 5 5 6 4 3 ...
- attr(*, ".internal.selfref")=<externalptr>
Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Female 20 3 1 6 6 6 6
2: Female 56 5 3 5 3 5 5
3: Female 34 3 2 4 3 4 4
4: Female 29 4 1 6 5 3 5
5: Female 21 4 4 4 5 4 5
6: Female 32 2 3 5 4 NA 6
7: Female 45 4 1 5 5 6 6
Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Male 3 5 2 5 5 4 5
2: Male 65 2 5 4 2 6 4
3: Male 26 2 2 6 5 4 4
4: Male 27 4 2 2 1 2 3
5: Male 17 <NA> 2 4 4 3 4
Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Male 65 2 5 4 2 6 4
2: Male 26 2 2 6 5 4 4
3: Male 27 4 2 2 1 2 3
4: Male 17 <NA> 2 4 4 3 4
Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Male 65 2 5 4 2 6 4
2: Male 26 2 2 6 5 4 4
3: Male 27 4 2 2 1 2 3
Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Male 26 2 2 6 5 4 4
2: Male 27 4 2 2 1 2 3
Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Male 3 5 2 5 5 4 5
2: Male 65 2 5 4 2 6 4
3: Male 17 <NA> 2 4 4 3 4
Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Male 3 5 2 5 5 4 5
2: Male 65 2 5 4 2 6 4
3: Male 17 <NA> 2 4 4 3 4
Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Male 3 5 2 5 5 4 5
Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Female 20 3 1 6 6 6 6
2: Female 56 5 3 5 3 5 5
3: Male NA 5 2 5 5 4 5
4: Male 65 2 5 4 2 6 4
5: Female 34 3 2 4 3 4 4
6: Female 29 4 1 6 5 3 5
7: Female 21 4 4 4 5 4 5
8: Female 32 2 3 5 4 NA 6
9: Male 26 2 2 6 5 4 4
10: Male 27 4 2 2 1 2 3
11: Female 45 4 1 5 5 6 6
12: Male 17 <NA> 2 4 4 3 4
(本小节进度:1/3)
d = data[2535:2546, c("Gender", "Age", "Edu", paste0("A", 1:5))]
## 直接筛选列
d[, Age:A3] # DT[, j](变量名范围,不会数数字) Age Edu A1 A2 A3
<num> <fctr> <int> <int> <int>
1: 20 3 1 6 6
2: 56 5 3 5 3
3: 3 5 2 5 5
4: 65 2 5 4 2
5: 34 3 2 4 3
6: 29 4 1 6 5
7: 21 4 4 4 5
8: 32 2 3 5 4
9: 26 2 2 6 5
10: 27 4 2 2 1
11: 45 4 1 5 5
12: 17 <NA> 2 4 4
A1 A2 A3 Edu Age
<int> <int> <int> <fctr> <num>
1: 1 6 6 3 20
2: 3 5 3 5 56
3: 2 5 5 5 3
4: 5 4 2 2 65
5: 2 4 3 3 34
6: 1 6 5 4 29
7: 4 4 5 4 21
8: 3 5 4 2 32
9: 2 6 5 2 26
10: 2 2 1 4 27
11: 1 5 5 4 45
12: 2 4 4 <NA> 17
Error in `[.data.table`(d, , vars) :
j(在 [...] 中的第二个参数)是单一符号,但未找到列名 'vars'。如果你打算在调用环境中使用变量选择列,请尝试 DT[, ..vars]。.. 前缀表示上一级,类似于文件系统路径。
Age Edu
<num> <fctr>
1: 20 3
2: 56 5
3: 3 5
4: 65 2
5: 34 3
6: 29 4
7: 21 4
8: 32 2
9: 26 2
10: 27 4
11: 45 4
12: 17 <NA>
Age A1 A2 A3 A4 A5
<num> <int> <int> <int> <int> <int>
1: 20 1 6 6 6 6
2: 56 3 5 3 5 5
3: 3 2 5 5 4 5
4: 65 5 4 2 6 4
5: 34 2 4 3 4 4
6: 29 1 6 5 3 5
7: 21 4 4 5 4 5
8: 32 3 5 4 NA 6
9: 26 2 6 5 4 4
10: 27 2 2 1 2 3
11: 45 1 5 5 6 6
12: 17 2 4 4 3 4
A1 A2 A3 A4 A5
<int> <int> <int> <int> <int>
1: 1 6 6 6 6
2: 3 5 3 5 5
3: 2 5 5 4 5
4: 5 4 2 6 4
5: 2 4 3 4 4
6: 1 6 5 3 5
7: 4 4 5 4 5
8: 3 5 4 NA 6
9: 2 6 5 4 4
10: 2 2 1 2 3
11: 1 5 5 6 6
12: 2 4 4 3 4
A1 A2 A3 A4 A5
<int> <int> <int> <int> <int>
1: 1 6 6 6 6
2: 3 5 3 5 5
3: 2 5 5 4 5
4: 5 4 2 6 4
5: 2 4 3 4 4
6: 1 6 5 3 5
7: 4 4 5 4 5
8: 3 5 4 NA 6
9: 2 6 5 4 4
10: 2 2 1 2 3
11: 1 5 5 6 6
12: 2 4 4 3 4
A1 A2 A3 A4 A5
<int> <int> <int> <int> <int>
1: 1 6 6 6 6
2: 3 5 3 5 5
3: 2 5 5 4 5
4: 5 4 2 6 4
5: 2 4 3 4 4
6: 1 6 5 3 5
7: 4 4 5 4 5
8: 3 5 4 NA 6
9: 2 6 5 4 4
10: 2 2 1 2 3
11: 1 5 5 6 6
12: 2 4 4 3 4
拓展了解:tidyselect变量选择语法
(本小节进度:2/3)
A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 E1 E2
<int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1: 1 5 6 5 6 4 3 2 4 5 2 1
2: 2 4 4 4 3 6 5 6 1 1 2 4
3: 1 5 6 5 4 1 5 6 4 6 6 6
4: 1 5 4 3 5 6 5 5 2 2 3 2
5: 2 6 6 6 6 5 4 5 3 4 2 2
---
413: 1 6 6 5 6 5 5 3 2 4 1 5
414: 1 6 6 6 6 6 6 6 2 1 4 3
415: 2 5 4 5 4 6 5 5 2 4 6 5
416: 2 6 5 4 5 4 4 1 4 1 1 5
417: 3 4 4 3 5 6 6 5 2 1 2 2
E3 E4 E5 N1 N2 N3 N4 N5 O1 O2 O3 O4
<int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1: 2 5 2 2 2 2 2 2 6 1 5 5
2: 4 2 6 3 3 5 3 2 5 2 6 6
3: 2 1 1 1 2 1 3 6 6 6 5 6
4: 3 6 5 1 2 1 2 1 5 1 6 6
5: 4 5 5 2 2 2 2 3 5 2 5 5
---
413: 4 4 2 1 1 1 4 2 6 1 6 6
414: 6 6 6 2 6 4 5 5 6 1 6 6
415: 2 3 5 1 1 3 4 1 5 1 4 6
416: 4 5 5 NA 1 4 2 4 5 4 5 5
417: 4 5 5 4 4 3 2 2 5 4 4 3
O5 gender education age Gender Age Edu
<int> <int> <int> <int> <fctr> <num> <fctr>
1: 2 1 5 68 Male 68 5
2: 1 2 5 51 Female 51 5
3: 1 1 5 23 Male 23 5
4: 1 2 5 48 Female 48 5
5: 1 2 5 24 Female 24 5
---
413: 1 1 5 28 Male 28 5
414: 1 2 5 26 Female 26 5
415: 1 2 5 40 Female 40 5
416: 1 1 5 28 Male 28 5
417: 4 2 5 33 Female 33 5
A1 A2 A3 A4 A5 C1 C2 C3 C4 C5
<int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1: 2 4 3 4 4 2 3 3 4 4
2: 2 4 5 2 5 5 4 4 3 4
3: 5 4 5 4 4 4 5 4 2 5
4: 4 4 6 5 5 4 4 3 5 5
5: 2 3 3 4 5 4 4 5 3 2
---
2796: 6 1 3 3 3 6 6 6 1 1
2797: 2 4 4 3 5 2 3 4 4 3
2798: 2 3 5 2 5 5 5 5 1 1
2799: 5 2 2 4 4 5 5 5 2 6
2800: 2 3 1 4 2 5 5 3 3 3
Gender Age Edu
<fctr> <num> <fctr>
1: Male 16 <NA>
2: Female 18 <NA>
3: Female 17 <NA>
4: Female 17 <NA>
5: Male 17 <NA>
---
2796: Male 19 3
2797: Male 27 4
2798: Female 29 4
2799: Male 31 4
2800: Female 50 4
Gender Edu
<fctr> <fctr>
1: Male <NA>
2: Female <NA>
3: Female <NA>
4: Female <NA>
5: Male <NA>
---
2796: Male 3
2797: Male 4
2798: Female 4
2799: Male 4
2800: Female 4
Gender Age Edu
<fctr> <num> <fctr>
1: Male 68 5
2: Female 51 5
3: Male 23 5
4: Female 48 5
5: Female 24 5
---
413: Male 28 5
414: Female 26 5
415: Female 40 5
416: Male 28 5
417: Female 33 5
A1 A2 A3 A4 A5 C1 C2 C3 C4 C5
<int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1: 2 4 3 4 4 2 3 3 4 4
2: 2 4 5 2 5 5 4 4 3 4
3: 5 4 5 4 4 4 5 4 2 5
4: 4 4 6 5 5 4 4 3 5 5
5: 2 3 3 4 5 4 4 5 3 2
---
2796: 6 1 3 3 3 6 6 6 1 1
2797: 2 4 4 3 5 2 3 4 4 3
2798: 2 3 5 2 5 5 5 5 1 1
2799: 5 2 2 4 4 5 5 5 2 6
2800: 2 3 1 4 2 5 5 3 3 3
E1 E2 E3 E4 E5 education Edu
<int> <int> <int> <int> <int> <int> <fctr>
1: 3 3 3 4 4 NA <NA>
2: 1 1 6 4 3 NA <NA>
3: 2 4 4 4 5 NA <NA>
4: 5 3 4 4 4 NA <NA>
5: 2 2 5 4 5 NA <NA>
---
2796: 1 4 5 5 6 3 3
2797: 2 2 4 4 3 4 4
2798: 2 2 6 3 6 4 4
2799: 2 2 4 5 4 4 4
2800: 3 3 1 2 2 4 4
E1 E2 E3 E4 E5
<int> <int> <int> <int> <int>
1: 3 3 3 4 4
2: 1 1 6 4 3
3: 2 4 4 4 5
4: 5 3 4 4 4
5: 2 2 5 4 5
---
2796: 1 4 5 5 6
2797: 2 2 4 4 3
2798: 2 2 6 3 6
2799: 2 2 4 5 4
2800: 3 3 1 2 2
A1 A2 A3 A4 A5
<int> <int> <int> <int> <int>
1: 2 4 3 4 4
2: 2 4 5 2 5
3: 5 4 5 4 4
4: 4 4 6 5 5
5: 2 3 3 4 5
---
2796: 6 1 3 3 3
2797: 2 4 4 3 5
2798: 2 3 5 2 5
2799: 5 2 2 4 4
2800: 2 3 1 4 2
(本小节进度:3/3)
## 数据准备:大五人格问卷BFI(部分数据)
data = as.data.table(psych::bfi)
data[, let(
Gender = factor(gender, levels=1:2, labels=c("Male", "Female")),
Age = as.numeric(age),
Edu = as.factor(education)
)]
d = data[2535:2546, c("Gender", "Age", "Edu", paste0("A", 1:5))]
d Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Female 20 3 1 6 6 6 6
2: Female 56 5 3 5 3 5 5
3: Male 3 5 2 5 5 4 5
4: Male 65 2 5 4 2 6 4
5: Female 34 3 2 4 3 4 4
6: Female 29 4 1 6 5 3 5
7: Female 21 4 4 4 5 4 5
8: Female 32 2 3 5 4 NA 6
9: Male 26 2 2 6 5 4 4
10: Male 27 4 2 2 1 2 3
11: Female 45 4 1 5 5 6 6
12: Male 17 <NA> 2 4 4 3 4
Classes 'data.table' and 'data.frame': 12 obs. of 8 variables:
$ Gender: Factor w/ 2 levels "Male","Female": 2 2 1 1 2 2 2 2 1 1 ...
$ Age : num 20 56 3 65 34 29 21 32 26 27 ...
$ Edu : Factor w/ 5 levels "1","2","3","4",..: 3 5 5 2 3 4 4 2 2 4 ...
$ A1 : int 1 3 2 5 2 1 4 3 2 2 ...
$ A2 : int 6 5 5 4 4 6 4 5 6 2 ...
$ A3 : int 6 3 5 2 3 5 5 4 5 1 ...
$ A4 : int 6 5 4 6 4 3 4 NA 4 2 ...
$ A5 : int 6 5 5 4 4 5 5 6 4 3 ...
- attr(*, ".internal.selfref")=<externalptr>
Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Male 3 5 2 5 5 4 5
2: Male 17 <NA> 2 4 4 3 4
3: Male 26 2 2 6 5 4 4
4: Male 27 4 2 2 1 2 3
5: Male 65 2 5 4 2 6 4
6: Female 20 3 1 6 6 6 6
7: Female 21 4 4 4 5 4 5
8: Female 29 4 1 6 5 3 5
9: Female 32 2 3 5 4 NA 6
10: Female 34 3 2 4 3 4 4
11: Female 45 4 1 5 5 6 6
12: Female 56 5 3 5 3 5 5
Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Female 56 5 3 5 3 5 5
2: Female 45 4 1 5 5 6 6
3: Female 34 3 2 4 3 4 4
4: Female 32 2 3 5 4 NA 6
5: Female 29 4 1 6 5 3 5
6: Female 21 4 4 4 5 4 5
7: Female 20 3 1 6 6 6 6
8: Male 65 2 5 4 2 6 4
9: Male 27 4 2 2 1 2 3
10: Male 26 2 2 6 5 4 4
11: Male 17 <NA> 2 4 4 3 4
12: Male 3 5 2 5 5 4 5
Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Female 20 3 1 6 6 6 6
2: Male 17 <NA> 2 4 4 3 4
3: Male 3 5 2 5 5 4 5
(本小节进度:1/2)
Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Female 20 3 1 6 6 6 6
2: Male 3 5 2 5 5 4 5
Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Female 20 3 1 6 6 6 6
2: Female 56 5 3 5 3 5 5
3: Male 3 5 2 5 5 4 5
4: Male 65 2 5 4 2 6 4
5: Female 29 4 1 6 5 3 5
6: Female 32 2 3 5 4 NA 6
7: Male 27 4 2 2 1 2 3
8: Male 17 <NA> 2 4 4 3 4
Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Female 56 5 3 5 3 5 5
2: Female 45 4 1 5 5 6 6
3: Female 34 3 2 4 3 4 4
4: Female 32 2 3 5 4 NA 6
5: Male 65 2 5 4 2 6 4
6: Male 27 4 2 2 1 2 3
7: Male 17 <NA> 2 4 4 3 4
8: Male 3 5 2 5 5 4 5
(本小节进度:2/2)
参考前三小节代码(增删查改、行列筛选、排序去重),基于psych::bfi或期末自选数据,在自己电脑上自由练习数据基本操作,为【阶段作业①】做准备。
## 数据准备:大五人格问卷BFI(部分数据)
data = as.data.table(psych::bfi)
data[, let(
Gender = factor(gender, levels=1:2, labels=c("Male", "Female")),
Age = as.numeric(age),
Edu = as.factor(education)
)]
d = data[2535:2546, c("Gender", "Age", "Edu", paste0("A", 1:5))]
d Gender Age Edu A1 A2 A3 A4 A5
<fctr> <num> <fctr> <int> <int> <int> <int> <int>
1: Female 20 3 1 6 6 6 6
2: Female 56 5 3 5 3 5 5
3: Male 3 5 2 5 5 4 5
4: Male 65 2 5 4 2 6 4
5: Female 34 3 2 4 3 4 4
6: Female 29 4 1 6 5 3 5
7: Female 21 4 4 4 5 4 5
8: Female 32 2 3 5 4 NA 6
9: Male 26 2 2 6 5 4 4
10: Male 27 4 2 2 1 2 3
11: Female 45 4 1 5 5 6 6
12: Male 17 <NA> 2 4 4 3 4
Classes 'data.table' and 'data.frame': 12 obs. of 8 variables:
$ Gender: Factor w/ 2 levels "Male","Female": 2 2 1 1 2 2 2 2 1 1 ...
$ Age : num 20 56 3 65 34 29 21 32 26 27 ...
$ Edu : Factor w/ 5 levels "1","2","3","4",..: 3 5 5 2 3 4 4 2 2 4 ...
$ A1 : int 1 3 2 5 2 1 4 3 2 2 ...
$ A2 : int 6 5 5 4 4 6 4 5 6 2 ...
$ A3 : int 6 3 5 2 3 5 5 4 5 1 ...
$ A4 : int 6 5 4 6 4 3 4 NA 4 2 ...
$ A5 : int 6 5 5 4 4 5 5 6 4 3 ...
- attr(*, ".internal.selfref")=<externalptr>
DT[i, j, by]
by:定义分组变量(原始顺序)keyby:定义分组变量(重新排序) Gender N
<fctr> <int>
1: Female 7
2: Male 5
Key: <Gender>
Gender N
<fctr> <int>
1: Male 5
2: Female 7
Key: <Gender>
Gender N Mean.Age
<fctr> <int> <num>
1: Male 5 27.60
2: Female 7 33.86
Key: <Gender>
Gender N Mean.Age SD.Age
<fctr> <int> <num> <num>
1: Male 5 27.60 23.02
2: Female 7 33.86 12.90
Key: <Gender>
Gender N Mean.Age SD.Age
<fctr> <int> <num> <num>
1: Male 4 33.75 21.31
2: Female 7 33.86 12.90
## 全部数据的分组统计
# Education:
# 1 = high school
# 2 = finished high school
# 3 = some college
# 4 = college graduate
# 5 = graduate degree
data[, .(
.N,
Mean.Age = mean(Age)
), keyby=.(Gender, Edu)]Key: <Gender, Edu>
Gender Edu N Mean.Age
<fctr> <fctr> <int> <num>
1: Male <NA> 81 18.88
2: Male 1 93 25.15
3: Male 2 103 31.47
4: Male 3 356 25.39
5: Male 4 134 33.17
6: Male 5 152 33.95
7: Female <NA> 142 17.42
8: Female 1 131 25.12
9: Female 2 189 31.54
10: Female 3 893 27.96
11: Female 4 260 32.88
12: Female 5 266 36.08
(本小节进度:1/2)
Gender Age Edu A1 A2 A3 A4 A5 Mean.Age
<fctr> <num> <fctr> <int> <int> <int> <int> <int> <num>
1: Female 20 3 1 6 6 6 6 31.25
2: Female 56 5 3 5 3 5 5 31.25
3: Male 3 5 2 5 5 4 5 31.25
4: Male 65 2 5 4 2 6 4 31.25
5: Female 34 3 2 4 3 4 4 31.25
6: Female 29 4 1 6 5 3 5 31.25
7: Female 21 4 4 4 5 4 5 31.25
8: Female 32 2 3 5 4 NA 6 31.25
9: Male 26 2 2 6 5 4 4 31.25
10: Male 27 4 2 2 1 2 3 31.25
11: Female 45 4 1 5 5 6 6 31.25
12: Male 17 <NA> 2 4 4 3 4 31.25
Key: <Gender>
Gender Age Edu A1 A2 A3 A4 A5 Mean.Age Mean.Age.G
<fctr> <num> <fctr> <int> <int> <int> <int> <int> <num> <num>
1: Male 3 5 2 5 5 4 5 31.25 27.60
2: Male 65 2 5 4 2 6 4 31.25 27.60
3: Male 26 2 2 6 5 4 4 31.25 27.60
4: Male 27 4 2 2 1 2 3 31.25 27.60
5: Male 17 <NA> 2 4 4 3 4 31.25 27.60
6: Female 20 3 1 6 6 6 6 31.25 33.86
7: Female 56 5 3 5 3 5 5 31.25 33.86
8: Female 34 3 2 4 3 4 4 31.25 33.86
9: Female 29 4 1 6 5 3 5 31.25 33.86
10: Female 21 4 4 4 5 4 5 31.25 33.86
11: Female 32 2 3 5 4 NA 6 31.25 33.86
12: Female 45 4 1 5 5 6 6 31.25 33.86
Key: <Gender>
Gender Age Edu A1 A2 A3 A4 A5 Mean.Age Mean.Age.G
<fctr> <num> <fctr> <int> <int> <int> <int> <int> <num> <num>
1: Male 3 5 2 5 5 4 5 31.25 27.60
2: Male 65 2 5 4 2 6 4 31.25 27.60
3: Male 26 2 2 6 5 4 4 31.25 27.60
4: Male 27 4 2 2 1 2 3 31.25 27.60
5: Male 17 <NA> 2 4 4 3 4 31.25 27.60
6: Female 20 3 1 6 6 6 6 31.25 33.86
7: Female 56 5 3 5 3 5 5 31.25 33.86
8: Female 34 3 2 4 3 4 4 31.25 33.86
9: Female 29 4 1 6 5 3 5 31.25 33.86
10: Female 21 4 4 4 5 4 5 31.25 33.86
11: Female 32 2 3 5 4 NA 6 31.25 33.86
12: Female 45 4 1 5 5 6 6 31.25 33.86
AgeGroup
<char>
1: Young
2: Middle
3: Young
4: Young
5: Young
6: <NA>
7: <NA>
8: <NA>
9: <NA>
10: <NA>
11: <NA>
12: <NA>
Key: <Gender>
Gender Age Mean.Age Age.Grand.C
<fctr> <num> <num> <num>
1: Male 3 31.25 -28.25
2: Male 65 31.25 33.75
3: Male 26 31.25 -5.25
4: Male 27 31.25 -4.25
5: Male 17 31.25 -14.25
6: Female 20 31.25 -11.25
7: Female 56 31.25 24.75
8: Female 34 31.25 2.75
9: Female 29 31.25 -2.25
10: Female 21 31.25 -10.25
11: Female 32 31.25 0.75
12: Female 45 31.25 13.75
## 组均值中心化
d[, Age.Group.C := Age - mean(Age), keyby=Gender]
d[, .(Gender, Age, Mean.Age, Mean.Age.G, Age.Group.C)]Key: <Gender>
Gender Age Mean.Age Mean.Age.G Age.Group.C
<fctr> <num> <num> <num> <num>
1: Male 3 31.25 27.60 -24.6000
2: Male 65 31.25 27.60 37.4000
3: Male 26 31.25 27.60 -1.6000
4: Male 27 31.25 27.60 -0.6000
5: Male 17 31.25 27.60 -10.6000
6: Female 20 31.25 33.86 -13.8571
7: Female 56 31.25 33.86 22.1429
8: Female 34 31.25 33.86 0.1429
9: Female 29 31.25 33.86 -4.8571
10: Female 21 31.25 33.86 -12.8571
11: Female 32 31.25 33.86 -1.8571
12: Female 45 31.25 33.86 11.1429
两个数据d1和d2具有相同变量"var":
left_join(d1, d2, by="var"):向d1合并⭐️(最常用)right_join(d1, d2, by="var"):向d2合并inner_join(d1, d2, by="var"):取d1和d2交集合并full_join(d1, d2, by="var"):取d1和d2并集合并(data.table也能完成匹配拼接操作,但比dplyr难用)
name band
<char> <char>
1: Mick Stones
2: John Beatles
3: Paul Beatles
name plays
<char> <char>
1: John guitar
2: Paul bass
3: Keith guitar
artist plays
<char> <char>
1: John guitar
2: Paul bass
3: Keith guitar
name band plays
<char> <char> <char>
1: Mick Stones <NA>
2: John Beatles guitar
3: Paul Beatles bass
name band plays
<char> <char> <char>
1: John Beatles guitar
2: Paul Beatles bass
3: Keith <NA> guitar
name band plays
<char> <char> <char>
1: John Beatles guitar
2: Paul Beatles bass
name band plays
<char> <char> <char>
1: Mick Stones <NA>
2: John Beatles guitar
3: Paul Beatles bass
4: Keith <NA> guitar
[1] "name" "band"
[1] "artist" "plays"
name band plays
<char> <char> <char>
1: Mick Stones <NA>
2: John Beatles guitar
3: Paul Beatles bass
(本小节进度:1/2)
x city
<num> <int>
1: -0.6265 1
2: 0.1836 1
3: -0.8356 1
4: 1.5953 2
5: 0.3295 2
6: -0.8205 2
7: 0.4874 3
8: 0.7383 3
9: 0.5758 3
city label GDP
<int> <char> <num>
1: 1 北京 521
2: 2 上海 567
3: 3 广州 310
x city label GDP
<num> <int> <char> <num>
1: -0.6265 1 北京 521
2: 0.1836 1 北京 521
3: -0.8356 1 北京 521
4: 1.5953 2 上海 567
5: 0.3295 2 上海 567
6: -0.8205 2 上海 567
7: 0.4874 3 广州 310
8: 0.7383 3 广州 310
9: 0.5758 3 广州 310
(本小节进度:2/2)
ggplot2等工具进行数据可视化,也便于进行分组、聚合和复杂变换,数据库存储也常采用类似结构 ID A1 A2 A3 A4
1 S1 3 4 8 9
2 S2 6 6 9 8
3 S3 4 4 8 8
4 S4 3 2 7 7
5 S5 5 4 5 12
6 S6 7 5 6 13
7 S7 5 3 7 12
8 S8 2 3 6 11
d.long.1 = pivot_longer(
data = within.1,
cols = A1:A4,
names_to = "Condition",
values_to = "Y"
) %>% as.data.table()
d.long.1 # 长数据 ID Condition Y
<char> <char> <num>
1: S1 A1 3
2: S1 A2 4
3: S1 A3 8
4: S1 A4 9
5: S2 A1 6
6: S2 A2 6
7: S2 A3 9
8: S2 A4 8
9: S3 A1 4
10: S3 A2 4
11: S3 A3 8
12: S3 A4 8
13: S4 A1 3
14: S4 A2 2
15: S4 A3 7
16: S4 A4 7
17: S5 A1 5
18: S5 A2 4
19: S5 A3 5
20: S5 A4 12
21: S6 A1 7
22: S6 A2 5
23: S6 A3 6
24: S6 A4 13
25: S7 A1 5
26: S7 A2 3
27: S7 A3 7
28: S7 A4 12
29: S8 A1 2
30: S8 A2 3
31: S8 A3 6
32: S8 A4 11
ID Condition Y
<char> <char> <num>
ID A1B1 A1B2 A1B3 A2B1 A2B2 A2B3
1 S1 3 4 5 4 8 12
2 S2 6 6 7 5 9 13
3 S3 4 4 5 3 8 12
4 S4 3 2 2 3 7 11
d.long.2 = pivot_longer(
data = within.2,
cols = A1B1:A2B3,
names_to = c("Cond.A", "Cond.B"),
names_pattern = "A(.)B(.)",
values_to = "Y"
) %>% as.data.table()
d.long.2 # 长数据 ID Cond.A Cond.B Y
<char> <char> <char> <num>
1: S1 1 1 3
2: S1 1 2 4
3: S1 1 3 5
4: S1 2 1 4
5: S1 2 2 8
6: S1 2 3 12
7: S2 1 1 6
8: S2 1 2 6
9: S2 1 3 7
10: S2 2 1 5
11: S2 2 2 9
12: S2 2 3 13
13: S3 1 1 4
14: S3 1 2 4
15: S3 1 3 5
16: S3 2 1 3
17: S3 2 2 8
18: S3 2 3 12
19: S4 1 1 3
20: S4 1 2 2
21: S4 1 3 2
22: S4 2 1 3
23: S4 2 2 7
24: S4 2 3 11
ID Cond.A Cond.B Y
<char> <char> <char> <num>
(本小节进度:1/3)
ID Condition Y
<char> <char> <num>
1: S1 A1 3
2: S1 A2 4
3: S1 A3 8
4: S1 A4 9
5: S2 A1 6
6: S2 A2 6
7: S2 A3 9
8: S2 A4 8
9: S3 A1 4
10: S3 A2 4
11: S3 A3 8
12: S3 A4 8
13: S4 A1 3
14: S4 A2 2
15: S4 A3 7
16: S4 A4 7
17: S5 A1 5
18: S5 A2 4
19: S5 A3 5
20: S5 A4 12
21: S6 A1 7
22: S6 A2 5
23: S6 A3 6
24: S6 A4 13
25: S7 A1 5
26: S7 A2 3
27: S7 A3 7
28: S7 A4 12
29: S8 A1 2
30: S8 A2 3
31: S8 A3 6
32: S8 A4 11
ID Condition Y
<char> <char> <num>
d.wide.1 = pivot_wider(
data = d.long.1,
id_cols = ID,
names_from = "Condition",
values_from = "Y"
) %>% as.data.table()
d.wide.1 # 宽数据 ID A1 A2 A3 A4
<char> <num> <num> <num> <num>
1: S1 3 4 8 9
2: S2 6 6 9 8
3: S3 4 4 8 8
4: S4 3 2 7 7
5: S5 5 4 5 12
6: S6 7 5 6 13
7: S7 5 3 7 12
8: S8 2 3 6 11
ID Cond.A Cond.B Y
<char> <char> <char> <num>
1: S1 1 1 3
2: S1 1 2 4
3: S1 1 3 5
4: S1 2 1 4
5: S1 2 2 8
6: S1 2 3 12
7: S2 1 1 6
8: S2 1 2 6
9: S2 1 3 7
10: S2 2 1 5
11: S2 2 2 9
12: S2 2 3 13
13: S3 1 1 4
14: S3 1 2 4
15: S3 1 3 5
16: S3 2 1 3
17: S3 2 2 8
18: S3 2 3 12
19: S4 1 1 3
20: S4 1 2 2
21: S4 1 3 2
22: S4 2 1 3
23: S4 2 2 7
24: S4 2 3 11
ID Cond.A Cond.B Y
<char> <char> <char> <num>
d.wide.2 = pivot_wider(
data = d.long.2,
id_cols = ID,
names_from = c("Cond.A", "Cond.B"),
names_prefix = "A",
names_sep = "_B",
values_from = "Y"
) %>% as.data.table()
d.wide.2 # 宽数据 ID A1_B1 A1_B2 A1_B3 A2_B1 A2_B2 A2_B3
<char> <num> <num> <num> <num> <num> <num>
1: S1 3 4 5 4 8 12
2: S2 6 6 7 5 9 13
3: S3 4 4 5 3 8 12
4: S4 3 2 2 3 7 11
(本小节进度:2/3)
ID A1_B1 A1_B2 A1_B3 A2_B1 A2_B2 A2_B3
<char> <num> <num> <num> <num> <num> <num>
1: S1 3 4 5 4 8 12
2: S2 6 6 7 5 9 13
3: S3 4 4 5 3 8 12
4: S4 3 2 2 3 7 11
d.long.3 = pivot_longer(
data = d.wide.2,
cols = starts_with("A"),
names_to = c("VariableA", ".value"),
names_pattern = "A(.)_(.+)"
) %>% as.data.table()
d.long.3 # A变长,B保持宽 ID VariableA B1 B2 B3
<char> <char> <num> <num> <num>
1: S1 1 3 4 5
2: S1 2 4 8 12
3: S2 1 6 6 7
4: S2 2 5 9 13
5: S3 1 4 4 5
6: S3 2 3 8 12
7: S4 1 3 2 2
8: S4 2 3 7 11
d.wide.3 = pivot_wider(
data = d.long.3,
names_from = "VariableA",
names_glue = "A{VariableA}{.value}",
values_from = c("B1", "B2", "B3")
) %>% as.data.table()
d.wide.3 ID A1B1 A2B1 A1B2 A2B2 A1B3 A2B3
<char> <num> <num> <num> <num> <num> <num>
1: S1 3 4 4 8 5 12
2: S2 6 5 6 9 7 13
3: S3 4 3 4 8 5 12
4: S4 3 3 2 7 2 11
(本小节进度:3/3)
作业要求:
平台提交:
学号-姓名-R阶段作业1.html