Extracts one or more variable subsets from a `CorrCombo` object as data frames.
Typically used after `corrSelect` or `MatSelect` to obtain filtered versions of the original dataset containing only low‐correlation variable combinations.
Arguments
- res
A `CorrCombo` object returned by `corrSelect` or `MatSelect`.
- df
A data frame or matrix. Must contain all variables listed in `res@names`. Columns not in `res@names` are ignored unless `keepExtra = TRUE`.
- which
Subsets to extract. One of:
  - `"best"` (default) or `1`: the top‐ranked subset.
  - A single integer (e.g. `2`): the nth ranked subset.
  - A vector of integers (e.g. `1:3`): multiple subsets.
  - `"all"`: all available subsets.
Subsets are ranked by decreasing size, then increasing average correlation.
- keepExtra
Logical. If `TRUE`, columns in `df` not in `res@names` (e.g., factors, characters) are retained. Defaults to `FALSE`.
Value
A data frame if a single subset is extracted, or a list of data frames if multiple subsets are extracted. Each data frame contains the selected variables (and optionally extras).
Examples
# Simulate input data
set.seed(123)
df <- as.data.frame(matrix(rnorm(100), nrow = 10))
colnames(df) <- paste0("V", 1:10)
# Compute correlation matrix
cmat <- cor(df)
# Select subsets using corrSelect
res <- corrSelect(cmat, threshold = 0.5)
# Extract the best subset (default)
corrSubset(res, df)
#> V2 V5 V10 V7
#> 1 1.2240818 -0.69470698 0.9935039 0.37963948
#> 2 0.3598138 -0.20791728 0.5483970 -0.50232345
#> 3 0.4007715 -1.26539635 0.2387317 -0.33320738
#> 4 0.1106827 2.16895597 -0.6279061 -1.01857538
#> 5 -0.5558411 1.20796200 1.3606524 -1.07179123
#> 6 1.7869131 -1.12310858 -0.6002596 0.30352864
#> 7 0.4978505 -0.40288484 2.1873330 0.44820978
#> 8 -1.9666172 -0.46665535 1.5326106 0.05300423
#> 9 0.7013559 0.77996512 -0.2357004 0.92226747
#> 10 -0.4727914 -0.08336907 -1.0264209 2.05008469
# Extract the second-best subset
corrSubset(res, df, which = 2)
#> V1 V5 V10 V7
#> 1 -0.56047565 -0.69470698 0.9935039 0.37963948
#> 2 -0.23017749 -0.20791728 0.5483970 -0.50232345
#> 3 1.55870831 -1.26539635 0.2387317 -0.33320738
#> 4 0.07050839 2.16895597 -0.6279061 -1.01857538
#> 5 0.12928774 1.20796200 1.3606524 -1.07179123
#> 6 1.71506499 -1.12310858 -0.6002596 0.30352864
#> 7 0.46091621 -0.40288484 2.1873330 0.44820978
#> 8 -1.26506123 -0.46665535 1.5326106 0.05300423
#> 9 -0.68685285 0.77996512 -0.2357004 0.92226747
#> 10 -0.44566197 -0.08336907 -1.0264209 2.05008469
# Extract the first three subsets
corrSubset(res, df, which = 1:3)
#> $Subset1
#> V2 V5 V10 V7
#> 1 1.2240818 -0.69470698 0.9935039 0.37963948
#> 2 0.3598138 -0.20791728 0.5483970 -0.50232345
#> 3 0.4007715 -1.26539635 0.2387317 -0.33320738
#> 4 0.1106827 2.16895597 -0.6279061 -1.01857538
#> 5 -0.5558411 1.20796200 1.3606524 -1.07179123
#> 6 1.7869131 -1.12310858 -0.6002596 0.30352864
#> 7 0.4978505 -0.40288484 2.1873330 0.44820978
#> 8 -1.9666172 -0.46665535 1.5326106 0.05300423
#> 9 0.7013559 0.77996512 -0.2357004 0.92226747
#> 10 -0.4727914 -0.08336907 -1.0264209 2.05008469
#>
#> $Subset2
#> V1 V5 V10 V7
#> 1 -0.56047565 -0.69470698 0.9935039 0.37963948
#> 2 -0.23017749 -0.20791728 0.5483970 -0.50232345
#> 3 1.55870831 -1.26539635 0.2387317 -0.33320738
#> 4 0.07050839 2.16895597 -0.6279061 -1.01857538
#> 5 0.12928774 1.20796200 1.3606524 -1.07179123
#> 6 1.71506499 -1.12310858 -0.6002596 0.30352864
#> 7 0.46091621 -0.40288484 2.1873330 0.44820978
#> 8 -1.26506123 -0.46665535 1.5326106 0.05300423
#> 9 -0.68685285 0.77996512 -0.2357004 0.92226747
#> 10 -0.44566197 -0.08336907 -1.0264209 2.05008469
#>
#> $Subset3
#> V2 V5 V6 V7
#> 1 1.2240818 -0.69470698 0.25331851 0.37963948
#> 2 0.3598138 -0.20791728 -0.02854676 -0.50232345
#> 3 0.4007715 -1.26539635 -0.04287046 -0.33320738
#> 4 0.1106827 2.16895597 1.36860228 -1.01857538
#> 5 -0.5558411 1.20796200 -0.22577099 -1.07179123
#> 6 1.7869131 -1.12310858 1.51647060 0.30352864
#> 7 0.4978505 -0.40288484 -1.54875280 0.44820978
#> 8 -1.9666172 -0.46665535 0.58461375 0.05300423
#> 9 0.7013559 0.77996512 0.12385424 0.92226747
#> 10 -0.4727914 -0.08336907 0.21594157 2.05008469
#>
# Extract all subsets
corrSubset(res, df, which = "all")
#> $Subset1
#> V2 V5 V10 V7
#> 1 1.2240818 -0.69470698 0.9935039 0.37963948
#> 2 0.3598138 -0.20791728 0.5483970 -0.50232345
#> 3 0.4007715 -1.26539635 0.2387317 -0.33320738
#> 4 0.1106827 2.16895597 -0.6279061 -1.01857538
#> 5 -0.5558411 1.20796200 1.3606524 -1.07179123
#> 6 1.7869131 -1.12310858 -0.6002596 0.30352864
#> 7 0.4978505 -0.40288484 2.1873330 0.44820978
#> 8 -1.9666172 -0.46665535 1.5326106 0.05300423
#> 9 0.7013559 0.77996512 -0.2357004 0.92226747
#> 10 -0.4727914 -0.08336907 -1.0264209 2.05008469
#>
#> $Subset2
#> V1 V5 V10 V7
#> 1 -0.56047565 -0.69470698 0.9935039 0.37963948
#> 2 -0.23017749 -0.20791728 0.5483970 -0.50232345
#> 3 1.55870831 -1.26539635 0.2387317 -0.33320738
#> 4 0.07050839 2.16895597 -0.6279061 -1.01857538
#> 5 0.12928774 1.20796200 1.3606524 -1.07179123
#> 6 1.71506499 -1.12310858 -0.6002596 0.30352864
#> 7 0.46091621 -0.40288484 2.1873330 0.44820978
#> 8 -1.26506123 -0.46665535 1.5326106 0.05300423
#> 9 -0.68685285 0.77996512 -0.2357004 0.92226747
#> 10 -0.44566197 -0.08336907 -1.0264209 2.05008469
#>
#> $Subset3
#> V2 V5 V6 V7
#> 1 1.2240818 -0.69470698 0.25331851 0.37963948
#> 2 0.3598138 -0.20791728 -0.02854676 -0.50232345
#> 3 0.4007715 -1.26539635 -0.04287046 -0.33320738
#> 4 0.1106827 2.16895597 1.36860228 -1.01857538
#> 5 -0.5558411 1.20796200 -0.22577099 -1.07179123
#> 6 1.7869131 -1.12310858 1.51647060 0.30352864
#> 7 0.4978505 -0.40288484 -1.54875280 0.44820978
#> 8 -1.9666172 -0.46665535 0.58461375 0.05300423
#> 9 0.7013559 0.77996512 0.12385424 0.92226747
#> 10 -0.4727914 -0.08336907 0.21594157 2.05008469
#>
#> $Subset4
#> V1 V5 V6 V7
#> 1 -0.56047565 -0.69470698 0.25331851 0.37963948
#> 2 -0.23017749 -0.20791728 -0.02854676 -0.50232345
#> 3 1.55870831 -1.26539635 -0.04287046 -0.33320738
#> 4 0.07050839 2.16895597 1.36860228 -1.01857538
#> 5 0.12928774 1.20796200 -0.22577099 -1.07179123
#> 6 1.71506499 -1.12310858 1.51647060 0.30352864
#> 7 0.46091621 -0.40288484 -1.54875280 0.44820978
#> 8 -1.26506123 -0.46665535 0.58461375 0.05300423
#> 9 -0.68685285 0.77996512 0.12385424 0.92226747
#> 10 -0.44566197 -0.08336907 0.21594157 2.05008469
#>
#> $Subset5
#> V4 V5 V10
#> 1 0.42646422 -0.69470698 0.9935039
#> 2 -0.29507148 -0.20791728 0.5483970
#> 3 0.89512566 -1.26539635 0.2387317
#> 4 0.87813349 2.16895597 -0.6279061
#> 5 0.82158108 1.20796200 1.3606524
#> 6 0.68864025 -1.12310858 -0.6002596
#> 7 0.55391765 -0.40288484 2.1873330
#> 8 -0.06191171 -0.46665535 1.5326106
#> 9 -0.30596266 0.77996512 -0.2357004
#> 10 -0.38047100 -0.08336907 -1.0264209
#>
#> $Subset6
#> V9 V5 V10
#> 1 0.005764186 -0.69470698 0.9935039
#> 2 0.385280401 -0.20791728 0.5483970
#> 3 -0.370660032 -1.26539635 0.2387317
#> 4 0.644376549 2.16895597 -0.6279061
#> 5 -0.220486562 1.20796200 1.3606524
#> 6 0.331781964 -1.12310858 -0.6002596
#> 7 1.096839013 -0.40288484 2.1873330
#> 8 0.435181491 -0.46665535 1.5326106
#> 9 -0.325931586 0.77996512 -0.2357004
#> 10 1.148807618 -0.08336907 -1.0264209
#>
#> $Subset7
#> V3 V5 V10
#> 1 -1.0678237 -0.69470698 0.9935039
#> 2 -0.2179749 -0.20791728 0.5483970
#> 3 -1.0260044 -1.26539635 0.2387317
#> 4 -0.7288912 2.16895597 -0.6279061
#> 5 -0.6250393 1.20796200 1.3606524
#> 6 -1.6866933 -1.12310858 -0.6002596
#> 7 0.8377870 -0.40288484 2.1873330
#> 8 0.1533731 -0.46665535 1.5326106
#> 9 -1.1381369 0.77996512 -0.2357004
#> 10 1.2538149 -0.08336907 -1.0264209
#>
#> $Subset8
#> V4 V5 V6
#> 1 0.42646422 -0.69470698 0.25331851
#> 2 -0.29507148 -0.20791728 -0.02854676
#> 3 0.89512566 -1.26539635 -0.04287046
#> 4 0.87813349 2.16895597 1.36860228
#> 5 0.82158108 1.20796200 -0.22577099
#> 6 0.68864025 -1.12310858 1.51647060
#> 7 0.55391765 -0.40288484 -1.54875280
#> 8 -0.06191171 -0.46665535 0.58461375
#> 9 -0.30596266 0.77996512 0.12385424
#> 10 -0.38047100 -0.08336907 0.21594157
#>
#> $Subset9
#> V8 V6 V7
#> 1 -0.4910312 0.25331851 0.37963948
#> 2 -2.3091689 -0.02854676 -0.50232345
#> 3 1.0057385 -0.04287046 -0.33320738
#> 4 -0.7092008 1.36860228 -1.01857538
#> 5 -0.6880086 -0.22577099 -1.07179123
#> 6 1.0255714 1.51647060 0.30352864
#> 7 -0.2847730 -1.54875280 0.44820978
#> 8 -1.2207177 0.58461375 0.05300423
#> 9 0.1813035 0.12385424 0.92226747
#> 10 -0.1388914 0.21594157 2.05008469
#>
#> $Subset10
#> V9 V5 V6
#> 1 0.005764186 -0.69470698 0.25331851
#> 2 0.385280401 -0.20791728 -0.02854676
#> 3 -0.370660032 -1.26539635 -0.04287046
#> 4 0.644376549 2.16895597 1.36860228
#> 5 -0.220486562 1.20796200 -0.22577099
#> 6 0.331781964 -1.12310858 1.51647060
#> 7 1.096839013 -0.40288484 -1.54875280
#> 8 0.435181491 -0.46665535 0.58461375
#> 9 -0.325931586 0.77996512 0.12385424
#> 10 1.148807618 -0.08336907 0.21594157
#>
# Extract best subset and retain additional numeric column
df$CopyV1 <- df$V1
corrSubset(res, df, which = 1, keepExtra = TRUE)
#> V2 V5 V10 V7 CopyV1
#> 1 1.2240818 -0.69470698 0.9935039 0.37963948 -0.56047565
#> 2 0.3598138 -0.20791728 0.5483970 -0.50232345 -0.23017749
#> 3 0.4007715 -1.26539635 0.2387317 -0.33320738 1.55870831
#> 4 0.1106827 2.16895597 -0.6279061 -1.01857538 0.07050839
#> 5 -0.5558411 1.20796200 1.3606524 -1.07179123 0.12928774
#> 6 1.7869131 -1.12310858 -0.6002596 0.30352864 1.71506499
#> 7 0.4978505 -0.40288484 2.1873330 0.44820978 0.46091621
#> 8 -1.9666172 -0.46665535 1.5326106 0.05300423 -1.26506123
#> 9 0.7013559 0.77996512 -0.2357004 0.92226747 -0.68685285
#> 10 -0.4727914 -0.08336907 -1.0264209 2.05008469 -0.44566197