Skip to contents

Extracts one or more variable subsets from a CorrCombo object as data frames. It is typically used after corrSelect or MatSelect to obtain filtered versions of the original dataset that contain only low‐correlation variable combinations.

Usage

corrSubset(res, df, which = "best", keepExtra = FALSE)

Arguments

res

A CorrCombo object returned by corrSelect or MatSelect.

df

A data frame or matrix. Must contain all variables listed in res@names. Columns not in res@names are ignored unless keepExtra = TRUE.

which

Subsets to extract. One of:

  • "best" (default) or 1: the top‐ranked subset.

  • A single integer n (e.g. 2): the subset at rank n.

  • A vector of integers (e.g. 1:3): multiple subsets.

  • "all": all available subsets.

Subsets are ranked by decreasing size, then increasing average correlation.

keepExtra

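Logical. If TRUE, columns in df that are not in res@names (e.g., factor or character columns) are retained alongside the selected variables; in the example below they appear after the selected variables. Defaults to FALSE.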

Value

A data frame if a single subset is extracted, or a named list of data frames (Subset1, Subset2, … in the examples below) if multiple subsets are extracted. Each data frame contains the selected variables (and optionally extras).

Note

A warning is issued if any rows contain missing values in the selected variables.

Examples

# Simulate input data
set.seed(123)
df <- as.data.frame(matrix(rnorm(100), nrow = 10))
colnames(df) <- paste0("V", 1:10)

# Compute correlation matrix
cmat <- cor(df)

# Select subsets using corrSelect
res <- corrSelect(cmat, threshold = 0.5)

# Extract the best subset (default)
corrSubset(res, df)
#>            V2          V5        V10          V7
#> 1   1.2240818 -0.69470698  0.9935039  0.37963948
#> 2   0.3598138 -0.20791728  0.5483970 -0.50232345
#> 3   0.4007715 -1.26539635  0.2387317 -0.33320738
#> 4   0.1106827  2.16895597 -0.6279061 -1.01857538
#> 5  -0.5558411  1.20796200  1.3606524 -1.07179123
#> 6   1.7869131 -1.12310858 -0.6002596  0.30352864
#> 7   0.4978505 -0.40288484  2.1873330  0.44820978
#> 8  -1.9666172 -0.46665535  1.5326106  0.05300423
#> 9   0.7013559  0.77996512 -0.2357004  0.92226747
#> 10 -0.4727914 -0.08336907 -1.0264209  2.05008469

# Extract the second-best subset
corrSubset(res, df, which = 2)
#>             V1          V5        V10          V7
#> 1  -0.56047565 -0.69470698  0.9935039  0.37963948
#> 2  -0.23017749 -0.20791728  0.5483970 -0.50232345
#> 3   1.55870831 -1.26539635  0.2387317 -0.33320738
#> 4   0.07050839  2.16895597 -0.6279061 -1.01857538
#> 5   0.12928774  1.20796200  1.3606524 -1.07179123
#> 6   1.71506499 -1.12310858 -0.6002596  0.30352864
#> 7   0.46091621 -0.40288484  2.1873330  0.44820978
#> 8  -1.26506123 -0.46665535  1.5326106  0.05300423
#> 9  -0.68685285  0.77996512 -0.2357004  0.92226747
#> 10 -0.44566197 -0.08336907 -1.0264209  2.05008469

# Extract the first three subsets
corrSubset(res, df, which = 1:3)
#> $Subset1
#>            V2          V5        V10          V7
#> 1   1.2240818 -0.69470698  0.9935039  0.37963948
#> 2   0.3598138 -0.20791728  0.5483970 -0.50232345
#> 3   0.4007715 -1.26539635  0.2387317 -0.33320738
#> 4   0.1106827  2.16895597 -0.6279061 -1.01857538
#> 5  -0.5558411  1.20796200  1.3606524 -1.07179123
#> 6   1.7869131 -1.12310858 -0.6002596  0.30352864
#> 7   0.4978505 -0.40288484  2.1873330  0.44820978
#> 8  -1.9666172 -0.46665535  1.5326106  0.05300423
#> 9   0.7013559  0.77996512 -0.2357004  0.92226747
#> 10 -0.4727914 -0.08336907 -1.0264209  2.05008469
#> 
#> $Subset2
#>             V1          V5        V10          V7
#> 1  -0.56047565 -0.69470698  0.9935039  0.37963948
#> 2  -0.23017749 -0.20791728  0.5483970 -0.50232345
#> 3   1.55870831 -1.26539635  0.2387317 -0.33320738
#> 4   0.07050839  2.16895597 -0.6279061 -1.01857538
#> 5   0.12928774  1.20796200  1.3606524 -1.07179123
#> 6   1.71506499 -1.12310858 -0.6002596  0.30352864
#> 7   0.46091621 -0.40288484  2.1873330  0.44820978
#> 8  -1.26506123 -0.46665535  1.5326106  0.05300423
#> 9  -0.68685285  0.77996512 -0.2357004  0.92226747
#> 10 -0.44566197 -0.08336907 -1.0264209  2.05008469
#> 
#> $Subset3
#>            V2          V5          V6          V7
#> 1   1.2240818 -0.69470698  0.25331851  0.37963948
#> 2   0.3598138 -0.20791728 -0.02854676 -0.50232345
#> 3   0.4007715 -1.26539635 -0.04287046 -0.33320738
#> 4   0.1106827  2.16895597  1.36860228 -1.01857538
#> 5  -0.5558411  1.20796200 -0.22577099 -1.07179123
#> 6   1.7869131 -1.12310858  1.51647060  0.30352864
#> 7   0.4978505 -0.40288484 -1.54875280  0.44820978
#> 8  -1.9666172 -0.46665535  0.58461375  0.05300423
#> 9   0.7013559  0.77996512  0.12385424  0.92226747
#> 10 -0.4727914 -0.08336907  0.21594157  2.05008469
#> 

# Extract all subsets
corrSubset(res, df, which = "all")
#> $Subset1
#>            V2          V5        V10          V7
#> 1   1.2240818 -0.69470698  0.9935039  0.37963948
#> 2   0.3598138 -0.20791728  0.5483970 -0.50232345
#> 3   0.4007715 -1.26539635  0.2387317 -0.33320738
#> 4   0.1106827  2.16895597 -0.6279061 -1.01857538
#> 5  -0.5558411  1.20796200  1.3606524 -1.07179123
#> 6   1.7869131 -1.12310858 -0.6002596  0.30352864
#> 7   0.4978505 -0.40288484  2.1873330  0.44820978
#> 8  -1.9666172 -0.46665535  1.5326106  0.05300423
#> 9   0.7013559  0.77996512 -0.2357004  0.92226747
#> 10 -0.4727914 -0.08336907 -1.0264209  2.05008469
#> 
#> $Subset2
#>             V1          V5        V10          V7
#> 1  -0.56047565 -0.69470698  0.9935039  0.37963948
#> 2  -0.23017749 -0.20791728  0.5483970 -0.50232345
#> 3   1.55870831 -1.26539635  0.2387317 -0.33320738
#> 4   0.07050839  2.16895597 -0.6279061 -1.01857538
#> 5   0.12928774  1.20796200  1.3606524 -1.07179123
#> 6   1.71506499 -1.12310858 -0.6002596  0.30352864
#> 7   0.46091621 -0.40288484  2.1873330  0.44820978
#> 8  -1.26506123 -0.46665535  1.5326106  0.05300423
#> 9  -0.68685285  0.77996512 -0.2357004  0.92226747
#> 10 -0.44566197 -0.08336907 -1.0264209  2.05008469
#> 
#> $Subset3
#>            V2          V5          V6          V7
#> 1   1.2240818 -0.69470698  0.25331851  0.37963948
#> 2   0.3598138 -0.20791728 -0.02854676 -0.50232345
#> 3   0.4007715 -1.26539635 -0.04287046 -0.33320738
#> 4   0.1106827  2.16895597  1.36860228 -1.01857538
#> 5  -0.5558411  1.20796200 -0.22577099 -1.07179123
#> 6   1.7869131 -1.12310858  1.51647060  0.30352864
#> 7   0.4978505 -0.40288484 -1.54875280  0.44820978
#> 8  -1.9666172 -0.46665535  0.58461375  0.05300423
#> 9   0.7013559  0.77996512  0.12385424  0.92226747
#> 10 -0.4727914 -0.08336907  0.21594157  2.05008469
#> 
#> $Subset4
#>             V1          V5          V6          V7
#> 1  -0.56047565 -0.69470698  0.25331851  0.37963948
#> 2  -0.23017749 -0.20791728 -0.02854676 -0.50232345
#> 3   1.55870831 -1.26539635 -0.04287046 -0.33320738
#> 4   0.07050839  2.16895597  1.36860228 -1.01857538
#> 5   0.12928774  1.20796200 -0.22577099 -1.07179123
#> 6   1.71506499 -1.12310858  1.51647060  0.30352864
#> 7   0.46091621 -0.40288484 -1.54875280  0.44820978
#> 8  -1.26506123 -0.46665535  0.58461375  0.05300423
#> 9  -0.68685285  0.77996512  0.12385424  0.92226747
#> 10 -0.44566197 -0.08336907  0.21594157  2.05008469
#> 
#> $Subset5
#>             V4          V5        V10
#> 1   0.42646422 -0.69470698  0.9935039
#> 2  -0.29507148 -0.20791728  0.5483970
#> 3   0.89512566 -1.26539635  0.2387317
#> 4   0.87813349  2.16895597 -0.6279061
#> 5   0.82158108  1.20796200  1.3606524
#> 6   0.68864025 -1.12310858 -0.6002596
#> 7   0.55391765 -0.40288484  2.1873330
#> 8  -0.06191171 -0.46665535  1.5326106
#> 9  -0.30596266  0.77996512 -0.2357004
#> 10 -0.38047100 -0.08336907 -1.0264209
#> 
#> $Subset6
#>              V9          V5        V10
#> 1   0.005764186 -0.69470698  0.9935039
#> 2   0.385280401 -0.20791728  0.5483970
#> 3  -0.370660032 -1.26539635  0.2387317
#> 4   0.644376549  2.16895597 -0.6279061
#> 5  -0.220486562  1.20796200  1.3606524
#> 6   0.331781964 -1.12310858 -0.6002596
#> 7   1.096839013 -0.40288484  2.1873330
#> 8   0.435181491 -0.46665535  1.5326106
#> 9  -0.325931586  0.77996512 -0.2357004
#> 10  1.148807618 -0.08336907 -1.0264209
#> 
#> $Subset7
#>            V3          V5        V10
#> 1  -1.0678237 -0.69470698  0.9935039
#> 2  -0.2179749 -0.20791728  0.5483970
#> 3  -1.0260044 -1.26539635  0.2387317
#> 4  -0.7288912  2.16895597 -0.6279061
#> 5  -0.6250393  1.20796200  1.3606524
#> 6  -1.6866933 -1.12310858 -0.6002596
#> 7   0.8377870 -0.40288484  2.1873330
#> 8   0.1533731 -0.46665535  1.5326106
#> 9  -1.1381369  0.77996512 -0.2357004
#> 10  1.2538149 -0.08336907 -1.0264209
#> 
#> $Subset8
#>             V4          V5          V6
#> 1   0.42646422 -0.69470698  0.25331851
#> 2  -0.29507148 -0.20791728 -0.02854676
#> 3   0.89512566 -1.26539635 -0.04287046
#> 4   0.87813349  2.16895597  1.36860228
#> 5   0.82158108  1.20796200 -0.22577099
#> 6   0.68864025 -1.12310858  1.51647060
#> 7   0.55391765 -0.40288484 -1.54875280
#> 8  -0.06191171 -0.46665535  0.58461375
#> 9  -0.30596266  0.77996512  0.12385424
#> 10 -0.38047100 -0.08336907  0.21594157
#> 
#> $Subset9
#>            V8          V6          V7
#> 1  -0.4910312  0.25331851  0.37963948
#> 2  -2.3091689 -0.02854676 -0.50232345
#> 3   1.0057385 -0.04287046 -0.33320738
#> 4  -0.7092008  1.36860228 -1.01857538
#> 5  -0.6880086 -0.22577099 -1.07179123
#> 6   1.0255714  1.51647060  0.30352864
#> 7  -0.2847730 -1.54875280  0.44820978
#> 8  -1.2207177  0.58461375  0.05300423
#> 9   0.1813035  0.12385424  0.92226747
#> 10 -0.1388914  0.21594157  2.05008469
#> 
#> $Subset10
#>              V9          V5          V6
#> 1   0.005764186 -0.69470698  0.25331851
#> 2   0.385280401 -0.20791728 -0.02854676
#> 3  -0.370660032 -1.26539635 -0.04287046
#> 4   0.644376549  2.16895597  1.36860228
#> 5  -0.220486562  1.20796200 -0.22577099
#> 6   0.331781964 -1.12310858  1.51647060
#> 7   1.096839013 -0.40288484 -1.54875280
#> 8   0.435181491 -0.46665535  0.58461375
#> 9  -0.325931586  0.77996512  0.12385424
#> 10  1.148807618 -0.08336907  0.21594157
#> 

# Extract the best subset and retain an additional numeric column
df$CopyV1 <- df$V1
corrSubset(res, df, which = 1, keepExtra = TRUE)
#>            V2          V5        V10          V7      CopyV1
#> 1   1.2240818 -0.69470698  0.9935039  0.37963948 -0.56047565
#> 2   0.3598138 -0.20791728  0.5483970 -0.50232345 -0.23017749
#> 3   0.4007715 -1.26539635  0.2387317 -0.33320738  1.55870831
#> 4   0.1106827  2.16895597 -0.6279061 -1.01857538  0.07050839
#> 5  -0.5558411  1.20796200  1.3606524 -1.07179123  0.12928774
#> 6   1.7869131 -1.12310858 -0.6002596  0.30352864  1.71506499
#> 7   0.4978505 -0.40288484  2.1873330  0.44820978  0.46091621
#> 8  -1.9666172 -0.46665535  1.5326106  0.05300423 -1.26506123
#> 9   0.7013559  0.77996512 -0.2357004  0.92226747 -0.68685285
#> 10 -0.4727914 -0.08336907 -1.0264209  2.05008469 -0.44566197