pkgdown/extra.css

Skip to contents

impute_mice() imputes missing values in a dataset using the MICE algorithm. It allows the user to exclude certain columns from imputation and can also display the percentage of missing values in each column before imputation. The user can also specify the number of imputed datasets to create, the maximum number of iterations, and the imputation method to use.

Usage

impute_mice(
  olink_data,
  wide = TRUE,
  m = 5,
  maxit = 5,
  method = "pmm",
  exclude_cols = c("DAid", "Disease"),
  show_na_percentage = TRUE
)

Arguments

The input dataset.

wide

If TRUE, the data is in wide format.

m

The number of imputed datasets to create. Default is 5.

maxit

The maximum number of iterations. Default is 5.

method

The imputation method to use. Type methods(mice::mice) after library(mice) for all options. Default is "pmm".

exclude_cols

The columns to exclude from imputation.

show_na_percentage

If TRUE, the percentage of missing values in each column is displayed.

Value

The imputed dataset.

Examples

# Data before imputation
test_data <- example_data |>
  dplyr::select(DAid, Assay, NPX) |>
  tidyr::pivot_wider(names_from = "Assay", values_from = "NPX") |>
  dplyr::slice_head(n = 100)
test_data
#> # A tibble: 100 × 101
#>    DAid    AARSD1   ABL1  ACAA1    ACAN    ACE2  ACOX1   ACP5    ACP6  ACTA2
#>    <chr>    <dbl>  <dbl>  <dbl>   <dbl>   <dbl>  <dbl>  <dbl>   <dbl>  <dbl>
#>  1 DA00001   3.39  2.76   1.71   0.0333  1.76   -0.919 1.54    2.15    2.81 
#>  2 DA00002   1.42  1.25  -0.816 -0.459   0.826  -0.902 0.647   1.30    0.798
#>  3 DA00003  NA    NA     NA      0.989  NA       0.330 1.37   NA      NA    
#>  4 DA00004   3.41  3.38   1.69  NA       1.52   NA     0.841   0.582   1.70 
#>  5 DA00005   5.01  5.05   0.128  0.401  -0.933  -0.584 0.0265  1.16    2.73 
#>  6 DA00006   6.83  1.18  -1.74  -0.156   1.53   -0.721 0.620   0.527   0.772
#>  7 DA00007  NA    NA      3.96   0.682   3.14    2.62  1.47    2.25    2.01 
#>  8 DA00008   2.78  0.812 -0.552  0.982  -0.101  -0.304 0.376  -0.826   1.52 
#>  9 DA00009   4.39  3.34  -0.452 -0.868   0.395   1.71  1.49   -0.0285  0.200
#> 10 DA00010   1.83  1.21  -0.912 -1.04   -0.0918 -0.304 1.69    0.0920  2.04 
#> # ℹ 90 more rows
#> # ℹ 91 more variables: ACTN4 <dbl>, ACY1 <dbl>, ADA <dbl>, ADA2 <dbl>,
#> #   ADAM15 <dbl>, ADAM23 <dbl>, ADAM8 <dbl>, ADAMTS13 <dbl>, ADAMTS15 <dbl>,
#> #   ADAMTS16 <dbl>, ADAMTS8 <dbl>, ADCYAP1R1 <dbl>, ADGRE2 <dbl>, ADGRE5 <dbl>,
#> #   ADGRG1 <dbl>, ADGRG2 <dbl>, ADH4 <dbl>, ADM <dbl>, AGER <dbl>, AGR2 <dbl>,
#> #   AGR3 <dbl>, AGRN <dbl>, AGRP <dbl>, AGXT <dbl>, AHCY <dbl>, AHSP <dbl>,
#> #   AIF1 <dbl>, AIFM1 <dbl>, AK1 <dbl>, AKR1B1 <dbl>, AKR1C4 <dbl>, …

# Data after imputation
impute_mice(test_data)
#> # A tibble: 88 × 2
#>    column  na_percentage
#>    <chr>           <dbl>
#>  1 ADA2                7
#>  2 ANG                 7
#>  3 ANGPTL3             7
#>  4 ANPEP               7
#>  5 AOC3                7
#>  6 APOM                7
#>  7 ART3                7
#>  8 AXL                 7
#>  9 ADAMTS8             6
#> 10 AHSP                6
#> # ℹ 78 more rows
#> Warning: Number of logged events: 2645
#> # A tibble: 100 × 101
#>    DAid  AARSD1  ABL1  ACAA1    ACAN    ACE2  ACOX1   ACP5    ACP6 ACTA2   ACTN4
#>    <chr>  <dbl> <dbl>  <dbl>   <dbl>   <dbl>  <dbl>  <dbl>   <dbl> <dbl>   <dbl>
#>  1 DA00…   3.39 2.76   1.71   0.0333  1.76   -0.919 1.54    2.15   2.81   0.742 
#>  2 DA00…   1.42 1.25  -0.816 -0.459   0.826  -0.902 0.647   1.30   0.798 -0.0659
#>  3 DA00…   3.48 7.25  -2.09   0.989   4.48    0.330 1.37    1.34   4.48   1.52  
#>  4 DA00…   3.41 3.38   1.69   0.401   1.52    2.82  0.841   0.582  1.70   0.108 
#>  5 DA00…   5.01 5.05   0.128  0.401  -0.933  -0.584 0.0265  1.16   2.73   0.350 
#>  6 DA00…   6.83 1.18  -1.74  -0.156   1.53   -0.721 0.620   0.527  0.772  1.52  
#>  7 DA00…   4.92 6.57   3.96   0.682   3.14    2.62  1.47    2.25   2.01   0.170 
#>  8 DA00…   2.78 0.812 -0.552  0.982  -0.101  -0.304 0.376  -0.826  1.52  -0.597 
#>  9 DA00…   4.39 3.34  -0.452 -0.868   0.395   1.71  1.49   -0.0285 0.200 -0.532 
#> 10 DA00…   1.83 1.21  -0.912 -1.04   -0.0918 -0.304 1.69    0.0920 2.04   0.501 
#> # ℹ 90 more rows
#> # ℹ 90 more variables: ACY1 <dbl>, ADA <dbl>, ADA2 <dbl>, ADAM15 <dbl>,
#> #   ADAM23 <dbl>, ADAM8 <dbl>, ADAMTS13 <dbl>, ADAMTS15 <dbl>, ADAMTS16 <dbl>,
#> #   ADAMTS8 <dbl>, ADCYAP1R1 <dbl>, ADGRE2 <dbl>, ADGRE5 <dbl>, ADGRG1 <dbl>,
#> #   ADGRG2 <dbl>, ADH4 <dbl>, ADM <dbl>, AGER <dbl>, AGR2 <dbl>, AGR3 <dbl>,
#> #   AGRN <dbl>, AGRP <dbl>, AGXT <dbl>, AHCY <dbl>, AHSP <dbl>, AIF1 <dbl>,
#> #   AIFM1 <dbl>, AK1 <dbl>, AKR1B1 <dbl>, AKR1C4 <dbl>, AKT1S1 <dbl>, …