pkgdown/extra.css

Skip to contents

normalize_data() normalizes the data by scaling them and removing batch effects. It first converts the data to wide format if they are not already in that format. It then removes the batch effects and centers and/or scales the data. To remove batch effects, it uses remove_batch_effects(), which relies on the limma package. For scaling, it uses scale() from base R.

Usage

normalize_data(
  olink_data,
  metadata = NULL,
  wide = TRUE,
  center = TRUE,
  scale = TRUE,
  batch = NULL,
  batch2 = NULL,
  return_long = FALSE,
  save = FALSE,
  file_name = "normalized_data"
)

Arguments

olink_data

A dataset containing Olink data to be normalized.

metadata

A dataset containing the metadata information.

wide

A logical value indicating whether the data is in wide format. Default is TRUE.

center

A logical value indicating whether to center the data. Default is TRUE.

scale

A logical value indicating whether to scale the data. Default is TRUE.

batch

The name of the metadata column containing the batch information. Batch effects are corrected only when this parameter is provided. Default is NULL.

batch2

The metadata column containing the second batch information. Default is NULL.

return_long

A logical value indicating whether to return the data in long format. Default is FALSE.

save

A logical value indicating whether to save the data. Default is FALSE.

file_name

The name of the file to be saved. Default is "normalized_data".

Value

A tibble containing the normalized data.

Examples

# Non-normalized data
example_data |>
  dplyr::select(DAid, Assay, NPX) |>
  tidyr::pivot_wider(names_from = "Assay", values_from = "NPX")
#> # A tibble: 586 × 101
#>    DAid    AARSD1   ABL1  ACAA1    ACAN    ACE2  ACOX1   ACP5    ACP6  ACTA2
#>    <chr>    <dbl>  <dbl>  <dbl>   <dbl>   <dbl>  <dbl>  <dbl>   <dbl>  <dbl>
#>  1 DA00001   3.39  2.76   1.71   0.0333  1.76   -0.919 1.54    2.15    2.81 
#>  2 DA00002   1.42  1.25  -0.816 -0.459   0.826  -0.902 0.647   1.30    0.798
#>  3 DA00003  NA    NA     NA      0.989  NA       0.330 1.37   NA      NA    
#>  4 DA00004   3.41  3.38   1.69  NA       1.52   NA     0.841   0.582   1.70 
#>  5 DA00005   5.01  5.05   0.128  0.401  -0.933  -0.584 0.0265  1.16    2.73 
#>  6 DA00006   6.83  1.18  -1.74  -0.156   1.53   -0.721 0.620   0.527   0.772
#>  7 DA00007  NA    NA      3.96   0.682   3.14    2.62  1.47    2.25    2.01 
#>  8 DA00008   2.78  0.812 -0.552  0.982  -0.101  -0.304 0.376  -0.826   1.52 
#>  9 DA00009   4.39  3.34  -0.452 -0.868   0.395   1.71  1.49   -0.0285  0.200
#> 10 DA00010   1.83  1.21  -0.912 -1.04   -0.0918 -0.304 1.69    0.0920  2.04 
#> # ℹ 576 more rows
#> # ℹ 91 more variables: ACTN4 <dbl>, ACY1 <dbl>, ADA <dbl>, ADA2 <dbl>,
#> #   ADAM15 <dbl>, ADAM23 <dbl>, ADAM8 <dbl>, ADAMTS13 <dbl>, ADAMTS15 <dbl>,
#> #   ADAMTS16 <dbl>, ADAMTS8 <dbl>, ADCYAP1R1 <dbl>, ADGRE2 <dbl>, ADGRE5 <dbl>,
#> #   ADGRG1 <dbl>, ADGRG2 <dbl>, ADH4 <dbl>, ADM <dbl>, AGER <dbl>, AGR2 <dbl>,
#> #   AGR3 <dbl>, AGRN <dbl>, AGRP <dbl>, AGXT <dbl>, AHCY <dbl>, AHSP <dbl>,
#> #   AIF1 <dbl>, AIFM1 <dbl>, AK1 <dbl>, AKR1B1 <dbl>, AKR1C4 <dbl>, …

# Center data
normalize_data(example_data, example_metadata, wide = FALSE, center = TRUE, scale = FALSE)
#> # A tibble: 586 × 101
#>    DAid    AARSD1   ABL1  ACAA1   ACAN   ACE2  ACOX1    ACP5    ACP6   ACTA2
#>    <chr>    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>   <dbl>   <dbl>
#>  1 DA00001  0.259  0.949  0.697 -0.532  0.827 -1.42   0.612   1.02    1.20  
#>  2 DA00002 -1.71  -0.563 -1.83  -1.02  -0.102 -1.40  -0.278   0.168  -0.813 
#>  3 DA00003 NA     NA     NA      0.424 NA     -0.167  0.447  NA      NA     
#>  4 DA00004  0.278  1.57   0.683 NA      0.593 NA     -0.0839 -0.551   0.0892
#>  5 DA00005  1.88   3.24  -0.882 -0.165 -1.86  -1.08  -0.898   0.0236  1.12  
#>  6 DA00006  3.70  -0.628 -2.75  -0.721  0.600 -1.22  -0.305  -0.606  -0.840 
#>  7 DA00007 NA     NA      2.95   0.117  2.21   2.12   0.548   1.12    0.398 
#>  8 DA00008 -0.351 -0.998 -1.56   0.416 -1.03  -0.800 -0.549  -1.96   -0.0901
#>  9 DA00009  1.26   1.53  -1.46  -1.43  -0.533  1.21   0.562  -1.16   -1.41  
#> 10 DA00010 -1.30  -0.596 -1.92  -1.60  -1.02  -0.801  0.765  -1.04    0.427 
#> # ℹ 576 more rows
#> # ℹ 91 more variables: ACTN4 <dbl>, ACY1 <dbl>, ADA <dbl>, ADA2 <dbl>,
#> #   ADAM15 <dbl>, ADAM23 <dbl>, ADAM8 <dbl>, ADAMTS13 <dbl>, ADAMTS15 <dbl>,
#> #   ADAMTS16 <dbl>, ADAMTS8 <dbl>, ADCYAP1R1 <dbl>, ADGRE2 <dbl>, ADGRE5 <dbl>,
#> #   ADGRG1 <dbl>, ADGRG2 <dbl>, ADH4 <dbl>, ADM <dbl>, AGER <dbl>, AGR2 <dbl>,
#> #   AGR3 <dbl>, AGRN <dbl>, AGRP <dbl>, AGXT <dbl>, AHCY <dbl>, AHSP <dbl>,
#> #   AIF1 <dbl>, AIFM1 <dbl>, AK1 <dbl>, AKR1B1 <dbl>, AKR1C4 <dbl>, …

# Center and scale data (z-score scaling)
normalize_data(example_data, example_metadata, wide = FALSE, center = TRUE, scale = TRUE)
#> # A tibble: 586 × 101
#>    DAid    AARSD1   ABL1  ACAA1   ACAN    ACE2  ACOX1   ACP5    ACP6   ACTA2
#>    <chr>    <dbl>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>   <dbl>   <dbl>
#>  1 DA00001  0.240  0.685  0.498 -0.753  0.722  -1.39   0.800  0.991   1.16  
#>  2 DA00002 -1.58  -0.406 -1.30  -1.45  -0.0885 -1.37  -0.364  0.163  -0.786 
#>  3 DA00003 NA     NA     NA      0.600 NA      -0.163  0.584 NA      NA     
#>  4 DA00004  0.257  1.14   0.488 NA      0.517  NA     -0.110 -0.536   0.0862
#>  5 DA00005  1.74   2.34  -0.629 -0.233 -1.62   -1.06  -1.18   0.0230  1.08  
#>  6 DA00006  3.42  -0.453 -1.96  -1.02   0.523  -1.19  -0.399 -0.590  -0.812 
#>  7 DA00007 NA     NA      2.11   0.165  1.93    2.08   0.717  1.09    0.385 
#>  8 DA00008 -0.325 -0.721 -1.12   0.589 -0.898  -0.783 -0.719 -1.90   -0.0871
#>  9 DA00009  1.17   1.11  -1.04  -2.03  -0.464   1.18   0.735 -1.13   -1.36  
#> 10 DA00010 -1.20  -0.431 -1.37  -2.27  -0.889  -0.784  1.00  -1.01    0.413 
#> # ℹ 576 more rows
#> # ℹ 91 more variables: ACTN4 <dbl>, ACY1 <dbl>, ADA <dbl>, ADA2 <dbl>,
#> #   ADAM15 <dbl>, ADAM23 <dbl>, ADAM8 <dbl>, ADAMTS13 <dbl>, ADAMTS15 <dbl>,
#> #   ADAMTS16 <dbl>, ADAMTS8 <dbl>, ADCYAP1R1 <dbl>, ADGRE2 <dbl>, ADGRE5 <dbl>,
#> #   ADGRG1 <dbl>, ADGRG2 <dbl>, ADH4 <dbl>, ADM <dbl>, AGER <dbl>, AGR2 <dbl>,
#> #   AGR3 <dbl>, AGRN <dbl>, AGRP <dbl>, AGXT <dbl>, AHCY <dbl>, AHSP <dbl>,
#> #   AIF1 <dbl>, AIFM1 <dbl>, AK1 <dbl>, AKR1B1 <dbl>, AKR1C4 <dbl>, …

# Center, scale and remove batch effects
normalize_data(example_data, example_metadata, wide = FALSE, batch = "Cohort")
#> # A tibble: 586 × 101
#>    DAid   AARSD1   ABL1  ACAA1   ACAN   ACE2  ACOX1   ACP5    ACP6  ACTA2  ACTN4
#>    <chr>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
#>  1 DA000…  0.104  0.476  0.391 -0.779  0.544 -1.46   0.701  0.915   0.985  0.516
#>  2 DA000… -1.74  -0.656 -1.43  -1.48  -0.286 -1.44  -0.471  0.0838 -1.02  -0.761
#>  3 DA000… NA     NA     NA      0.574 NA     -0.232  0.484 NA      NA     NA    
#>  4 DA000…  0.121  0.944  0.380 NA      0.335 NA     -0.215 -0.618  -0.122 -0.486
#>  5 DA000…  1.62   2.20  -0.746 -0.259 -1.86  -1.13  -1.29  -0.0569  0.903 -0.103
#>  6 DA000…  3.32  -0.705 -2.09  -1.05   0.341 -1.26  -0.507 -0.672  -1.05  NA    
#>  7 DA000… NA     NA      2.02   0.140  1.78   2.01   0.618  1.01    0.186 -0.388
#>  8 DA000… -0.468 -0.983 -1.24   0.563 -1.12  -0.854 -0.828 -1.99   -0.300 -1.60 
#>  9 DA000…  1.04   0.915 -1.16  -2.05  -0.671  1.12   0.636 -1.21   -1.61  -1.50 
#> 10 DA000… -1.36  -0.681 -1.49  -2.30  -1.11  -0.855  0.903 -1.10    0.214  0.136
#> # ℹ 576 more rows
#> # ℹ 90 more variables: ACY1 <dbl>, ADA <dbl>, ADA2 <dbl>, ADAM15 <dbl>,
#> #   ADAM23 <dbl>, ADAM8 <dbl>, ADAMTS13 <dbl>, ADAMTS15 <dbl>, ADAMTS16 <dbl>,
#> #   ADAMTS8 <dbl>, ADCYAP1R1 <dbl>, ADGRE2 <dbl>, ADGRE5 <dbl>, ADGRG1 <dbl>,
#> #   ADGRG2 <dbl>, ADH4 <dbl>, ADM <dbl>, AGER <dbl>, AGR2 <dbl>, AGR3 <dbl>,
#> #   AGRN <dbl>, AGRP <dbl>, AGXT <dbl>, AHCY <dbl>, AHSP <dbl>, AIF1 <dbl>,
#> #   AIFM1 <dbl>, AK1 <dbl>, AKR1B1 <dbl>, AKR1C4 <dbl>, AKT1S1 <dbl>, …