Replicates

This notebook processes replicate Orbitrap IRMS analyses of MRFA and generates Figure 11 of Kantnerova et al. (2024)

Author

Kantnerova et al. (2024)

Published

October 2, 2025

Setup

Using R version 4.5.0 (2025-04-11) , tidyverse version 2.0.0, and isoorbi version 1.5.1.

# load packages
library(isoorbi) # for orbitrap function
library(dplyr) # for mutating data frames
library(stringr) # for extarcting text information
library(forcats) # for recoding factors
library(ggplot2) # for data visualization
library(cowplot) # arrange multipanel plots

Data from RAW (recommend)

# read files
raw_files <- 
  "data/replicates" |>
  orbi_find_raw() |>
  orbi_read_raw(include_spectra = 1) |>
  orbi_aggregate_raw() |>
  orbi_identify_isotopocules("data/replicates/isotopologs.tsv") |>
  orbi_filter_isotopocules()
 [118ms] orbi_read_raw() read
MRFA_MS2_120kRes_1E6AGC_50NCE_10min_10uscans_Replicate_1.raw from cache,
included the spectrum from 1 scan
 [41ms] orbi_read_raw() read
MRFA_MS2_120kRes_1E6AGC_50NCE_10min_10uscans_Replicate_2.raw from cache,
included the spectrum from 1 scan
 [31ms] orbi_read_raw() read
MRFA_MS2_120kRes_1E6AGC_50NCE_10min_10uscans_Replicate_3.raw from cache,
included the spectrum from 1 scan
 [31ms] orbi_read_raw() read
MRFA_MS2_120kRes_1E6AGC_50NCE_10min_10uscans_Replicate_4.raw from cache,
included the spectrum from 1 scan
 [34ms] orbi_read_raw() read
MRFA_MS2_120kRes_1E6AGC_50NCE_10min_10uscans_Replicate_5.raw from cache,
included the spectrum from 1 scan
 [316ms] orbi_read_raw() finished reading 5 files
 [467ms] orbi_aggregate_raw() aggregated file_info (5), scans (1.15k), peaks
(1.06M), and spectra (77.71k) from 5 files using the standard aggregator
! [1.3s] orbi_identify_isotopocules() identified 19.58k/1.06M peaks (1.9%)
representing 32% of the total ion current (TIC) as isotopocules M0, 15N, 13C,
2H, 33S, and 34S but encountered 2 warnings! isotopocules 13C and 15N match multiple peaks in some same scans (14
  multi-matched peaks in total) - make sure to run orbi_flag_satellite_peaks()
  and orbi_plot_satellite_peak()! isotopocules 15N, 2H, M0, 33S, and 34S are missing from some scans (1.16k
  missing peaks in total) - make sure to evaluate coverage with e.g.
  orbi_plot_isotopocule_coverage()
 [7ms] orbi_filter_isotopocules() removed 1.04M / 1.06M peaks (98%) because
they were missing isotopocules (1.16k), or unidentified peaks (1.04M).
Remaining isotopocules: M0, 15N, 13C, 2H, 33S, and 34S.

# get replicate information from file name
raw_files$file_info$replicate <- raw_files$file_info$filename |>
  str_extract("Replicate_\\d") |> str_replace("_", " ")

Bonus Figure: MFRA Spectra

# Arginine mass window
raw_files |> orbi_plot_spectra(mz_min = 102, mz_max = 110)
Figure 1: MRFA spectra - alanine

Data from ISOX (legacy)

# originally, this is how the data was read in
isox_files_aas <-
  "data/replicates" |>
  orbi_find_isox() |>
  orbi_read_isox()

Calculations

# isotopocules data
data_all <- 
  raw_files |>
  orbi_flag_satellite_peaks() |> 
  orbi_flag_weak_isotopocules(min_percent = 90) |> 
  # tight agc data, discard anything that's 5% above or below the mean
  orbi_flag_outliers(agc_fold_cutoff = 1.05) 
 [121ms] orbi_flag_satellite_peaks() flagged 14/19.58k peaks in 2 isotopocules
(15N and 13C) as satellite peaks (0.072%)
 [18ms] orbi_flag_weak_isotopocules() flagged 5 of 90 isotopocules as weak
because they were NOT present in at least 90% of scans in each of the 90 data
groups (based on uidx, compound, and isotopocule) → use
orbi_plot_isotopocule_coverage() to visualize them
 [7ms] orbi_flag_outliers() flagged 4/1151 scans (0.35%) as outliers based on
1.05 fold AGC cutoff, i.e. based on scans below 1/1.05 and above 1.05 times the
average number of ions tic * it.ms in the Orbitrap analyzer, in 5 data groups
(based on uidx) → use orbi_plot_raw_data(y = tic * it.ms) to visualize them
  
# summarize data
ratios <- data_all |>
  # define base peak
  orbi_define_basepeak("M0") |>
  # pull out data with `replicate` from file info added
  orbi_get_data(file_info = "replicate", scans = everything(), peaks = everything()) |>
  # so we can add it to the grouping
  orbi_summarize_results(ratio_method = "sum", .by = "replicate") 
! Warning: 1/4.60k data groups (filename + compound + scan.no) cannot be used
  because the M0 isotopocule is missing
 To investigate, use orbi_flag_weak_isotopocules() and
  orbi_plot_isotopocule_coverage() or orbi_get_isotopocule_coverage() before
  defining the base peak
 [191ms] orbi_define_basepeak() set M0 as the ratio denominator and calculated
14.97k ratio values for 5 isotopocules (15N, 13C, 2H, 33S, and 34S)
 [6ms] orbi_get_data() retrieved 14.97k records from the combination of
file_info (5), scans (1.15k), and peaks (14.97k) via uidx and scan.no
 [129ms] orbi_summarize_results() summarized ratios from 14.86k peak
(excluding 115 flagged peaks; excluding 0 unused peaks) using the sum method
and grouping the data by uidx, compound, basepeak, isotopocule, and replicate

Figure 11 top panel

p_top <- data_all |>
  orbi_plot_raw_data(
    isotopocules = "M0",
    x_breaks = c(5, 10),
    y = intensity,
    y_scale = "linear",
    color = compound,
    show_outliers = FALSE
  ) +
  facet_wrap(~replicate, nrow = 1) +
  labs(x = "time / min", y = "EIC / arb. unit") +
  theme(strip.text = element_text(size = 16))
p_top
Figure 2

Figure 11 bottom panel

p_bottom <- 
  ratios |> filter(isotopocule == "13C") |>
  ggplot() +
  aes(
    replicate, ratio, color = compound, shape = compound,
    ymin = ratio - ratio_sem, ymax = ratio + ratio_sem
  ) +
  # standard deviations
  geom_rect(
    data = function(df) 
      df |> group_by(compound) |>
      summarize(ratio_mean = mean(ratio), ratio_sd = sd(ratio)),
    map = aes(
      ymin = ratio_mean - ratio_sd, ymax = ratio_mean + ratio_sd,
      x = NULL, y = NULL, xmin = -Inf, xmax = +Inf, color = NULL),
    fill = "gray90", show.legend = FALSE
  ) +
  # averages
  geom_hline(
    data = function(df) 
      df |> group_by(compound) |>
      summarize(ratio_mean = mean(ratio)),
    map = aes(yintercept = ratio_mean, color = compound),
    show.legend = FALSE
  ) +
  # data points
  geom_errorbar(width = 0.1, show.legend = FALSE) + 
  geom_point(fill = "white", size = 3) +
  facet_grid(compound ~ ., scales = "free_y") +
  # scales
  scale_color_brewer(palette = "Dark2") +
  scale_shape_manual(values = c(21:25)) +
  scale_y_continuous(breaks = scales::pretty_breaks(3)) +
  # labels
  labs(x = NULL, color = NULL, shape = NULL,
       y = expression("ratio ("^13*C/M0*")")) +
  # theme
  theme_bw() +
  theme(
    text = element_text(size = 16),
    panel.grid = element_blank(),
    strip.background = element_blank(),
    strip.text = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = "top"
  )
p_bottom
Figure 3

Figure 11 combined

plot_grid(
  p_top + theme(legend.position = "none"), p_bottom,
  ncol = 1, rel_heights = c(1, 2), align = "v", axis = "lr"
)
Figure 4: amino acids replicates