# Source code for ms_deisotope.deconvolution.composition_list

"""Deconvolution strategies using a list of compositions.
"""

from ms_deisotope.averagine import (
    PROTON, isotopic_variants,
    TheoreticalIsotopicPattern,
    neutral_mass)

from ms_deisotope.envelope_statistics import average_mz, a_to_a2_ratio, most_abundant_mz
from ms_deisotope.constants import (
    IGNORE_BELOW,
    TRUNCATE_AFTER,
    ERROR_TOLERANCE,
    MAX_ITERATION,
    CONVERGENCE)

from ms_deisotope.peak_dependency_network import PeakDependenceGraph
from ms_deisotope.peak_set import DeconvolutedPeakSolution, DeconvolutedPeakSet
from ms_deisotope.scoring import IsotopicFitRecord

from .base import (DeconvoluterBase)


from .utils import (
    count_placeholders, prepare_peaklist,
    drop_placeholders, first_peak, mean, charge_range_,
    )


class CompositionListDeconvoluterBase(DeconvoluterBase):
    """A mixin class to provide common features for deconvoluters which process spectra
    using a list of targeted compositions.

    Attributes
    ----------
    composition_list : list of :class:`~.Mapping`
        A series of objects which represent elemental compositions and support
        the :class:`~.Mapping` interface to access their individual elements.
    incremental_truncation : object or None
        The value of the ``incremental_truncation`` keyword argument, or :const:`None`
        when it was not given. When it is not :const:`None`, it is handed to
        :meth:`fit_incremental_truncation` to produce additional candidate fits.
    """

    def __init__(self, composition_list, *args, **kwargs):
        self.composition_list = list(composition_list)
        # Read with ``get`` rather than ``pop``: the option stays in ``kwargs`` and is
        # therefore still forwarded to the parent initializer below.
        self.incremental_truncation = kwargs.get(
            "incremental_truncation", None)
        super(CompositionListDeconvoluterBase, self).__init__(*args, **kwargs)

    def generate_theoretical_isotopic_cluster(self, composition, charge, truncate_after=TRUNCATE_AFTER,
                                              mass_shift=None, charge_carrier=PROTON,
                                              ignore_below=IGNORE_BELOW):
        """Generate a theoretical isotopic pattern for ``composition``

        Parameters
        ----------
        composition : :class:`~.Mapping`
            An object representing an elemental composition
        charge : int
            The charge state to generate the isotopic pattern for
        truncate_after : float, optional
            The percent of intensity to ensure is included in a theoretical isotopic pattern
            starting from the monoisotopic peak. This will cause theoretical isotopic patterns
            to be truncated, excluding trailing peaks which do not contribute substantially to
            the overall shape of the isotopic pattern.
        mass_shift : float, optional
            An arbitrary mass shift to apply to the generated theoretical isotopic pattern,
            moving all peaks forward by that mass charge ratio transformed mass.
        charge_carrier : float, optional
            The mass of the charge carrier, or more specifically, the moiety which is added for
            each incremental change in charge state. Defaults to |PROTON|
        ignore_below : float, optional
            The threshold forwarded to :meth:`~.TheoreticalIsotopicPattern.ignore_below`,
            applied after truncation. Defaults to the ``IGNORE_BELOW`` constant.

        Returns
        -------
        :class:`~.TheoreticalIsotopicPattern`
            The theoretical isotopic pattern generated
        """
        tid = isotopic_variants(
            composition, charge=charge, charge_carrier=charge_carrier)
        tid = TheoreticalIsotopicPattern(tid, tid[0].mz)
        tid.truncate_after(truncate_after)
        tid.ignore_below(ignore_below)
        if mass_shift is not None:
            # The shift is given as a mass, so it is divided by the charge magnitude
            # to express it on the m/z axis before moving the pattern.
            tid.shift(tid[0].mz + mass_shift / abs(charge))
        return tid

    def recalibrate_theoretical_mz(self, theoretical_distribution, experimental_mz):
        """Recalibrate the m/z of the theoretical isotopic pattern to start
        from the peak matching the experimental monoisotopic m/z

        Parameters
        ----------
        theoretical_distribution : :class:`TheoreticalIsotopicPattern`
            The theoretical isotopic pattern to adjust
        experimental_mz : float
            The experimental monoisotopic peak m/z

        Returns
        -------
        TheoreticalIsotopicPattern
            The same object that was passed in, after calling its ``shift`` method
        """
        theoretical_distribution.shift(experimental_mz)
        return theoretical_distribution

    def fit_composition_at_charge(self, composition, charge, error_tolerance=ERROR_TOLERANCE,
                                  charge_carrier=PROTON, truncate_after=TRUNCATE_AFTER,
                                  ignore_below=IGNORE_BELOW, mass_shift=None):
        """Produce an isotopic fit for `composition` at `charge` against the experimental peak set.

        This method requires that the instance also possess a method named
        `match_theoretical_isotopic_distribution` such as the one implemented in
        :class:`DeconvoluterBase`.

        Parameters
        ----------
        composition : :class:`~.Mapping`
            An object representing an elemental composition
        charge : int
            The charge state to generate the isotopic pattern for
        error_tolerance : float
            The mass accuracy required for peak matches
        charge_carrier : float, optional
            The mass of the charge carrier, or more specifically, the moiety which is added for
            each incremental change in charge state. Defaults to |PROTON|
        truncate_after : float, optional
            The percent of intensity to ensure is included in a theoretical isotopic pattern
            starting from the monoisotopic peak. This will cause theoretical isotopic patterns
            to be truncated, excluding trailing peaks which do not contribute substantially to
            the overall shape of the isotopic pattern.
        ignore_below : float, optional
            Forwarded to :meth:`generate_theoretical_isotopic_cluster`.
        mass_shift : float, optional
            An arbitrary mass shift to apply to the generated theoretical isotopic pattern,
            moving all peaks forward by that mass charge ratio transformed mass.

        Returns
        -------
        :class:`~.IsotopicFitRecord` or None
            :const:`None` when more than half of the matched experimental envelope is
            made of placeholders, otherwise the scored fit.
        """
        tid = self.generate_theoretical_isotopic_cluster(
            composition, charge=charge, truncate_after=truncate_after,
            charge_carrier=charge_carrier, ignore_below=ignore_below,
            mass_shift=mass_shift)

        # When an experimental peak sits at the theoretical monoisotopic m/z, anchor the
        # whole theoretical pattern on that observed m/z before matching the other peaks.
        monoisotopic_peak = self.peaklist.has_peak(tid[0].mz, error_tolerance)
        if monoisotopic_peak is not None:
            tid = self.recalibrate_theoretical_mz(tid, monoisotopic_peak.mz)
        eid = self.match_theoretical_isotopic_distribution(
            tid.peaklist, error_tolerance)

        missed_peaks = count_placeholders(eid)
        if missed_peaks > len(eid) / 2:
            return None

        self.scale_theoretical_distribution(tid, eid)
        score = self.scorer.evaluate(self.peaklist, eid, tid.peaklist)
        fit = IsotopicFitRecord(None, score, charge, tid, eid)
        fit.missed_peaks = missed_peaks
        return fit

    def _make_deconvoluted_peak_solution(self, fit, composition, charge_carrier):
        """Build a :class:`~.DeconvolutedPeakSolution` describing ``fit``.

        Parameters
        ----------
        fit : :class:`~.IsotopicFitRecord`
            The accepted isotopic fit
        composition : :class:`~.Mapping`
            The composition the fit was generated from, stored on the solution
        charge_carrier : float
            The mass of the charge carrier used for the deconvolution. It is used for
            every m/z to neutral mass conversion made here.

        Returns
        -------
        :class:`~.DeconvolutedPeakSolution`
        """
        eid = fit.experimental
        tid = fit.theoretical
        charge = fit.charge
        rep_eid = drop_placeholders(eid)

        # NOTE(review): the ``> 1`` filter presumably excludes placeholder peaks, which
        # appear to carry a nominal intensity -- confirm against the placeholder definition.
        total_abundance = sum(
            p.intensity for p in eid if p.intensity > 1)

        monoisotopic_mz = tid.monoisotopic_mz
        monoisotopic_mass = neutral_mass(
            monoisotopic_mz, charge, charge_carrier)
        reference_peak = first_peak(eid)

        # All three neutral masses are derived with the same charge carrier so that they
        # stay mutually consistent when a non-default carrier is used.
        most_abundant_mass = neutral_mass(
            most_abundant_mz(eid), charge, charge_carrier)
        average_mass = neutral_mass(
            average_mz(eid), charge, charge_carrier)

        peak = DeconvolutedPeakSolution(
            composition, fit, monoisotopic_mass, total_abundance, charge,
            signal_to_noise=mean(p.signal_to_noise for p in rep_eid),
            index=reference_peak.index,
            full_width_at_half_max=mean(
                p.full_width_at_half_max for p in rep_eid),
            a_to_a2_ratio=a_to_a2_ratio(tid),
            most_abundant_mass=most_abundant_mass,
            average_mass=average_mass,
            score=fit.score,
            envelope=[(p.mz, p.intensity) for p in rep_eid],
            mz=monoisotopic_mz,
            area=sum(e.area for e in eid))
        return peak

    def deconvolute_composition(self, composition, error_tolerance=ERROR_TOLERANCE, charge_range=(1, 8),
                                charge_carrier=PROTON, truncate_after=TRUNCATE_AFTER,
                                ignore_below=IGNORE_BELOW, mass_shift=None):
        """For each charge state under consideration, fit the theoretical isotopic pattern for this
        composition, and if the fit is satisfactory, add it to the results set.

        Parameters
        ----------
        composition : :class:`~.Mapping`
            An object representing an elemental composition
        error_tolerance : float
            The mass accuracy required for peak matches
        charge_range : tuple
            The charge state range to generate the isotopic patterns for
        charge_carrier : float, optional
            The mass of the charge carrier, or more specifically, the moiety which is added for
            each incremental change in charge state. Defaults to |PROTON|
        truncate_after : float, optional
            The percent of intensity to ensure is included in a theoretical isotopic pattern
            starting from the monoisotopic peak. This will cause theoretical isotopic patterns
            to be truncated, excluding trailing peaks which do not contribute substantially to
            the overall shape of the isotopic pattern.
        ignore_below : float, optional
            Forwarded to :meth:`fit_composition_at_charge`.
        mass_shift : float, optional
            An arbitrary mass shift to apply to the generated theoretical isotopic pattern,
            moving all peaks forward by that mass charge ratio transformed mass.
        """
        for charge in charge_range_(*charge_range):
            fit = self.fit_composition_at_charge(
                composition, charge=charge, error_tolerance=error_tolerance,
                truncate_after=truncate_after, charge_carrier=charge_carrier,
                mass_shift=mass_shift, ignore_below=ignore_below)
            if fit is None or self.scorer.reject(fit):
                continue

            rep_eid = drop_placeholders(fit.experimental)
            # Require at least two real peaks, and at least half of the theoretical
            # pattern to be backed by real peaks.
            if len(rep_eid) < 2 or len(rep_eid) < (len(fit.theoretical) / 2.):
                continue

            if self.incremental_truncation is not None:
                fits = [fit]
                for case in self.fit_incremental_truncation(fit, self.incremental_truncation):
                    if not self.scorer.reject(case):
                        fits.append(case)
                fit = self.scorer.select.best(fits)

            peak = self._make_deconvoluted_peak_solution(
                fit, composition, charge_carrier)
            self._deconvoluted_peaks.append(peak)
            if self.use_subtraction:
                # Subtract the pattern of the fit that was actually reported, which may be
                # a truncated variant rather than the initial full-length pattern.
                self.subtraction(fit.theoretical, error_tolerance)
class CompositionListDeconvoluter(CompositionListDeconvoluterBase):
    """Fit exact isotopic patterns from a list of compositions.

    Each fit is accepted at the moment it is made, which makes this algorithm
    unsuitable for complex spectra where isotopic patterns will share peaks.

    Attributes
    ----------
    composition_list : list of :class:`~.Mapping`
        A series of objects which represent elemental compositions and support
        the :class:`~.Mapping` interface to access their individual elements.
    peaklist : :class:`~ms_peak_picker.PeakSet`
        The collection of :class:`~.ms_peak_picker.FittedPeak` instances and possible
        associated data to deconvolute.
    scorer : :class:`~.IsotopicFitterBase`
        The criterion for evaluating individual isotopic pattern fits
    merge_isobaric_peaks : bool
        If multiple passes produce peaks with identical mass values,
        should those peaks be summed
    minimum_intensity : float
        Experimental peaks whose intensity is below this level will be ignored
        by peak querying methods
    scale_method : str
        The name of the method to use to scale theoretical isotopic pattern intensities
        to match the experimental isotopic pattern. For a description of options, see
        :meth:`~.TheoreticalIsotopicPattern.scale`.
    use_subtraction : bool
        Whether or not to apply a subtraction procedure to experimental peaks after they
        have been fitted. This is only necessary if the same signal may be examined multiple
        times as in a multi-pass method or when peak dependence is not considered
    verbose : bool
        Produce extra logging information
    """

    def __init__(self, peaklist, composition_list, scorer, use_subtraction=False, scale_method='sum',
                 verbose=False, use_quick_charge=False, **kwargs):
        # Instance state is assigned first, then the remaining options are handed to the
        # parent initializer, always requesting ``merge_isobaric_peaks=True``.
        self.peaklist = prepare_peaklist(peaklist)
        self.scorer = scorer
        self.verbose = verbose
        self._deconvoluted_peaks = []
        self.use_quick_charge = use_quick_charge
        parent = super(CompositionListDeconvoluter, self)
        parent.__init__(
            composition_list,
            use_subtraction=use_subtraction,
            scale_method=scale_method,
            merge_isobaric_peaks=True,
            **kwargs)

    def deconvolute(self, error_tolerance=ERROR_TOLERANCE, charge_range=(1, 8), charge_carrier=PROTON,
                    truncate_after=TRUNCATE_AFTER, ignore_below=IGNORE_BELOW, mass_shift=None,
                    **kwargs):
        """Deconvolute the spectrum, extracting isotopic patterns from the composition list.

        Parameters
        ----------
        error_tolerance : float, optional
            The parts-per-million error tolerance in m/z to search with. Defaults to |ERROR_TOLERANCE|
        charge_range : tuple, optional
            The range of charge states to consider. Defaults to (1, 8)
        charge_carrier : float, optional
            The mass of the charge carrier. Defaults to |PROTON|
        truncate_after : float, optional
            The percent of intensity to ensure is included in a theoretical isotopic pattern
            starting from the monoisotopic peak. This will cause theoretical isotopic patterns
            to be truncated, excluding trailing peaks which do not contribute substantially to
            the overall shape of the isotopic pattern.
        ignore_below : float, optional
            Forwarded unchanged to :meth:`deconvolute_composition`.
        mass_shift : float, optional
            An optional mass shift to apply to each composition
        **kwargs
            Accepted but not used by this method.

        Returns
        -------
        :class:`~.DeconvolutedPeakSet`
        """
        # The same options apply to every composition, so gather them a single time.
        shared_options = dict(
            error_tolerance=error_tolerance,
            charge_range=charge_range,
            charge_carrier=charge_carrier,
            truncate_after=truncate_after,
            ignore_below=ignore_below,
            mass_shift=mass_shift)
        for target in self.composition_list:
            self.deconvolute_composition(target, **shared_options)
        peak_set = DeconvolutedPeakSet(self._deconvoluted_peaks)
        return peak_set.reindex()
class CompositionListPeakDependenceGraphDeconvoluter(CompositionListDeconvoluter):
    """Fit exact isotopic patterns from a list of compositions.

    Fits are added to a peak dependence graph, and the best fit is chosen after
    all fits are calculated at each iteration.

    Attributes
    ----------
    composition_list : list of :class:`~.Mapping`
        A series of objects which represent elemental compositions and support
        the :class:`~.Mapping` interface to access their individual elements.
    peaklist : :class:`~ms_peak_picker.PeakSet`
        The collection of ms_peak_picker.FittedPeak instances and possible
        associated data to deconvolute.
    scorer : :class:`~.IsotopicFitterBase`
        The criterion for evaluating individual isotopic pattern fits
    max_missed_peaks : int
        The maximum number of missing peaks to tolerate in an isotopic fit
    peak_dependency_network : :class:`~PeakDependenceGraph`
        The peak dependence graph onto which isotopic fit dependences on peaks
        are constructed and solved.
    merge_isobaric_peaks : bool
        If multiple passes produce peaks with identical mass values,
        should those peaks be summed
    minimum_intensity : float
        Experimental peaks whose intensity is below this level will be ignored
        by peak querying methods
    scale_method : str
        The name of the method to use to scale theoretical isotopic pattern intensities
        to match the experimental isotopic pattern. For a description of options, see
        :meth:`~.TheoreticalIsotopicPattern.scale`.
    use_subtraction : bool
        Whether or not to apply a subtraction procedure to experimental peaks after they
        have been fitted. This is only necessary if the same signal may be examined multiple
        times as in a multi-pass method or when peak dependence is not considered
    verbose : bool
        Produce extra logging information
    """

    def __init__(self, peaklist, composition_list, scorer, use_subtraction=False, scale_method='sum',
                 verbose=False, use_quick_charge=False, **kwargs):
        # Read without popping: ``kwargs`` is forwarded, unchanged, both to the parent
        # initializer and to the PeakDependenceGraph constructor.
        max_missed_peaks = kwargs.get("max_missed_peaks", 1)
        super(CompositionListPeakDependenceGraphDeconvoluter, self).__init__(
            peaklist, composition_list, scorer=scorer, use_subtraction=use_subtraction,
            scale_method=scale_method, verbose=verbose, use_quick_charge=use_quick_charge,
            **kwargs)

        self.peak_dependency_network = PeakDependenceGraph(
            self.peaklist, maximize=self.scorer.is_maximizing(), **kwargs)
        # Assigned last because the property setter writes through to the graph.
        self.max_missed_peaks = max_missed_peaks

    @property
    def max_missed_peaks(self):
        """The maximum number of missed peaks per isotopic fit record permitted.

        This property directly mirrors :attr:`PeakDependenceGraph.max_missed_peaks`

        Returns
        -------
        int
        """
        return self.peak_dependency_network.max_missed_peaks

    @max_missed_peaks.setter
    def max_missed_peaks(self, value):
        self.peak_dependency_network.max_missed_peaks = value

    def _save_peak_solution(self, solution):
        """Append ``solution`` to the list of deconvoluted peaks."""
        self._deconvoluted_peaks.append(solution)

    def deconvolute_composition(self, composition, error_tolerance=ERROR_TOLERANCE, charge_range=(1, 8),
                                charge_carrier=PROTON, truncate_after=TRUNCATE_AFTER,
                                ignore_below=IGNORE_BELOW, mass_shift=None):
        """For each charge state under consideration, fit the theoretical isotopic pattern for
        ``composition`` and register every fit the scorer does not reject on
        :attr:`peak_dependency_network`.

        Unlike the parent implementation, no peak is emitted here; solutions are chosen
        later by :meth:`select_best_disjoint_subgraphs`.

        Parameters
        ----------
        composition : :class:`~.Mapping`
            An object representing an elemental composition
        error_tolerance : float
            The mass accuracy required for peak matches
        charge_range : tuple
            The charge state range to generate the isotopic patterns for
        charge_carrier : float, optional
            The mass of the charge carrier. Defaults to |PROTON|
        truncate_after : float, optional
            The percent of intensity to ensure is included in a theoretical isotopic pattern
            starting from the monoisotopic peak.
        ignore_below : float, optional
            Forwarded to :meth:`fit_composition_at_charge`.
        mass_shift : float, optional
            An arbitrary mass shift to apply to the generated theoretical isotopic pattern.
        """
        for charge in charge_range_(*charge_range):
            fit = self.fit_composition_at_charge(
                composition, charge, error_tolerance, charge_carrier=charge_carrier,
                truncate_after=truncate_after, mass_shift=mass_shift,
                ignore_below=ignore_below)
            if fit is None:
                continue
            rep_eid = drop_placeholders(fit.experimental)
            # A single real peak cannot support a multiply charged assignment.
            if len(rep_eid) == 1 and fit.charge > 1:
                continue

            # select_best_disjoint_subgraphs recovers the composition from ``fit.data``,
            # so every fit handed to the graph must carry it.
            fit.data = composition
            if not self.scorer.reject(fit):
                self.peak_dependency_network.add_fit_dependence(fit)

            if self.incremental_truncation is not None:
                for case in self.fit_incremental_truncation(fit, self.incremental_truncation):
                    if not self.scorer.reject(case):
                        case.data = composition
                        self.peak_dependency_network.add_fit_dependence(case)

    def populate_graph(self, error_tolerance=ERROR_TOLERANCE, charge_range=(1, 8),
                       truncate_after=TRUNCATE_AFTER, charge_carrier=PROTON,
                       ignore_below=IGNORE_BELOW, mass_shift=None):
        """For each composition, for each charge state under consideration, fit the theoretical
        isotopic pattern for this composition, and if the fit is satisfactory, add it to the
        peak dependence graph for later selecting the optimal solution.

        Parameters
        ----------
        error_tolerance : float
            The mass accuracy required for peak matches
        charge_range : tuple
            The charge state range to generate the isotopic patterns for
        truncate_after : float, optional
            The percent of intensity to ensure is included in a theoretical isotopic pattern
            starting from the monoisotopic peak. This will cause theoretical isotopic patterns
            to be truncated, excluding trailing peaks which do not contribute substantially to
            the overall shape of the isotopic pattern.
        charge_carrier : float, optional
            The mass of the charge carrier, or more specifically, the moiety which is added for
            each incremental change in charge state. Defaults to |PROTON|
        ignore_below : float, optional
            Forwarded to :meth:`deconvolute_composition`.
        mass_shift : float, optional
            An arbitrary mass shift to apply to the generated theoretical isotopic pattern,
            moving all peaks forward by that mass charge ratio transformed mass.
        """
        for composition in self.composition_list:
            self.deconvolute_composition(
                composition, error_tolerance, charge_range,
                truncate_after=truncate_after, charge_carrier=charge_carrier,
                ignore_below=ignore_below, mass_shift=mass_shift)

    def select_best_disjoint_subgraphs(self, error_tolerance=ERROR_TOLERANCE, charge_carrier=PROTON):
        """Construct connected envelope graphs from :attr:`peak_dependency_network` and
        extract the best disjoint isotopic pattern fits in each envelope graph. This in turn
        produces one or more :class:`~.DeconvolutedPeakSolution` instances from each disjoint
        fit, which are processed and added to the results set.

        Parameters
        ----------
        error_tolerance : float, optional
            The error tolerance to use when performing subtraction, if subtraction is
            being performed.
        charge_carrier : float, optional
            The mass of the charge carrier as used for the deconvolution. Required to
            back-out the neutral mass of the deconvoluted result
        """
        disjoint_envelopes = self.peak_dependency_network.find_non_overlapping_intervals()

        for cluster in disjoint_envelopes:
            for fit in cluster.disjoint_best_fits():
                eid = fit.experimental
                tid = fit.theoretical
                composition = fit.data
                rep_eid = drop_placeholders(eid)
                # Require at least two real peaks, and at least half of the theoretical
                # pattern to be backed by real peaks.
                if len(rep_eid) < 2 or len(rep_eid) < len(tid) / 2.:
                    continue

                peak = self._make_deconvoluted_peak_solution(
                    fit, composition, charge_carrier)
                self._save_peak_solution(peak)
                if self.use_subtraction:
                    self.subtraction(tid, error_tolerance)

    def deconvolute(self, error_tolerance=ERROR_TOLERANCE, charge_range=(1, 8),
                    iterations=MAX_ITERATION,  # pylint: disable=arguments-differ
                    truncate_after=TRUNCATE_AFTER, charge_carrier=PROTON,
                    ignore_below=IGNORE_BELOW, mass_shift=None, convergence=CONVERGENCE,
                    **kwargs):
        """Deconvolute the spectrum, extracting isotopic patterns from the composition list.

        Parameters
        ----------
        error_tolerance : float, optional
            The parts-per-million error tolerance in m/z to search with. Defaults to |ERROR_TOLERANCE|
        charge_range : tuple, optional
            The range of charge states to consider. Defaults to (1, 8)
        iterations : int, optional
            The maximum number of populate/select passes to run. Forced to 1 when
            :attr:`use_subtraction` is false.
        truncate_after : float, optional
            The percent of intensity to ensure is included in a theoretical isotopic pattern
            starting from the monoisotopic peak. This will cause theoretical isotopic patterns
            to be truncated, excluding trailing peaks which do not contribute substantially to
            the overall shape of the isotopic pattern.
        charge_carrier : float, optional
            The mass of the charge carrier. Defaults to |PROTON|
        ignore_below : float, optional
            Forwarded to :meth:`populate_graph`.
        mass_shift : float, optional
            An optional mass shift to apply to each composition
        convergence : float, optional
            Iteration stops once ``(signal_before - signal_after) / signal_after`` falls
            below this threshold, where each signal is the summed intensity of
            :attr:`peaklist` and ``signal_after`` has 1 added to it.
        **kwargs
            Accepted but not used by this method.

        Returns
        -------
        :class:`~.DeconvolutedPeakSet`
        """
        if not self.use_subtraction:
            iterations = 1

        begin_signal = sum([p.intensity for p in self.peaklist])
        for _ in range(iterations):
            # NOTE(review): the dependence graph is not cleared between passes here;
            # confirm whether fits from earlier passes are meant to persist.
            self.populate_graph(
                error_tolerance, charge_range, charge_carrier=charge_carrier,
                truncate_after=truncate_after, ignore_below=ignore_below,
                mass_shift=mass_shift)
            # Forward the charge carrier so neutral masses are computed with the same
            # carrier that was used to generate the theoretical patterns.
            self.select_best_disjoint_subgraphs(error_tolerance, charge_carrier)
            self._slice_cache.clear()

            # The +1 keeps the denominator below non-zero when all signal was removed.
            end_signal = sum([p.intensity for p in self.peaklist]) + 1
            if (begin_signal - end_signal) / end_signal < convergence:
                break
            begin_signal = end_signal

        return DeconvolutedPeakSet(self._deconvoluted_peaks).reindex()