# Source code for ms_deisotope.data_source.mzmlb
# -*- coding: utf-8 -*-
"""mzMLb is a standard rich HDF5-based format for raw mass spectrometry data storage.
This module provides :class:`MzMLbLoader`, a :class:`~.RandomAccessScanSource`
implementation. The mzMLb format is based upon the mzML XML file format, re-using a
subset of its features. The original design for mzMLb is described in [Bhamber]_.
The parser is based on :mod:`pyteomics.mzmlb`. It requires :mod:`h5py` to be installed
for reading, and :mod:`hdf5plugin` to use the faster, non-zlib-based compressors.
References
==========
.. [Bhamber] Bhamber, R. S., Jankevics, A., Deutsch, E. W., Jones, A. R., & Dowsey, A. W. (2021).
MzMLb: A Future-Proof Raw Mass Spectrometry Data Format Based on Standards-Compliant
mzML and Optimized for Speed and Storage Requirements. Journal of Proteome Research,
20(1), 172–183. https://doi.org/10.1021/acs.jproteome.0c00192
"""
import logging
# Attach a no-op handler to the "hdf5plugin" logger at import time.
# NOTE(review): presumably this keeps messages emitted by hdf5plugin from
# surfacing when the application has not configured logging -- confirm.
logging.getLogger("hdf5plugin").addHandler(logging.NullHandler())
# Optional dependency: use the pyteomics mzMLb parser when it can be imported,
# otherwise fall back to a placeholder that fails loudly on instantiation.
try:
    from pyteomics import mzmlb
    _BaseParser = mzmlb.MzMLb
except ImportError as _mzmlb_import_failure:
    mzmlb = None
    # Python 3 unbinds the ``as`` target when the ``except`` clause finishes, so
    # the exception must be stored under a separate module-level name for it to
    # still be reachable when it is re-raised later on.
    impl_import_err = _mzmlb_import_failure

    class _BaseParser:
        """Placeholder base class used when :mod:`pyteomics.mzmlb` is unavailable.

        Raises
        ------
        :class:`ImportError`:
            On instantiation, re-raising the error captured when the import of
            :mod:`pyteomics.mzmlb` failed.
        """

        def __init__(self, *args, **kwargs):
            raise impl_import_err
from .mzml import MzMLLoader as _MzMLLoader
from ._compression import DefinitelyFastRandomAccess
class _MzMLbParser(_BaseParser):
    """Parser variant that tolerates parameter elements lacking a ``value`` attribute."""

    def _handle_param(self, element, **kwargs):
        """Give ``element`` an empty ``"value"`` attribute if it has none, then
        delegate to the base parser's ``_handle_param``.

        Parameters
        ----------
        element :
            The element whose ``attrib`` mapping is inspected and, if needed, updated
            in place.
        **kwargs
            Forwarded unchanged to the base implementation.

        Returns
        -------
        Whatever the base class ``_handle_param`` returns.
        """
        attributes = element.attrib
        if "value" not in attributes:
            attributes["value"] = ""
        return super(_MzMLbParser, self)._handle_param(element, **kwargs)
class MzMLbLoader(_MzMLLoader):
    """Reads scans from PSI-HUPO mzMLb HDF5 files. Provides both iterative and
    random access.

    Attributes
    ----------
    source_file: str
        Path to file to read from.
    source: pyteomics.mzmlb.MzMLb
        Underlying scan data source
    """

    # Parser type for this loader.
    # NOTE(review): presumably read by the mzML loader base class when it builds
    # the underlying reader -- confirm against ``MzMLLoader``.
    _parser_cls = _MzMLbParser

    @property
    def has_fast_random_access(self):
        """Report the random access capability of this reader.

        Returns
        -------
        Always ``DefinitelyFastRandomAccess``.
        """
        return DefinitelyFastRandomAccess

    @classmethod
    def prebuild_byte_offset_file(cls, path):
        """A stub method. MzMLb does not require an external index.

        Parameters
        ----------
        path : :class:`str` or file-like
            The path to the file to index, or a file-like object with a name attribute.

        Returns
        -------
        None
            Nothing is built; ``path`` is not used.
        """
        return None
def is_mzmlb_file(path):
    """Detect whether or not the file referenced by ``path``
    is a mzMLb file.

    Parameters
    ----------
    path: :class:`str`
        The path to test

    Returns
    -------
    :class:`bool`:
        Whether or not the file is a mzMLb file. Always ``False`` when
        :mod:`h5py` cannot be imported or :mod:`pyteomics.mzmlb` is unavailable.

    Raises
    ------
    Exception
        Any error other than :class:`KeyError` raised by :class:`h5py.File`
        while opening or reading ``path`` is propagated unchanged.
    """
    try:
        import h5py
    except (OSError, ImportError):
        # Same tolerance as ``determine_if_available``: an unusable h5py
        # install means no file can be recognised as mzMLb.
        return False
    if mzmlb is None:
        # pyteomics.mzmlb failed to import, so the file could not be read even
        # if it were mzMLb. Answer directly rather than re-raising the import
        # error, whose name may no longer be bound at this point.
        return False
    try:
        # The context manager closes the HDF5 handle on every exit path.
        with h5py.File(path, 'r') as handle:
            # Only the presence of the "mzML" entry matters; a missing entry
            # raises KeyError.
            handle['mzML']
    except KeyError:
        return False
    return True
def infer_reader(path):
    """Return the loader type for ``path`` when it refers to a mzMLb file.

    Parameters
    ----------
    path: :class:`str`
        The path to test

    Returns
    -------
    :class:`type`:
        :class:`MzMLbLoader`, the type to use to open the file

    Raises
    ------
    :class:`ValueError`:
        If :func:`is_mzmlb_file` reports that the file is not a mzMLb file
    """
    if not is_mzmlb_file(path):
        raise ValueError("Not mzMLb File")
    return MzMLbLoader
def determine_if_available():
    """Report whether mzMLb (HDF5-based) file reading can be used.

    Returns
    -------
    :class:`bool`:
        ``True`` only when :mod:`h5py` imports without raising
        :class:`ImportError` or :class:`OSError` and the module-level ``mzmlb``
        is not ``None``.
    """
    try:
        import h5py  # noqa: F401 -- imported only to probe availability
    except (OSError, ImportError):
        return False
    return mzmlb is not None