Source code for pyproteome.camv
"""
This module provides functionality for interacting with CAMV.
Currently limited to importing and outputing scan lists.
"""
from __future__ import absolute_import, division
import logging
import os
import pandas as pd
import pyproteome as pyp
try:
FileNotFoundError
except NameError:
FileNotFoundError = (IOError, OSError)
LOGGER = logging.getLogger("pyproteome.camv")
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
CAMV_PATH = pyp.utils.which("CAMV.exe")
if CAMV_PATH is None:
CAMV_PATH = os.path.abspath(
os.path.join(
THIS_DIR, "..",
"CAMV", "CAMV", "for_redistribution_files_only", "CAMV.exe",
)
)
[docs]def load_camv_validation(basename):
"""
Load validation data produced by CAMV.
Parameters
----------
basename : str
Returns
-------
accepted : :class:`pandas.DataFrame`
maybed : :class:`pandas.DataFrame`
rejected : :class:`pandas.DataFrame`
"""
accepted = None
maybed = None
rejected = None
def _try_open_xls(path, existing=None):
try:
df = pd.read_csv(path, sep="\t")
except OSError:
return existing
else:
LOGGER.info(
"Loading CAMV validation data from \"{}\"".format(
os.path.join(*path.split(os.sep)[-2:]),
)
)
in_name = os.path.splitext(os.path.basename(path))[0]
df["Scan Paths"] = pd.Series(
[set([in_name])] * len(df.index)
)
if existing is not None:
df = pd.concat([existing, df])
return df
try:
files = os.listdir(pyp.paths.CAMV_OUT_DIR)
except FileNotFoundError:
return accepted, maybed, rejected
for filename in files:
if filename.startswith(basename):
base_dir = os.path.join(pyp.paths.CAMV_OUT_DIR, filename)
accept_path = os.path.join(base_dir, "accept.xls")
maybe_path = os.path.join(base_dir, "maybe.xls")
reject_path = os.path.join(base_dir, "reject.xls")
accepted = _try_open_xls(accept_path, existing=accepted)
maybed = _try_open_xls(maybe_path, existing=maybed)
rejected = _try_open_xls(reject_path, existing=rejected)
return accepted, maybed, rejected