'''
GlyConnect
----------
A simple dialect of the GlyConnect/GlycoMod glycan composition notation,
together with a small client for the GlyConnect web API.
'''
import re
from dataclasses import dataclass, field
from typing import Dict, Union, List, Optional, Type, Generic, TypeVar
from glypy.structure.glycan_composition import (
FrozenGlycanComposition,
FrozenMonosaccharideResidue,
SubstituentResidue)
from glypy.structure.glycan import Glycan
try:
import requests
except ImportError:
requests = None
#: Every symbol understood by this dialect, mapped to the residue it denotes.
#: Several symbols may name the same residue (see the sulfate and phosphate
#: aliases below).
defined_symbols: Dict[str, Union[SubstituentResidue, FrozenMonosaccharideResidue]] = {
    # Monosaccharides
    "Hex": FrozenMonosaccharideResidue.from_iupac_lite("Hex"),
    "HexNAc": FrozenMonosaccharideResidue.from_iupac_lite("HexNAc"),
    "dHex": FrozenMonosaccharideResidue.from_iupac_lite("dHex"),
    "NeuAc": FrozenMonosaccharideResidue.from_iupac_lite("NeuAc"),
    "NeuGc": FrozenMonosaccharideResidue.from_iupac_lite("NeuGc"),
    # Sulfate and its aliases
    "S": SubstituentResidue("sulfate"),
    "Su": SubstituentResidue("sulfate"),
    "Sulpho": SubstituentResidue("sulfate"),
    # Phosphate and its aliases
    "P": SubstituentResidue("phosphate"),
    "Ph": SubstituentResidue("phosphate"),
    "Phospho": SubstituentResidue("phosphate"),
    # Further monosaccharides
    "Xyl": FrozenMonosaccharideResidue.from_iupac_lite("Xyl"),
    "HexA": FrozenMonosaccharideResidue.from_iupac_lite("HexA"),
    # The symbol "Pent" is looked up under the IUPAC-lite name "Pen".
    "Pent": FrozenMonosaccharideResidue.from_iupac_lite("Pen"),
    "Kdn": FrozenMonosaccharideResidue.from_iupac_lite("Kdn"),
}
def _invert_mapping(table: Dict[str, Union[SubstituentResidue,
FrozenMonosaccharideResidue]]) -> Dict[Union[SubstituentResidue,
FrozenMonosaccharideResidue],
str]:
inverted = {}
for k, v in table.items():
if v in inverted:
if len(k) > len(inverted[v]):
continue
inverted[v] = k
return inverted
#: Reverse lookup from residue to its symbol, derived from ``defined_symbols``.
#: When several symbols name the same residue, the shortest one is kept.
monosaccharide_to_symbol = _invert_mapping(defined_symbols)
def _generate_pattern(symbols: List[str]) -> re.Pattern:
symbols = sorted(symbols, key=len, reverse=True)
return re.compile(f"({'|'.join(symbols)})(\d+?)")
#: Matches delimited ``symbol:count`` pairs; the symbol is any run of
#: characters containing neither ``:`` nor whitespace.
tokenizer = re.compile(r"([^:\s]+):(\d+)")
#: Matches a defined symbol immediately followed by its count; used by
#: ``loads`` when ``tokenizer`` finds no ``symbol:count`` pair.
undelimited_tokenizer = _generate_pattern(defined_symbols)
def loads(string):
    '''Parse a GlyConnect glycan composition into a :class:`~.FrozenGlycanComposition`

    The delimited form (``symbol:count`` pairs, e.g. ``Hex:5 HexNAc:4``) is
    tried first. If the string contains no such pair, it is scanned for
    defined symbols immediately followed by a count (e.g. ``Hex5HexNAc4``);
    in that mode text which is not a defined symbol is skipped.

    Parameters
    ----------
    string: str
        The string to parse

    Returns
    -------
    :class:`~.FrozenGlycanComposition`
        Empty when no token of either form is found.

    Raises
    ------
    :class:`KeyError`: Raised if a key isn't defined by the GlyConnect dialect
    '''
    tokens = tokenizer.findall(string) or undelimited_tokenizer.findall(string)
    gc = FrozenGlycanComposition()
    for symbol, count in tokens:
        # Repeated symbols accumulate rather than overwrite.
        gc[defined_symbols[symbol]] += int(count)
    return gc
def dumps(composition):
    '''Encode :class:`~.GlycanComposition` or :class:`~.Glycan` into the GlyConnect
    glycan composition text format.

    Each residue is written as ``symbol:count`` and the pairs are joined by
    single spaces, in the iteration order of the composition.

    Parameters
    ----------
    composition: :class:`~.GlycanComposition` or :class:`~.Glycan`
        The structure to format

    Returns
    -------
    :class:`str`

    Raises
    ------
    :class:`KeyError`: Raised if a key isn't defined by the GlyConnect Compozitor dialect
    '''
    if isinstance(composition, Glycan):
        # A full structure is first reduced to its composition.
        composition = FrozenGlycanComposition.from_glycan(composition)
    tokens = [
        "%s:%d" % (monosaccharide_to_symbol[residue], count)
        for residue, count in composition.items()
    ]
    return ' '.join(tokens)
#: Base URL of the GlyConnect web API; every request made by this module is
#: built relative to it.
API_SERVER = "https://glyconnect.expasy.org/api"
def from_glytoucan_id(glytoucan_id):
    '''Search the GlyConnect API for a GlyTouCan identifier.

    Parameters
    ----------
    glytoucan_id: str
        The identifier to search for; sent as the ``glytoucan_id`` form field.

    Returns
    -------
    object
        The decoded JSON body of the response, returned as-is.

    Raises
    ------
    :class:`ImportError`: Raised if the optional `requests` dependency is not installed
    '''
    if requests is None:
        # The module-level import is optional; fail with a clear message
        # instead of an AttributeError on ``None``.
        raise ImportError(
            "The `requests` library is required to query the GlyConnect API")
    response = requests.post(
        f"{API_SERVER}/structures/search/glytoucan",
        data={"glytoucan_id": glytoucan_id})
    # Surface HTTP error statuses as exceptions before decoding the body.
    response.raise_for_status()
    return response.json()
@dataclass
class RecordBase:
    '''Common base for the record dataclasses built from API responses.'''

    @classmethod
    def from_dict(cls, data):
        '''Create an instance of `cls` from a mapping of field names to values.

        Parameters
        ----------
        data: dict
            Passed to the constructor as keyword arguments, so its keys must
            match the fields of `cls`.

        Returns
        -------
        An instance of `cls`
        '''
        return cls(**data)
@dataclass
class TaxonomyRecord(RecordBase):
    '''A taxonomy entry, as embedded in protein, source, disease and
    glycosylation records.
    '''
    # Required fields
    id: int
    taxonomy_id: str
    # Optional fields, ``None`` when absent from the input
    common_name: Optional[str] = None
    species: Optional[str] = None
@dataclass
class UniprotProteinAccessionRecord(RecordBase):
    '''One element of :attr:`ProteinRecord.uniprots`.

    Only `uniprot_acc` is required; the remaining fields default to ``None``.
    '''
    uniprot_acc: str
    uniprot_id: Optional[str] = None
    # NOTE(review): the fields below look like cross-references to other
    # resources -- confirm against the API documentation.
    glygen: Optional[str] = None
    nextprot: Optional[str] = None
    genecards: Optional[str] = None
    glycodomain: Optional[str] = None
@dataclass
class ProteinRecord(RecordBase):
    '''A protein entry with its taxonomy and UniProt accessions.'''
    id: int
    name: str
    taxonomy: TaxonomyRecord
    uniprots: List[UniprotProteinAccessionRecord]

    @classmethod
    def from_dict(cls, data: dict):
        '''Build a :class:`ProteinRecord`, converting the nested records.

        Parameters
        ----------
        data: dict
            Must contain ``id`` and ``name``; ``taxonomy`` and ``uniprots``
            are optional.

        Returns
        -------
        :class:`ProteinRecord`
        '''
        taxonomy = data.get("taxonomy")
        if taxonomy:
            # A missing or empty taxonomy is passed through unconverted.
            taxonomy = TaxonomyRecord.from_dict(taxonomy)
        accessions = [
            UniprotProteinAccessionRecord.from_dict(entry)
            for entry in data.get("uniprots", [])
        ]
        return cls(id=data['id'], name=data['name'],
                   taxonomy=taxonomy, uniprots=accessions)
@dataclass
class SourceRecord(RecordBase):
    '''One element of :attr:`Source.source`.

    `type`, `name` and `id` are required; the remaining fields default to
    ``None``.
    '''
    type: str
    name: str
    id: int
    ref: Optional[str] = None
    ontology: Optional[str] = None
    brenda_id: Optional[str] = None
@dataclass
class Source(RecordBase):
    '''A group of source records together with the taxa attached to them.'''
    source: List[SourceRecord]
    taxons: List[TaxonomyRecord]

    @classmethod
    def from_dict(cls, data: dict):
        '''Build a :class:`Source`, converting the nested records.

        Parameters
        ----------
        data: dict
            May contain ``source`` and ``taxons`` lists; a missing key is
            treated as an empty list.

        Returns
        -------
        :class:`Source`
        '''
        source_records = list(map(SourceRecord.from_dict, data.get("source", [])))
        taxon_records = list(map(TaxonomyRecord.from_dict, data.get("taxons", [])))
        return cls(source=source_records, taxons=taxon_records)
@dataclass
class CellLine(RecordBase):
    '''A cell line entry; the element type of :attr:`Compozitor.cell_lines`.

    All four fields are required.
    '''
    cellosaurus_id: str
    id: int
    is_problematic: bool
    name: str
@dataclass
class Disease(RecordBase):
    '''A disease entry and the taxa associated with it.

    Derives from :class:`RecordBase` like every other record type, so that it
    satisfies the ``RecordBase`` bound expected by
    :class:`APICollectionProperty`.
    '''
    id: int
    name: str
    do_id: Optional[str] = None
    taxons: List[TaxonomyRecord] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: dict):
        '''Build a :class:`Disease`, converting the nested taxonomy records.

        Parameters
        ----------
        data: dict
            Must contain ``id`` and ``name``; ``do_id`` and ``taxons`` are
            optional.

        Returns
        -------
        :class:`Disease`
        '''
        taxons = [TaxonomyRecord.from_dict(x) for x in data.get("taxons", [])]
        return cls(data['id'], data['name'], data.get('do_id'), taxons)
@dataclass
class CompositionRecord(RecordBase):
    '''The composition part of a :class:`CompozitorGlycan`, carrying the same
    composition in several text formats.
    '''
    # Text renderings of the composition
    format_byonic: str
    format_condensed: str
    format_glyconnect: str
    format_numeric: str
    id: int
    mass: float
    mass_monoisotopic: float
    reviewed: bool
    glytoucan_id: Optional[str] = None

    def parse(self):
        '''Parse :attr:`format_glyconnect` with :func:`loads`.

        Returns
        -------
        :class:`~.FrozenGlycanComposition`
        '''
        glyconnect_text = self.format_glyconnect
        return loads(glyconnect_text)
@dataclass
class StructureRecord(RecordBase):
    '''The structure part of a :class:`CompozitorGlycan`.

    Every field except `glytoucan_id` is required.
    '''
    glycan_core: str
    glycan_type: str
    has_image: bool
    id: int
    is_undefined: bool
    reviewed: bool
    glytoucan_id: Optional[str] = None
@dataclass
class CompozitorGlycan(RecordBase):
    '''One result of :meth:`Compozitor.query`: a composition and structure,
    optionally annotated with the taxonomy and protein they were reported for.
    '''
    composition: CompositionRecord
    structure: StructureRecord
    taxonomy: Optional[TaxonomyRecord]
    protein: Optional[ProteinRecord]

    @classmethod
    def from_dict(cls, data: dict):
        '''Build a :class:`CompozitorGlycan`, converting the nested records.

        Parameters
        ----------
        data: dict
            Must contain ``composition`` and ``structure``; ``taxonomy`` and
            ``protein`` may be missing or empty, in which case the
            corresponding attribute is ``None``.

        Returns
        -------
        :class:`CompozitorGlycan`
        '''
        composition = CompositionRecord.from_dict(data['composition'])
        structure = StructureRecord.from_dict(data['structure'])
        raw_taxonomy = data.get('taxonomy')
        taxonomy = TaxonomyRecord.from_dict(raw_taxonomy) if raw_taxonomy else None
        raw_protein = data.get('protein')
        protein = ProteinRecord.from_dict(raw_protein) if raw_protein else None
        # Pass by keyword: the field order is (..., taxonomy, protein), and a
        # positional call is easy to get the wrong way round.
        return cls(composition=composition, structure=structure,
                   taxonomy=taxonomy, protein=protein)
#: Type variable for the record class produced by an API collection; bound to
#: ``RecordBase``.
T = TypeVar("T", bound=RecordBase)
@dataclass
class APICollectionProperty(Generic[T]):
    '''Descriptor exposing a remote collection as a lazily loaded attribute.

    On first access through an instance, `url` is fetched, each element of
    the JSON response is converted with ``record_type.from_dict``, and the
    resulting list is stored in the owner's ``_cache`` dictionary under
    `url`. Later accesses return the cached list. Deleting the attribute
    discards the cached list so the next access fetches again.
    '''
    url: str
    record_type: Type[T]

    def __get__(self, obj, objtype=None) -> List[T]:
        '''Return the cached collection for `obj`, fetching it if necessary.

        Accessed on the class (`obj` is ``None``), the descriptor itself is
        returned.

        Raises
        ------
        :class:`ImportError`: Raised if a fetch is needed but the optional `requests` dependency is not installed
        '''
        if obj is None:
            return self
        cached = obj._cache.get(self.url)
        # Compare against None so that an empty (but fetched) list still
        # counts as cached.
        if cached is not None:
            return cached
        if requests is None:
            raise ImportError(
                "The `requests` library is required to query the GlyConnect API")
        resp = requests.get(self.url)
        resp.raise_for_status()
        records = [self.record_type.from_dict(d) for d in resp.json()]
        obj._cache[self.url] = records
        return records

    def __delete__(self, obj):
        '''Discard the cached collection for `obj`, if there is one.'''
        # pop() with a default keeps invalidation harmless when nothing has
        # been fetched yet, instead of leaking a KeyError from the cache.
        obj._cache.pop(self.url, None)
@dataclass
class Compozitor:
    '''Client for the GlyConnect API.

    The collection attributes are fetched on first access and cached on the
    instance; :meth:`query` searches glycosylation records.
    '''
    #: Collections already fetched by this instance, keyed by URL.
    _cache: dict = field(default_factory=dict, repr=False)

    # The descriptors below carry no annotation, so @dataclass does not turn
    # them into fields.
    proteins = APICollectionProperty(
        f"{API_SERVER}/proteins-all",
        ProteinRecord
    )
    sources = APICollectionProperty(
        f"{API_SERVER}/sources-all",
        Source
    )
    cell_lines = APICollectionProperty(
        f"{API_SERVER}/cell_lines-all",
        CellLine
    )
    diseases = APICollectionProperty(
        f"{API_SERVER}/diseases-all",
        Disease
    )

    def query(self, taxonomy: Optional[str]=None, cell_line: Optional[str]=None,
              protein: Optional[str]=None, disease: Optional[str]=None):
        '''Search glycosylation records, optionally filtered.

        Parameters
        ----------
        taxonomy: str, optional
        cell_line: str, optional
        protein: str, optional
        disease: str, optional
            Each filter is sent as a query parameter of the same name; filters
            that are ``None`` or empty are left out of the request.

        Returns
        -------
        list of :class:`CompozitorGlycan`

        Raises
        ------
        :class:`ImportError`: Raised if the optional `requests` dependency is not installed
        :class:`ValueError`: Raised if the response body is a JSON list rather than an object
        '''
        if requests is None:
            raise ImportError(
                "The `requests` library is required to query the GlyConnect API")
        filters = (
            ('taxonomy', taxonomy),
            ('cell_line', cell_line),
            ('protein', protein),
            ('disease', disease),
        )
        params = {name: value for name, value in filters if value}
        resp = requests.get(f"{API_SERVER}/glycosylations", params)
        resp.raise_for_status()
        data = resp.json()
        if isinstance(data, list):
            raise ValueError("Malformed query or invalid response")
        # An empty or null ``results`` entry yields an empty list.
        return [CompozitorGlycan.from_dict(res) for res in (data['results'] or ())]
#: Module-level ``Compozitor`` instance shared by the convenience alias below.
client = Compozitor()
#: Shortcut for ``client.query``.
query = client.query