aboutsummaryrefslogtreecommitdiff
path: root/plip/exchange/webservices.py
blob: 61cb6f5f2d1c661ae5ad2117fa8a44781a3aeff1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import sys
from urllib.error import HTTPError
from urllib.request import urlopen

import lxml.etree as et

from plip.basic import logger

# Rebind the name `logger` from the imported plip.basic.logger module to the
# object returned by its get_logger(); after this line the module itself is no
# longer reachable under this name.
logger = logger.get_logger()


def check_pdb_status(pdbid):
    """Return the status and up-to-date entry in the PDB for a given PDB ID.

    Queries the RCSB ``idStatus`` REST service and inspects every ``record``
    element of the XML response.

    :param pdbid: PDB ID to look up; it is interpolated into the query URL as-is.
    :return: two-element list ``[status, current_pdbid]``. ``status`` is the
        ``status`` attribute of the last ``record`` element ('UNKNOWN',
        'OBSOLETE' or 'CURRENT'), or ``None`` when the response contains no
        ``record`` element. ``current_pdbid`` is the lower-cased ``replacedBy``
        ID for an obsolete entry, otherwise the lower-cased input ID.
    """
    # NOTE(review): this is the legacy RCSB REST endpoint - confirm it is still served.
    url = "http://www.rcsb.org/pdb/rest/idStatus?structureId=%s" % pdbid
    # The context manager closes the HTTP response even when parsing fails.
    with urlopen(url) as xmlf:
        xml = et.parse(xmlf)
    status = None
    current_pdbid = pdbid
    for record in xml.xpath("//record"):
        # Status of an entry can be either 'UNKNOWN', 'OBSOLETE', or 'CURRENT'
        status = record.attrib["status"]
        if status == "OBSOLETE":
            # Contains the up-to-date PDB ID for obsolete entries
            current_pdbid = record.attrib["replacedBy"]
    return [status, current_pdbid.lower()]


def fetch_pdb(pdbid):
    """Get the newest entry from the RCSB server for the given PDB ID.

    The ID is lower-cased and its status is looked up with
    ``check_pdb_status``; for an obsolete entry the replacement ID reported by
    that lookup is downloaded instead. Any status other than 'UNKNOWN'
    (including ``None``) proceeds to the download.

    :param pdbid: PDB ID of the structure to download.
    :return: two-element list ``[pdbfile, current_entry]`` with the decoded
        file content and the PDB ID that was actually downloaded.
    :raises SystemExit: with exit code 1 if the status is 'UNKNOWN', if the
        download raises ``HTTPError``, or if the downloaded text contains
        "sorry" (the server's "no PDB file available" page).
    """
    pdbid = pdbid.lower()
    logger.info(f"checking status of PDB-ID {pdbid}")
    state, current_entry = check_pdb_status(pdbid)  # Get state and current PDB ID

    if state == "OBSOLETE":
        logger.info(f"entry is obsolete, getting {current_entry} instead")
    elif state == "CURRENT":
        logger.info("entry is up-to-date")
    elif state == "UNKNOWN":
        logger.error("invalid PDB-ID (entry does not exist on PDB server)")
        sys.exit(1)
    logger.info("downloading file from PDB")
    # get URL for current entry
    # @todo needs update to react properly on response codes of RCSB servers
    pdburl = f"http://www.rcsb.org/pdb/files/{current_entry}.pdb"
    try:
        # The context manager closes the HTTP response even if read()/decode() fails.
        with urlopen(pdburl) as response:
            pdbfile = response.read().decode()
    except HTTPError:
        logger.error("no file in PDB format available from wwPDB for the given PDB ID")
        sys.exit(1)
    # If no PDB file is available, a text is now shown with "We're sorry, but ..."
    # Could previously be distinguished by an HTTP error
    if "sorry" in pdbfile:
        logger.error(
            "no file in PDB format available from wwPDB for the given PDB ID."
        )
        sys.exit(1)
    return [pdbfile, current_entry]