import sys
from urllib.error import HTTPError
from urllib.request import urlopen

import lxml.etree as et

from plip.basic import logger

logger = logger.get_logger()


def check_pdb_status(pdbid):
    """Return the status and the up-to-date entry in the PDB for a given PDB ID.

    Queries the RCSB ``idStatus`` REST endpoint and inspects every
    ``record`` element of the XML response.

    :param pdbid: PDB ID to look up.
    :return: Two-element list ``[status, current_pdbid]``. ``status`` is the
        ``status`` attribute of the last ``record`` element found, or ``None``
        when the response contains no ``record``. ``current_pdbid`` is the
        lower-cased ``replacedBy`` ID for obsolete entries, otherwise the
        lower-cased input ID.
    """
    url = 'http://www.rcsb.org/pdb/rest/idStatus?structureId=%s' % pdbid
    # The context manager closes the HTTP response even if parsing fails.
    with urlopen(url) as xmlf:
        xml = et.parse(xmlf)
    status = None
    current_pdbid = pdbid
    for df in xml.xpath('//record'):
        # Status of an entry can be either 'UNKNOWN', 'OBSOLETE', or 'CURRENT'
        status = df.attrib['status']
        if status == 'OBSOLETE':
            # Contains the up-to-date PDB ID for obsolete entries
            current_pdbid = df.attrib['replacedBy']
    return [status, current_pdbid.lower()]


def fetch_pdb(pdbid):
    """Get the newest entry from the RCSB server for the given PDB ID.

    The ID is lower-cased, its status is checked via ``check_pdb_status``,
    and the PDB file of the current (possibly superseding) entry is
    downloaded.

    Exits the process with status ``1`` if the PDB ID is invalid or if no
    file in PDB format is available for it.

    :param pdbid: PDB ID to fetch.
    :return: Two-element list ``[pdbfile, current_entry]`` holding the decoded
        file content and the PDB ID that was actually downloaded.
    """
    pdbid = pdbid.lower()
    logger.info(f'checking status of PDB-ID {pdbid}')
    state, current_entry = check_pdb_status(pdbid)  # Get state and current PDB ID

    if state == 'OBSOLETE':
        logger.info(f'entry is obsolete, getting {current_entry} instead')
    elif state == 'CURRENT':
        logger.info('entry is up-to-date')
    elif state == 'UNKNOWN':
        logger.error('invalid PDB-ID (entry does not exist on PDB server)')
        sys.exit(1)
    # Any other state (including None) falls through to the download attempt.

    logger.info('downloading file from PDB')
    # get URL for current entry
    # @todo needs update to react properly on response codes of RCSB servers
    pdburl = f'http://www.rcsb.org/pdb/files/{current_entry}.pdb'
    try:
        # The context manager releases the connection once the body is read.
        with urlopen(pdburl) as response:
            pdbfile = response.read().decode()
    except HTTPError:
        logger.error('no file in PDB format available from wwPDB for the given PDB ID')
        sys.exit(1)

    # If no PDB file is available, a text is now shown with "We're sorry, but ..."
    # Could previously be distinguished by an HTTP error
    if 'sorry' in pdbfile:
        logger.error('no file in PDB format available from wwPDB for the given PDB ID.')
        sys.exit(1)
    return [pdbfile, current_entry]