A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://physcraper.readthedocs.io/en/main/_modules/physcraper/configobj.html below:

physcraper.configobj — Physcraper 0.1 documentation

"""
Physcraper run Configuration object generator
"""
# Disabling attributes defined outside init bc they are defined in functions called in init
# pylint: disable=attribute-defined-outside-init

import sys
import os
import datetime
import configparser
import shutil
import wget


# Debug switch: when truthy, ConfigObj writes extra progress messages to stdout.
_DEBUG = 0




# Parent of the directory that contains this file; ConfigObj.check_taxonomy
# looks for a "taxonomy" folder directly inside it.
PHYSCRAPER_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
#sys.stdout.write(PHYSCRAPER_DIR)


def is_number(inputstr):
    """Test if a value can be coerced to float.

    * **inputstr**: the value to test, typically a string read from a config file.

    Returns True when ``float(inputstr)`` succeeds and False otherwise.
    """
    try:
        float(inputstr)
    except (TypeError, ValueError):
        # ValueError: a string that is not a number (e.g. "abc" or "").
        # TypeError: a non-string, non-numeric value such as None.
        return False
    return True



class ConfigObj():
    """
    Settings for a Physcraper run, taken from defaults and optionally a config file.

    To build the class the following is needed:

      * **configfile**: optional path to a configuration file with a ``[blast]`` and a
            ``[physcraper]`` section. Without it, the values from set_defaults() are used.
      * **run**: when False, set_local() skips all checks of the local blast database.

    During the initializing process the following self objects are generated:

      * **self.e_value_thresh**: the defined threshold for the e-value during Blast searches,
            check out: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=FAQ
            (a float by default; kept as the string from the file when read from a config file)
      * **self.hitlist_size**: the maximum number of sequences retrieved by a single blast search
      * **self.minlen**: value from 0 to 1. Defines how much shorter new seq can be compared to input
      * **self.maxlen**: max length for values to add to aln (must be larger than 1)
      * **self.spp_threshold**: integer read from ``[physcraper] spp_threshold``
      * **self.taxonomy_dir**: directory holding the taxonomy files
      * **self.ott_ncbi**: path to the ``ott_ncbi`` file inside the taxonomy directory
      * **self.email**: email address used for blast queries
      * **self.api_key**: optional Entrez API key, None if not given
      * **self.blast_loc**: defines which blasting method to use:

          * either web-query (=remote)
          * from a local blast database (=local)
      * **self.num_threads**: number of cores to be used during a run
      * **self.url_base**:

          * if blastloc == remote: it defines the url for the blast queries.
          * if blastloc == local: url_base = None
      * **self.delay**: defines when to reblast sequences in days
      * **optional self.objects**:

          * if blastloc == local:

              * self.blastdb: this defines the path to the local blast database
              * self.ncbi_nodes: path to 'nodes.dmp' file, that contains the hierarchical information
              * self.ncbi_names: path to 'names.dmp' file, that contains the different ID's

    NOTE(review): earlier documentation also listed ``self.trim_perc`` and
    ``self.get_ncbi_taxonomy``; nothing in this class sets them.
    """

    def __init__(self, configfile=None, run = True):
        if _DEBUG:
            sys.stdout.write("Building config object\n")
        self.run = run
        if not configfile:
            sys.stdout.write("No config file, using defaults\n")
        # Defaults are always applied first so that read_config only has to
        # override what the file provides.
        self.set_defaults()
        if configfile:
            self.read_config(configfile)

    def set_defaults(self):
        """ In the absence of an input configuration file, sets default values.

        Also calls check_taxonomy(), which may create a taxonomy directory
        and download the ott_ncbi file.
        """
        self.email = None
        self.e_value_thresh = 0.00001
        self.hitlist_size = 10
        self.blast_loc = 'remote'
        self.url_base = None
        self.blastdb = None
        self.num_threads = 4
        self.delay = 90
        self.spp_threshold = 5
        self.minlen = 0.8
        self.api_key = None
        self.maxlen = 1.2
        self.taxonomy_dir = ''
        self.check_taxonomy()

    def config_str(self):
        """ Return the current config values as text in config-file format.
        DOES NOT INCLUDE SOME HIDDEN CONFIGURABLE ATTRIBUTES
        (api_key, url_base and taxonomy_dir are not written).
        """
        lines = [
            "[blast]",
            "Entrez.email = {}".format(self.email),
            "e_value_thresh = {}".format(self.e_value_thresh),
            "hitlist_size = {}".format(self.hitlist_size),
            "location = {}".format(self.blast_loc),
            "localblastdb = {}".format(self.blastdb),
            "num_threads = {}".format(self.num_threads),
            "delay = {}".format(self.delay),
            "[physcraper]",
            "spp_threshold = {}".format(self.spp_threshold),
            "min_length = {}".format(self.minlen),
            "max_length = {}".format(self.maxlen),
        ]
        return "\n".join(lines) + "\n"

    def check_taxonomy(self):
        """Locates a taxonomy directory in the physcraper repo,
        or if not available (often because module was pip installed),
        generates one in the current working directory.

        Sets self.taxonomy_dir and self.ott_ncbi. If the ott_ncbi file is
        missing it is downloaded with wget from the physcraper GitHub repo.
        Raises AssertionError if the file still does not exist afterwards.
        """
        ott_ncbi_url = 'https://raw.githubusercontent.com/McTavishLab/physcraper/main/taxonomy/ott_ncbi'
        if not os.path.exists(self.taxonomy_dir):
            repo_taxonomy = "{}/taxonomy".format(PHYSCRAPER_DIR)
            if os.path.exists(repo_taxonomy):
                self.taxonomy_dir = repo_taxonomy
            else:
                # Fall back to a "taxonomy" folder relative to the working directory.
                if not os.path.exists('taxonomy'):
                    os.mkdir("taxonomy")
                self.taxonomy_dir = os.path.abspath('taxonomy')
                sys.stdout.write("Using {} as taxonomy dir.\n".format(self.taxonomy_dir))
        self.ott_ncbi = "{}/ott_ncbi".format(self.taxonomy_dir)
        if not os.path.exists(self.ott_ncbi):
            sys.stdout.write("downloading taxonomy from {}\n".format(ott_ncbi_url))
            # out is a directory, so wget names the file after the URL ("ott_ncbi").
            wget.download(ott_ncbi_url, out=self.taxonomy_dir)
        assert os.path.isfile(self.ott_ncbi), (
            "file `%s` does not exist" % self.ott_ncbi
            )

    def write_file(self, direc, filename="run.config"):
        """ writes config params to file
            * **direc**: path to write file
            * **filename**: filename to use. Default = run.config
        """
        config_text = self.config_str()
        # The context manager closes the file even if the write fails.
        with open("{}/{}".format(direc, filename), "w") as outfile:
            outfile.write(config_text)

    def read_config(self, configfi):
        """ Reads configfile, and sets configuration params.
        Optional params (Entrez.email, Entrez.api_key, url_base, num_threads,
        taxonomy_path) keep their value from set_defaults() or become None
        when not listed; all other params are required.

        * **configfi**: path to input file.

        Raises AssertionError if the file is missing or a value fails
        validation, KeyError if a required section or option is absent, and
        ValueError if a numeric option cannot be converted.
        """
        assert os.path.isfile(configfi), "file `%s` does not exist" % configfi
        config = configparser.ConfigParser()
        self.configfi = configfi
        with open(configfi) as handle:
            config.read_file(handle)
        blast = config["blast"]
        physcraper = config["physcraper"]

        # read in blast settings
        self.email = blast.get("Entrez.email")
        api_key = blast.get("Entrez.api_key")
        # An empty value or the literal text "None" both mean "no key".
        self.api_key = api_key if api_key and api_key != 'None' else None
        # NOTE(review): kept as the raw string from the file, although the
        # default is a float - confirm what downstream code expects.
        self.e_value_thresh = blast["e_value_thresh"]
        assert is_number(self.e_value_thresh), (
            "value `%s` is not a number" % self.e_value_thresh
            )
        # int() raises ValueError on a non-integer value.
        self.hitlist_size = int(blast["hitlist_size"])

        # read in settings for internal Physcraper processes
        if "taxonomy_path" in physcraper:
            self.taxonomy_dir = physcraper["taxonomy_path"]
        self.check_taxonomy()
        self.blast_loc = blast["location"]
        assert self.blast_loc in ["local", "remote"], (
            "your blast location `%s` is not remote or local" % self.blast_loc
        )
        if self.blast_loc == "local":
            self.blastdb = blast["localblastdb"]
            self.set_local()
        elif self.blast_loc == "remote":
            self.url_base = blast.get("url_base")
            if self.url_base == 'None':
                self.url_base = None
        if _DEBUG:
            sys.stdout.write("{}\n".format(self.email))
            sys.stdout.write("{}\n".format(self.blast_loc))
            if self.blast_loc == "local":
                sys.stdout.write("local blast db {}\n".format(self.blastdb))
        # Keep the default when the option is absent, and store an int so the
        # type matches both the default and the SLURM override below.
        threads = blast.get("num_threads")
        if threads is not None:
            self.num_threads = int(threads)
        # Inside a SLURM job the allocated CPU count takes precedence.
        slurm_cpus = os.environ.get('SLURM_JOB_CPUS_PER_NODE')
        if slurm_cpus:
            self.num_threads = int(slurm_cpus)
        self.delay = int(blast["delay"])

        # read in physcraper settings
        self.minlen = float(physcraper["min_length"])
        assert 0 < self.minlen <= 1, (
            "value `%s` is not between 0 and 1" % self.minlen
        )
        self.spp_threshold = int(physcraper["spp_threshold"])
        self.maxlen = float(physcraper["max_length"])
        assert self.maxlen > 1, (
            "value `%s` is not larger than 1" % self.maxlen
        )

    @staticmethod
    def _days_since_modified(path):
        """Return the number of whole days since *path* was last modified."""
        modified = datetime.datetime.fromtimestamp(os.path.getmtime(path))
        return (datetime.datetime.now() - modified).days

    def set_local(self):
        """ Checks that all appropriate files etc are in place for local blast db.

        Does nothing when self.run is False. Otherwise sets self.blast_loc,
        self.ncbi_nodes, self.ncbi_names and self.url_base. Writes a message to
        stderr and exits with status 1 if the blast db directory, the
        'nt.23.nhr' file in it, or nodes.dmp is missing, and warns on stderr
        when either of the latter two is 90 or more days old.
        Raises AssertionError if self.blastdb is unset or blastn is not on the path.
        """
        if not self.run:
            return
        install_doc = "'https://physcraper.readthedocs.io/en/main/install.html#local-databases'"
        self.blast_loc = "local"
        self.ncbi_nodes = "{}/nodes.dmp".format(self.taxonomy_dir)
        self.ncbi_names = "{}/names.dmp".format(self.taxonomy_dir)
        assert(self.blastdb), "No blast db location set"
        if not os.path.isdir(self.blastdb):
            sys.stderr.write(
                "Local Blast DB not found at {}, "
                "please use a remote search, or update as described "
                "in {}\n".format(self.blastdb, install_doc))
            # Non-zero status so calling scripts can detect the failure.
            sys.exit(1)
        # Presence of this volume file is used as the test for a complete download.
        db_marker = "{}/nt.23.nhr".format(self.blastdb)
        if not os.path.exists(db_marker):
            sys.stderr.write(
                "Errors with local Blast DB at {}, "
                "may be incomplete. please use a remote search, "
                "or update as described in "
                "{}\n".format(self.blastdb, install_doc))
            sys.exit(1)
        db_age = self._days_since_modified(db_marker)
        if db_age >= 90:
            sys.stderr.write(
                "Your databases might not be up to date anymore. "
                "You downloaded them {} days ago. "
                "Continuing, but perhaps use a remote search, "
                "or update as described in "
                "{}\n".format(db_age, install_doc))
        if not os.path.exists(self.ncbi_nodes):
            sys.stderr.write(
                "NCBI taxonomy not found at {} - "
                "To perform a local blast search, please update nodes and names.dmp, "
                "as described in "
                "{}\n".format(self.ncbi_nodes, install_doc))
            sys.exit(1)
        taxonomy_age = self._days_since_modified(self.ncbi_nodes)
        if taxonomy_age >= 90:
            sys.stderr.write(
                "Your taxonomy databases from NCBI were downloaded {} days ago. "
                "To perform a local blast search, please update nodes and names.dmp, "
                "as described in "
                "{}\n".format(taxonomy_age, install_doc))
        assert(shutil.which("blastn")), "blastn  not found in path"
        self.url_base = None

RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4