Module mfold_library

Expand source code
from glob import glob
import os
import re
import string
import subprocess

class Strand:
    """
    A class representing a strand of DNA.
    """
    allowed_bases = set('ATCG')
    allowed_constraints = set(string.ascii_letters + string.digits)
    base_pair = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}

    def __init__(self, bases, constraints):
        """
        Args:
            bases: A string representing the bases in a strand. It is
                upper-cased before being validated and stored.
            constraints: A list of Regions representing the structure of a strand
        Raises:
            TypeError: The bases contain letters other than A, T, C and G, or
                a region name contains non-alphanumeric characters.
        """
        self.bases = bases.upper()
        self.constraints = constraints
        # Subset test: every base must be allowed. A ">" (proper superset)
        # test only fires when all four bases AND an extra letter are present.
        if not set(self.bases) <= Strand.allowed_bases:
            raise TypeError('The selected bases contain letters '
                          + 'other than A, T, C, and G: ' + bases)
        for constraint in self.constraints:
            # Region-like objects carry their label in .name; anything else
            # (e.g. the characters of a plain string) is checked as-is.
            name = str(getattr(constraint, 'name', constraint))
            if not set(name) <= Strand.allowed_constraints:
                raise TypeError('The selected constraints contain '
                              + 'non-alphanumeric characters: ' + name)

    @staticmethod
    def complement(bases):
        """
        Returns the complement of a string of bases.
        Args:
            bases: The string of bases to find the complement of. Only the
                upper-case letters A, T, C and G are present in the lookup
                table; any other character raises KeyError.
        Returns:
            A string representing the complement.
        """
        return "".join([Strand.base_pair[base] for base in bases])

class Region:
    """
    A named, fixed-length stretch of bases on a strand. A Strand's structure
    is described by a list of these.
    """
    def __init__(self, name, length):
        """
        Args:
            name: The label of the region, as a string. It should either be
                all uppercase or all lowercase.
            length: How many bases the region spans.
        """
        # label identifying the region
        self.name = name
        # number of bases covered by the region
        self.length = length

    def __repr__(self):
        return "Region('{}', {})".format(self.name, self.length)

class Mfold:
    """
    Interface to Mfold software.
    """
    # Prefix of the line in a details file that carries the energy value.
    energy_string = ' dG = '
    # Placeholder written between the two strands in the sequence file.
    linker_sequence = 'LLL'
    # File-name endings removed by clean() and clean_all().
    output_suffixes = ['.aux', '.cmd', '.con', '.log', '.pnt', '.sav', '.seq', '.ss',
            '-local.pnt', '-local.seq', '.ann', '.ct', '.ps', '.det', '.out',
            '.h-num', '.plot', '.pdf', '.ss-count', '-temp.det', '-temp.out']


    def __init__(self, output_folder='', mfold_command=''):
        """
        Args:
            output_folder: Directory holding the input and output files.
                An empty string means the current working directory.
            mfold_command: The executable that is launched by run().
        """
        self.folder = output_folder
        self.command = mfold_command


    def run(self, strand1, strand2, sequence_file='a.seq', settings_file='a.aux'):
        """
        Writes the sequence and constraint files for a pair of strands and
        launches the configured command on them.
        Args:
            strand1: The first strand (its bases come first in the sequence).
            strand2: The second strand (its bases follow the linker).
            sequence_file: Name of the sequence file inside the folder.
            settings_file: Name of the constraint file inside the folder.
        """
        seq_path = os.path.join(self.folder, sequence_file)
        set_path = os.path.join(self.folder, settings_file)

        with open(seq_path, 'w') as seqfile:
            seqfile.write(strand1.bases + Mfold.linker_sequence + strand2.bases)
        with open(set_path, 'w') as setfile:
            # One constraint per line; without the newline they run together.
            setfile.writelines(
                    constraint + '\n'
                    for constraint in Mfold.get_constraints(strand1, strand2))

        # The command runs inside the folder, so the files are named relative
        # to it; passing folder-prefixed paths would apply a relative folder
        # twice. An empty folder is not a valid cwd, hence "or None".
        subprocess.run([self.command, f'SEQ={sequence_file}', f'AUX={settings_file}'],
                cwd=self.folder or None)

    def clean_all(self):
        """
        Removes every file in the folder whose name ends in a known suffix.
        """
        for suffix in Mfold.output_suffixes:
            # os.path.join keeps the pattern relative when the folder is '',
            # whereas '{folder}/*' would then glob the filesystem root.
            for file in glob(os.path.join(self.folder, f'*{suffix}')):
                os.remove(file)

    def clean(self, file_prefix):
        """
        Removes the files in the folder named file_prefix + a known suffix.
        Args:
            file_prefix: The common start of the file names; it is passed to
                glob, so wildcard characters in it are expanded.
        """
        for suffix in Mfold.output_suffixes:
            file_path = os.path.join(self.folder, f'{file_prefix}{suffix}')
            for file in glob(file_path):
                os.remove(file)

    def get_energy(self, details_file='a.det'):
        """
        Reads the energy from a details file.
        Args:
            details_file: Name of the details file inside the folder.
        Returns:
            The number following ' dG = ' on the first line that starts with
            it, as a float; 0 if the file does not exist or has no such line.
        """
        details_path = os.path.join(self.folder, details_file)
        if os.path.exists(details_path):
            with open(details_path, 'r') as detfile:
                for line in detfile:
                    if line.startswith(Mfold.energy_string):
                        return float(line[len(Mfold.energy_string):])
        return 0

    @staticmethod
    def get_constraints(strand1, strand2, linker_length=None):
        """
        Builds the constraint lines for two strands joined by the linker.
        Args:
            strand1: The first strand; its regions start at position 1.
            strand2: The second strand; its regions start after the linker.
            linker_length: Number of positions between the two strands.
                Defaults to the length of Mfold.linker_sequence.
        Returns:
            A list of strings 'P s1-e1 s2-e2', one for each upper-case region
            name whose lower-case counterpart also exists. The first range is
            the lower-case region, the second the upper-case one; positions
            are 1-based and inclusive.
        """
        if linker_length is None:
            linker_length = len(Mfold.linker_sequence)

        # Regions are keyed by name, so a name that occurs more than once
        # (in either strand) keeps only its last position.
        all_regions = {}
        curr_index = 0
        for strand, gap in ((strand1, 0), (strand2, linker_length)):
            curr_index += gap
            for region in strand.constraints:
                all_regions[region.name] = (curr_index + 1, curr_index + region.length)
                curr_index += region.length

        constraints = []
        for name, (start, end) in all_regions.items():
            partner = name.lower()
            if name.isupper() and partner in all_regions:
                partner_start, partner_end = all_regions[partner]
                constraints.append(f'P {partner_start}-{partner_end} {start}-{end}')
        return constraints

class EnergyMatrix:
    """
    Square table of interaction energies for every ordered pair of Strands.
    """
    def __init__(self, mfold, strands):
        """
        Args:
            mfold: The object used to run each pair and read back its energy.
            strands: The Strands to pair up with one another.
        """
        self.mfold = mfold
        self.strands = strands
        # One row and one column per strand; empty until create() is called.
        self.matrix = [[None for _ in strands] for _ in strands]

    def create(self):
        """
        Fills the matrix: entry [i][j] is the energy reported for strand i
        followed by strand j. All earlier output files are removed before
        each run.
        """
        for row, first in enumerate(self.strands):
            for col, second in enumerate(self.strands):
                prefix = f'{row}_{col}'
                self.mfold.clean_all()
                self.mfold.run(first, second, prefix + '.seq', prefix + '.aux')
                self.matrix[row][col] = self.mfold.get_energy(prefix + '.det')

Classes

class EnergyMatrix (mfold, strands)

The matrix of interaction energies between a list of Strands.

Expand source code
class EnergyMatrix:
    """
    Square table of interaction energies for every ordered pair of Strands.
    """
    def __init__(self, mfold, strands):
        """
        Args:
            mfold: The object used to run each pair and read back its energy.
            strands: The Strands to pair up with one another.
        """
        self.mfold = mfold
        self.strands = strands
        # One row and one column per strand; empty until create() is called.
        self.matrix = [[None for _ in strands] for _ in strands]

    def create(self):
        """
        Fills the matrix: entry [i][j] is the energy reported for strand i
        followed by strand j. All earlier output files are removed before
        each run.
        """
        for row, first in enumerate(self.strands):
            for col, second in enumerate(self.strands):
                prefix = f'{row}_{col}'
                self.mfold.clean_all()
                self.mfold.run(first, second, prefix + '.seq', prefix + '.aux')
                self.matrix[row][col] = self.mfold.get_energy(prefix + '.det')

Methods

def create(self)
Expand source code
def create(self):
    """
    Fills the matrix: entry [i][j] is the energy reported for strand i
    followed by strand j. All earlier output files are removed before
    each run.
    """
    for row, first in enumerate(self.strands):
        for col, second in enumerate(self.strands):
            prefix = f'{row}_{col}'
            self.mfold.clean_all()
            self.mfold.run(first, second, prefix + '.seq', prefix + '.aux')
            self.matrix[row][col] = self.mfold.get_energy(prefix + '.det')
class Mfold (output_folder='', mfold_command='')

Interface to Mfold software.

Expand source code
class Mfold:
    """
    Interface to Mfold software.
    """
    # Prefix of the line in a details file that carries the energy value.
    energy_string = ' dG = '
    # Placeholder written between the two strands in the sequence file.
    linker_sequence = 'LLL'
    # File-name endings removed by clean() and clean_all().
    output_suffixes = ['.aux', '.cmd', '.con', '.log', '.pnt', '.sav', '.seq', '.ss',
            '-local.pnt', '-local.seq', '.ann', '.ct', '.ps', '.det', '.out',
            '.h-num', '.plot', '.pdf', '.ss-count', '-temp.det', '-temp.out']


    def __init__(self, output_folder='', mfold_command=''):
        """
        Args:
            output_folder: Directory holding the input and output files.
                An empty string means the current working directory.
            mfold_command: The executable that is launched by run().
        """
        self.folder = output_folder
        self.command = mfold_command


    def run(self, strand1, strand2, sequence_file='a.seq', settings_file='a.aux'):
        """
        Writes the sequence and constraint files for a pair of strands and
        launches the configured command on them.
        Args:
            strand1: The first strand (its bases come first in the sequence).
            strand2: The second strand (its bases follow the linker).
            sequence_file: Name of the sequence file inside the folder.
            settings_file: Name of the constraint file inside the folder.
        """
        seq_path = os.path.join(self.folder, sequence_file)
        set_path = os.path.join(self.folder, settings_file)

        with open(seq_path, 'w') as seqfile:
            seqfile.write(strand1.bases + Mfold.linker_sequence + strand2.bases)
        with open(set_path, 'w') as setfile:
            # One constraint per line; without the newline they run together.
            setfile.writelines(
                    constraint + '\n'
                    for constraint in Mfold.get_constraints(strand1, strand2))

        # The command runs inside the folder, so the files are named relative
        # to it; passing folder-prefixed paths would apply a relative folder
        # twice. An empty folder is not a valid cwd, hence "or None".
        subprocess.run([self.command, f'SEQ={sequence_file}', f'AUX={settings_file}'],
                cwd=self.folder or None)

    def clean_all(self):
        """
        Removes every file in the folder whose name ends in a known suffix.
        """
        for suffix in Mfold.output_suffixes:
            # os.path.join keeps the pattern relative when the folder is '',
            # whereas '{folder}/*' would then glob the filesystem root.
            for file in glob(os.path.join(self.folder, f'*{suffix}')):
                os.remove(file)

    def clean(self, file_prefix):
        """
        Removes the files in the folder named file_prefix + a known suffix.
        Args:
            file_prefix: The common start of the file names; it is passed to
                glob, so wildcard characters in it are expanded.
        """
        for suffix in Mfold.output_suffixes:
            file_path = os.path.join(self.folder, f'{file_prefix}{suffix}')
            for file in glob(file_path):
                os.remove(file)

    def get_energy(self, details_file='a.det'):
        """
        Reads the energy from a details file.
        Args:
            details_file: Name of the details file inside the folder.
        Returns:
            The number following ' dG = ' on the first line that starts with
            it, as a float; 0 if the file does not exist or has no such line.
        """
        details_path = os.path.join(self.folder, details_file)
        if os.path.exists(details_path):
            with open(details_path, 'r') as detfile:
                for line in detfile:
                    if line.startswith(Mfold.energy_string):
                        return float(line[len(Mfold.energy_string):])
        return 0

    @staticmethod
    def get_constraints(strand1, strand2, linker_length=None):
        """
        Builds the constraint lines for two strands joined by the linker.
        Args:
            strand1: The first strand; its regions start at position 1.
            strand2: The second strand; its regions start after the linker.
            linker_length: Number of positions between the two strands.
                Defaults to the length of Mfold.linker_sequence.
        Returns:
            A list of strings 'P s1-e1 s2-e2', one for each upper-case region
            name whose lower-case counterpart also exists. The first range is
            the lower-case region, the second the upper-case one; positions
            are 1-based and inclusive.
        """
        if linker_length is None:
            linker_length = len(Mfold.linker_sequence)

        # Regions are keyed by name, so a name that occurs more than once
        # (in either strand) keeps only its last position.
        all_regions = {}
        curr_index = 0
        for strand, gap in ((strand1, 0), (strand2, linker_length)):
            curr_index += gap
            for region in strand.constraints:
                all_regions[region.name] = (curr_index + 1, curr_index + region.length)
                curr_index += region.length

        constraints = []
        for name, (start, end) in all_regions.items():
            partner = name.lower()
            if name.isupper() and partner in all_regions:
                partner_start, partner_end = all_regions[partner]
                constraints.append(f'P {partner_start}-{partner_end} {start}-{end}')
        return constraints

Class variables

var energy_string

str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.

var linker_sequence

str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.

var output_suffixes

list() -> new empty list list(iterable) -> new list initialized from iterable's items

Methods

def clean(self, file_prefix)
Expand source code
def clean(self, file_prefix):
    """
    Removes the files in the folder named file_prefix + a known suffix.
    Args:
        file_prefix: The common start of the file names; it is passed to
            glob, so wildcard characters in it are expanded.
    """
    patterns = (os.path.join(self.folder, f'{file_prefix}{ending}')
                for ending in Mfold.output_suffixes)
    for pattern in patterns:
        for match in glob(pattern):
            os.remove(match)
def clean_all(self)
Expand source code
def clean_all(self):
    """
    Removes every file in the folder whose name ends in a known suffix.
    """
    for suffix in Mfold.output_suffixes:
        # os.path.join keeps the pattern relative when the folder is '',
        # whereas '{folder}/*' would then glob the filesystem root.
        for file in glob(os.path.join(self.folder, f'*{suffix}')):
            os.remove(file)
def get_constraints(strand1, strand2)
Expand source code
def get_constraints(strand1, strand2):
    constraints = []
    all_regions = {}

    curr_index = 0
    for region in strand1.constraints:
        all_regions[region.name] = (curr_index + 1, curr_index + region.length)
        curr_index += region.length

    curr_index += 3
    for region in strand2.constraints:
        all_regions[region.name] = (curr_index + 1, curr_index + region.length)
        curr_index += region.length


    for region in all_regions:
        if region.isupper() and region.lower() in all_regions:
            constraints.append(
                    f'P {all_regions[region.lower()][0]}-{all_regions[region.lower()][1]} '
                    + f'{all_regions[region][0]}-{all_regions[region][1]}')
    return constraints
def get_energy(self, details_file='a.det')
Expand source code
def get_energy(self, details_file='a.det'):
    """
    Reads the energy from a details file.
    Args:
        details_file: Name of the details file inside the folder.
    Returns:
        The number following ' dG = ' on the first line that starts with
        it, as a float; 0 if the file does not exist or has no such line.
    """
    details_path = os.path.join(self.folder, details_file)
    if not os.path.exists(details_path):
        return 0

    marker = Mfold.energy_string
    with open(details_path, 'r') as detfile:
        for entry in detfile:
            if entry.startswith(marker):
                return float(entry[len(marker):])
    return 0
def run(self, strand1, strand2, sequence_file='a.seq', settings_file='a.aux')
Expand source code
def run(self, strand1, strand2, sequence_file='a.seq', settings_file='a.aux'):
    """
    Writes the sequence and constraint files for a pair of strands and
    launches the configured command on them.
    Args:
        strand1: The first strand (its bases come first in the sequence).
        strand2: The second strand (its bases follow the linker).
        sequence_file: Name of the sequence file inside the folder.
        settings_file: Name of the constraint file inside the folder.
    """
    seq_path = os.path.join(self.folder, sequence_file)
    set_path = os.path.join(self.folder, settings_file)

    with open(seq_path, 'w') as seqfile:
        seqfile.write(strand1.bases + Mfold.linker_sequence + strand2.bases)
    with open(set_path, 'w') as setfile:
        # One constraint per line; without the newline they run together.
        setfile.writelines(
                constraint + '\n'
                for constraint in Mfold.get_constraints(strand1, strand2))

    # The command runs inside the folder, so the files are named relative
    # to it; passing folder-prefixed paths would apply a relative folder
    # twice. An empty folder is not a valid cwd, hence "or None".
    subprocess.run([self.command, f'SEQ={sequence_file}', f'AUX={settings_file}'],
            cwd=self.folder or None)
class Region (name, length)

A class representing a region of DNA on a strand. The structure of a Strand is represented as a list of Regions.

Args

name
A string representing the name of the region. It should either be all uppercase or all lowercase.
length
The number of bases in the region.
Expand source code
class Region:
    """
    A named, fixed-length stretch of bases on a strand. A Strand's structure
    is described by a list of these.
    """
    def __init__(self, name, length):
        """
        Args:
            name: The label of the region, as a string. It should either be
                all uppercase or all lowercase.
            length: How many bases the region spans.
        """
        # label identifying the region
        self.name = name
        # number of bases covered by the region
        self.length = length

    def __repr__(self):
        return "Region('{}', {})".format(self.name, self.length)
class Strand (bases, constraints)

A class representing a strand of DNA.

Args

bases
A string representing the bases in a strand
constraints
A list of Regions representing the structure of a strand

Raises

TypeError
The listed bases are not valid.
Expand source code
class Strand:
    """
    A class representing a strand of DNA.
    """
    allowed_bases = set('ATCG')
    allowed_constraints = set(string.ascii_letters + string.digits)
    base_pair = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}

    def __init__(self, bases, constraints):
        """
        Args:
            bases: A string representing the bases in a strand. It is
                upper-cased before being validated and stored.
            constraints: A list of Regions representing the structure of a strand
        Raises:
            TypeError: The bases contain letters other than A, T, C and G, or
                a region name contains non-alphanumeric characters.
        """
        self.bases = bases.upper()
        self.constraints = constraints
        # Subset test: every base must be allowed. A ">" (proper superset)
        # test only fires when all four bases AND an extra letter are present.
        if not set(self.bases) <= Strand.allowed_bases:
            raise TypeError('The selected bases contain letters '
                          + 'other than A, T, C, and G: ' + bases)
        for constraint in self.constraints:
            # Region-like objects carry their label in .name; anything else
            # (e.g. the characters of a plain string) is checked as-is.
            name = str(getattr(constraint, 'name', constraint))
            if not set(name) <= Strand.allowed_constraints:
                raise TypeError('The selected constraints contain '
                              + 'non-alphanumeric characters: ' + name)

    @staticmethod
    def complement(bases):
        """
        Returns the complement of a string of bases.
        Args:
            bases: The string of bases to find the complement of. Only the
                upper-case letters A, T, C and G are present in the lookup
                table; any other character raises KeyError.
        Returns:
            A string representing the complement.
        """
        return "".join([Strand.base_pair[base] for base in bases])

Class variables

var allowed_bases

set() -> new empty set object set(iterable) -> new set object

Build an unordered collection of unique elements.

var allowed_constraints

set() -> new empty set object set(iterable) -> new set object

Build an unordered collection of unique elements.

var base_pair

dict() -> new empty dictionary dict(mapping) -> new dictionary initialized from a mapping object's (key, value) pairs dict(iterable) -> new dictionary initialized as if via: d = {} for k, v in iterable: d[k] = v dict(**kwargs) -> new dictionary initialized with the name=value pairs in the keyword argument list. For example: dict(one=1, two=2)

Static methods

def complement(bases)

Returns the complement of a string of bases.

Args

bases
The string of bases to find the complement of.

Returns

A string representing the complement.

Expand source code
@staticmethod
def complement(bases):
    """
    Builds the complementary string for a string of bases.
    Args:
        bases: The bases to complement, one character per base.
    Returns:
        A string of the same length holding each base's partner.
    """
    partner_of = Strand.base_pair
    return "".join(partner_of[letter] for letter in bases)