Module mfold_library
Expand source code
from glob import glob
import os
import re
import string
import subprocess
class Strand:
    """
    A class representing a strand of DNA.

    Attributes:
        bases: The base sequence of the strand, stored upper-cased.
        constraints: The list of Regions describing the structure of the strand.
    """
    allowed_bases = set('ATCG')
    allowed_constraints = set(string.ascii_letters + string.digits)
    base_pair = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}

    def __init__(self, bases, constraints):
        """
        Args:
            bases: A string representing the bases in a strand
            constraints: A list of Regions representing the structure of a strand
        Raises:
            TypeError: The listed bases are not valid, or a region name
                contains a non-alphanumeric character.
        """
        self.bases = bases.upper()
        self.constraints = constraints
        # Subset test: reject as soon as ANY character falls outside A/T/C/G.
        # (A strict-superset test only fires when all four bases are present
        # in addition to the bad character, so e.g. 'AX' would slip through.)
        if not set(self.bases) <= Strand.allowed_bases:
            raise TypeError('The selected bases contain letters '
                            + 'other than A, T, C, and G: ' + bases)
        for constraint in self.constraints:
            # Validate the region's name; fall back to the item itself so a
            # plain string name is checked the same way.
            name = str(getattr(constraint, 'name', constraint))
            if not set(name) <= Strand.allowed_constraints:
                raise TypeError('The selected constraints contain '
                                + 'non-alphanumeric characters: ' + name)

    @staticmethod
    def complement(bases):
        """
        Returns the complement of a string of bases.

        The result is the base-by-base complement in the same order as the
        input (it is not reversed). Input is upper-cased first, matching
        what the constructor does with its bases.

        Args:
            bases: The string of bases to find the complement of.
        Returns:
            A string representing the complement.
        Raises:
            KeyError: A character in bases is not one of A, T, C, or G.
        """
        return "".join(Strand.base_pair[base] for base in bases.upper())
class Region:
    """
    A named, fixed-length stretch of DNA on a strand. A Strand describes its structure as a list of Regions.
    """

    def __init__(self, name, length):
        """
        Args:
            name: The region's name as a string. It is expected to be entirely uppercase or entirely lowercase.
            length: How many bases the region spans.
        """
        # Both values are stored exactly as given; nothing is validated here.
        self.length = length
        self.name = name

    def __repr__(self):
        name, length = self.name, self.length
        return f"Region('{name}', {length})"
class Mfold:
    """
    Interface to Mfold software.

    Writes the input files for a pair of strands, invokes the configured
    Mfold command on them, reads the resulting energy back, and removes
    output files.
    """
    # Prefix of the line in the details file that carries the energy value.
    energy_string = ' dG = '
    # Placeholder written between the two strands' bases in the sequence file.
    linker_sequence = 'LLL'
    # Suffixes of the files that clean() and clean_all() remove.
    output_suffixes = ['.aux', '.cmd', '.con', '.log', '.pnt', '.sav', '.seq', '.ss',
                       '-local.pnt', '-local.seq', '.ann', '.ct', '.ps', '.det', '.out',
                       '.h-num', '.plot', '.pdf', '.ss-count', '-temp.det', '-temp.out']

    def __init__(self, output_folder='', mfold_command=''):
        """
        Args:
            output_folder: Folder in which input and output files live.
                An empty string means the current working directory.
            mfold_command: The executable to invoke in run().
        """
        self.folder = output_folder
        self.command = mfold_command

    def run(self, strand1, strand2, sequence_file='a.seq', settings_file='a.aux'):
        """
        Writes the sequence and settings files for two strands and runs Mfold.

        Args:
            strand1: The first strand (needs .bases and .constraints).
            strand2: The second strand (needs .bases and .constraints).
            sequence_file: Name of the sequence file, relative to the folder.
            settings_file: Name of the settings file, relative to the folder.
        """
        seq_path = os.path.join(self.folder, sequence_file)
        set_path = os.path.join(self.folder, settings_file)
        with open(seq_path, 'w') as seqfile:
            seqfile.write(strand1.bases + Mfold.linker_sequence + strand2.bases)
        with open(set_path, 'w') as setfile:
            # One constraint per line; without the separator they would all
            # run together on a single line.
            setfile.writelines(
                constraint + '\n'
                for constraint in Mfold.get_constraints(strand1, strand2))
        # The child runs inside self.folder, so hand it absolute paths;
        # a folder-relative path would otherwise be resolved against the
        # folder a second time. An empty folder is not a valid cwd, so fall
        # back to inheriting the current directory. The exit status is not
        # checked (unchanged from the previous behaviour).
        subprocess.run(
            [self.command,
             f'SEQ={os.path.abspath(seq_path)}',
             f'AUX={os.path.abspath(set_path)}'],
            cwd=self.folder or None)

    def clean_all(self):
        """
        Removes every file in the folder whose name ends in a known suffix.
        """
        for suffix in Mfold.output_suffixes:
            # os.path.join keeps the pattern relative when the folder is '';
            # plain string formatting would yield '/*suffix' and match files
            # in the filesystem root instead.
            for file in glob(os.path.join(self.folder, f'*{suffix}')):
                os.remove(file)

    def clean(self, file_prefix):
        """
        Removes the files in the folder named file_prefix + a known suffix.

        Args:
            file_prefix: The file name without its suffix, e.g. 'a'.
        """
        for suffix in Mfold.output_suffixes:
            file_path = os.path.join(self.folder, f'{file_prefix}{suffix}')
            # glob() returns only paths that exist, so missing files are skipped.
            for file in glob(file_path):
                os.remove(file)

    def get_energy(self, details_file='a.det'):
        """
        Reads the energy from a details file.

        Args:
            details_file: Name of the details file, relative to the folder.
        Returns:
            The number following the first line that starts with
            energy_string, as a float. Returns 0 when the file does not
            exist or contains no such line.
        Raises:
            ValueError: The text after energy_string is not a number.
        """
        details_path = os.path.join(self.folder, details_file)
        try:
            detfile = open(details_path, 'r')
        except FileNotFoundError:
            return 0
        with detfile:
            for line in detfile:
                if line.startswith(Mfold.energy_string):
                    return float(line[len(Mfold.energy_string):])
        return 0

    @staticmethod
    def get_constraints(strand1, strand2):
        """
        Builds the constraint lines for two strands joined by the linker.

        Regions are numbered with 1-based, inclusive base positions over the
        sequence strand1 + linker + strand2. For every upper-case region name
        whose lower-case counterpart also exists, one line of the form
        'P <lower start>-<lower end> <upper start>-<upper end>' is produced.
        NOTE(review): 'P' is presumably Mfold's "prohibit pairing" constraint;
        confirm against the Mfold documentation.

        If the same region name occurs more than once (for example when a
        strand is paired with itself), the last occurrence's positions win.

        Args:
            strand1: The first strand (needs .constraints, a list of Regions).
            strand2: The second strand (needs .constraints, a list of Regions).
        Returns:
            A list of constraint strings.
        """
        spans = {}
        offset = 0
        # The linker only separates strand1 from strand2, so the gap after
        # strand2 is zero.
        for strand, gap in ((strand1, len(Mfold.linker_sequence)), (strand2, 0)):
            for region in strand.constraints:
                spans[region.name] = (offset + 1, offset + region.length)
                offset += region.length
            offset += gap

        constraints = []
        for name, (start, end) in spans.items():
            partner = name.lower()
            if name.isupper() and partner in spans:
                partner_start, partner_end = spans[partner]
                constraints.append(
                    f'P {partner_start}-{partner_end} {start}-{end}')
        return constraints
class EnergyMatrix:
    """
    Square table of interaction energies for every ordered pair taken from a list of Strands.
    """

    def __init__(self, mfold, strands):
        """
        Args:
            mfold: The object used to run the folding and read energies; its
                clean_all(), run() and get_energy() methods are called by create().
            strands: The strands to pair with one another.
        """
        self.strands = strands
        self.mfold = mfold
        # One row per strand and one column per strand, all unset until create() runs.
        self.matrix = [[None for _ in strands] for _ in strands]

    def create(self):
        """
        Fills the matrix: entry [row][col] is the energy reported for the
        strand at index row paired with the strand at index col.

        Output files are cleared before each run, and each pair uses files
        named '<row>_<col>' plus the '.seq', '.aux' and '.det' suffixes.
        """
        for row, first in enumerate(self.strands):
            for col, second in enumerate(self.strands):
                self.mfold.clean_all()
                self.mfold.run(first, second, f'{row}_{col}.seq', f'{row}_{col}.aux')
                energy = self.mfold.get_energy(f'{row}_{col}.det')
                self.matrix[row][col] = energy
Classes
class EnergyMatrix (mfold, strands)
-
The matrix of interaction energies between a list of Strands.
Expand source code
class EnergyMatrix: """ The matrix of interaction energies between a list of Strands. """ def __init__(self, mfold, strands): self.mfold = mfold self.strands = strands self.matrix = [[None for strand1 in strands] for strand2 in strands] def create(self): for i, strand1 in enumerate(self.strands): for j, strand2 in enumerate(self.strands): self.mfold.clean_all() self.mfold.run(strand1, strand2, f'{i}_{j}.seq', f'{i}_{j}.aux') self.matrix[i][j] = self.mfold.get_energy(f'{i}_{j}.det')
Methods
def create(self)
-
Expand source code
def create(self): for i, strand1 in enumerate(self.strands): for j, strand2 in enumerate(self.strands): self.mfold.clean_all() self.mfold.run(strand1, strand2, f'{i}_{j}.seq', f'{i}_{j}.aux') self.matrix[i][j] = self.mfold.get_energy(f'{i}_{j}.det')
class Mfold (output_folder='', mfold_command='')
-
Interface to Mfold software.
Expand source code
class Mfold: """ Interface to Mfold software. """ energy_string = ' dG = ' linker_sequence = 'LLL' output_suffixes = ['.aux', '.cmd', '.con', '.log', '.pnt', '.sav', '.seq', '.ss', '-local.pnt', '-local.seq', '.ann', '.ct', '.ps', '.det', '.out', '.h-num', '.plot', '.pdf', '.ss-count', '-temp.det', '-temp.out'] def __init__(self, output_folder='', mfold_command=''): self.folder = output_folder self.command = mfold_command def run(self, strand1, strand2, sequence_file='a.seq', settings_file='a.aux'): seq_path = os.path.join(self.folder, sequence_file) set_path = os.path.join(self.folder, settings_file) with open(seq_path, 'w') as seqfile: seqfile.write(strand1.bases + Mfold.linker_sequence + strand2.bases) with open(set_path, 'w') as setfile: for constraint in Mfold.get_constraints(strand1, strand2): setfile.write(constraint) subprocess.run([self.command, f'SEQ={seq_path}', f'AUX={set_path}'], cwd=self.folder) def clean_all(self): for suffix in Mfold.output_suffixes: for file in glob(f'{self.folder}/*{suffix}'): os.remove(file) def clean(self, file_prefix): for suffix in Mfold.output_suffixes: file_path = os.path.join(self.folder, f'{file_prefix}{suffix}') for file in glob(file_path): os.remove(file) def get_energy(self, details_file='a.det'): details_path = os.path.join(self.folder, details_file) if os.path.exists(details_path): with open(details_path, 'r') as detfile: for line in detfile: if line.startswith(Mfold.energy_string): return float(line[len(Mfold.energy_string):]) return 0 def get_constraints(strand1, strand2): constraints = [] all_regions = {} curr_index = 0 for region in strand1.constraints: all_regions[region.name] = (curr_index + 1, curr_index + region.length) curr_index += region.length curr_index += 3 for region in strand2.constraints: all_regions[region.name] = (curr_index + 1, curr_index + region.length) curr_index += region.length for region in all_regions: if region.isupper() and region.lower() in all_regions: constraints.append( f'P 
{all_regions[region.lower()][0]}-{all_regions[region.lower()][1]} ' + f'{all_regions[region][0]}-{all_regions[region][1]}') return constraints
Class variables
var energy_string
-
The prefix (`' dG = '`) that `get_energy` looks for at the start of a line in the details file; the remainder of that line is parsed as the energy value.
var linker_sequence
-
The placeholder sequence (`'LLL'`) that `run` writes between the bases of the two strands in the sequence file.
var output_suffixes
-
The list of file-name suffixes that `clean` and `clean_all` remove from the output folder.
Methods
def clean(self, file_prefix)
-
Expand source code
def clean(self, file_prefix): for suffix in Mfold.output_suffixes: file_path = os.path.join(self.folder, f'{file_prefix}{suffix}') for file in glob(file_path): os.remove(file)
def clean_all(self)
-
Expand source code
def clean_all(self): for suffix in Mfold.output_suffixes: for file in glob(f'{self.folder}/*{suffix}'): os.remove(file)
def get_constraints(strand1, strand2)
-
Expand source code
def get_constraints(strand1, strand2): constraints = [] all_regions = {} curr_index = 0 for region in strand1.constraints: all_regions[region.name] = (curr_index + 1, curr_index + region.length) curr_index += region.length curr_index += 3 for region in strand2.constraints: all_regions[region.name] = (curr_index + 1, curr_index + region.length) curr_index += region.length for region in all_regions: if region.isupper() and region.lower() in all_regions: constraints.append( f'P {all_regions[region.lower()][0]}-{all_regions[region.lower()][1]} ' + f'{all_regions[region][0]}-{all_regions[region][1]}') return constraints
def get_energy(self, details_file='a.det')
-
Expand source code
def get_energy(self, details_file='a.det'): details_path = os.path.join(self.folder, details_file) if os.path.exists(details_path): with open(details_path, 'r') as detfile: for line in detfile: if line.startswith(Mfold.energy_string): return float(line[len(Mfold.energy_string):]) return 0
def run(self, strand1, strand2, sequence_file='a.seq', settings_file='a.aux')
-
Expand source code
def run(self, strand1, strand2, sequence_file='a.seq', settings_file='a.aux'): seq_path = os.path.join(self.folder, sequence_file) set_path = os.path.join(self.folder, settings_file) with open(seq_path, 'w') as seqfile: seqfile.write(strand1.bases + Mfold.linker_sequence + strand2.bases) with open(set_path, 'w') as setfile: for constraint in Mfold.get_constraints(strand1, strand2): setfile.write(constraint) subprocess.run([self.command, f'SEQ={seq_path}', f'AUX={set_path}'], cwd=self.folder)
class Region (name, length)
-
A class representing a region of DNA on a strand. The structure of a Strand is represented as a list of Regions.
Args
name
- A string representing the name of the region. It should either be all uppercase or all lowercase.
length
- The number of bases in the region.
Expand source code
class Region: """ A class representing a region of DNA on a strand. The structure of a Strand is represented as a list of Regions. """ def __init__(self, name, length): """ Args: name: A string representing the name of the region. It should either be all uppercase or all lowercase. length: The number of bases in the region. """ # the name that represents the region, e.g. 'A3' -> 'A' self.name = name # the length of the region self.length = length def __repr__(self): return f"Region('{self.name}', {self.length})"
class Strand (bases, constraints)
-
A class representing a strand of DNA.
Args
bases
- A string representing the bases in a strand
constraints
- A list of Regions representing the structure of a strand
Raises
TypeError
- The listed bases are not valid.
Expand source code
class Strand: """ A class representing a strand of DNA. """ allowed_bases = set('ATCG') allowed_constraints = set(string.ascii_letters + string.digits) base_pair = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'} def __init__(self, bases, constraints): """ Args: bases: A string representing the bases in a strand constraints: A list of Regions representing the structure of a strand Raises: TypeError: The listed bases are not valid. """ self.bases = bases.upper() self.constraints = constraints if set(self.bases) > Strand.allowed_bases: raise TypeError('The selected bases contain letters ' + 'other than A, T, C, and G: ' + bases) if set(self.constraints) > Strand.allowed_constraints: raise TypeError('The selected constraints contain ' + 'non-alphanumeric characters: ' + constraints) @staticmethod def complement(bases): """ Returns the complement of a string of bases. Args: bases: The string of bases to find the complement of. Returns: A string representing the complement. """ return "".join([Strand.base_pair[base] for base in bases])
Class variables
var allowed_bases
-
The set of characters accepted in a strand's bases: `A`, `T`, `C` and `G`.
var allowed_constraints
-
The set of characters accepted in constraint names: the ASCII letters and the digits.
var base_pair
-
A dictionary mapping each base to its complementary base (`A` to `T`, `T` to `A`, `C` to `G`, `G` to `C`); used by `complement`.
Static methods
def complement(bases)
-
Returns the complement of a string of bases.
Args
bases
- The string of bases to find the complement of.
Returns
A string representing the complement.
Expand source code
@staticmethod def complement(bases): """ Returns the complement of a string of bases. Args: bases: The string of bases to find the complement of. Returns: A string representing the complement. """ return "".join([Strand.base_pair[base] for base in bases])