Module cli

Expand source code
from genetic import GeneticAlgorithm, Sequence
from mfold_library import Region
import matplotlib.pyplot as plt
import statistics
import sys
import re
import yaml

def parse_raw_structure(raw_structure):
    """
    Parses a set of constraints that determine which sections of strands are complementary.

    Strands are separated by commas and the regions inside a strand by
    whitespace. Every region token is split into its first run of non-digit
    characters and its first run of digits; both are handed to ``Region``,
    the digits converted to ``int``.

    Args:
        raw_structure: the constraints to parse. sample input: a25 B25, b25 C25, c25 D25, d25 A25
    Returns:
        A list with one entry per strand; each entry is the list of Region
        objects built from that strand's tokens, in input order.
    Raises:
        IndexError: if a region token has no non-digit part or no digit part.
    """
    structure = []
    for raw_strand in raw_structure.split(','):
        strand = []
        # split() without arguments already discards surrounding whitespace.
        for token in raw_strand.split():
            # Raw strings keep the regex escapes from being read as (invalid)
            # string escape sequences.
            letter_part = re.findall(r'\D+', token)[0]
            number_part = int(re.findall(r'\d+', token)[0])
            strand.append(Region(letter_part, number_part))
        structure.append(strand)
    return structure

def consume_input(key, default):
    """
    Prompts for one setting on standard input and echoes the value that will be used.

    Args:
        key: human-readable name of the setting, shown in the prompt and the echo.
        default: value used when the stripped answer is empty.
    Returns:
        The stripped text that was entered, or default when that text is empty.
    """
    print(f"Enter the {key}: (default: {default})")
    # An empty (or whitespace-only) answer selects the default.
    answer = input().strip() or default
    print(f"Given {key}: {answer}")
    return answer

def save_configuration(params):
    """
    Saves input configurations. Default filename is config.dat.

    Prompts on standard input for the destination file name, then writes
    params to that file as block-style YAML and prints how to reuse it.

    Args:
        params: configurations to save
    """
    print("Enter the file name to save your input configurations: (default: config.dat)")
    # Strip the answer so a whitespace-only reply falls back to the default
    # instead of creating a file with a blank name (same rule consume_input uses).
    configpath = input().strip() or "config.dat"
    with open(configpath, "w") as configfile:
        yaml.dump(params, configfile, default_flow_style=False)
    print(f"Configuration file saved to {configpath}.")
    print(f"You can edit the configuration file directly and run `python3 cli.py {configpath}` next time to skip the manual setup steps.")


def load_configuration(configpath):
    """
    Allows users to load input configurations saved to file

    Args:
        configpath: path to file of configurations
    Returns:
        The mapping of configuration values parsed from the YAML file.
    Raises:
        ValueError: if the file does not contain a YAML mapping (for example,
            when it is empty).
    """
    print(f"Automatically using inputs from configuration file {configpath}")

    with open(configpath, "r") as configfile:
        # NOTE(review): the saved parameters appear to be plain strings, lists
        # and dicts, so yaml.safe_load would likely suffice - confirm before
        # loading configuration files from untrusted sources.
        params = yaml.load(configfile, Loader=yaml.FullLoader)

    # An empty file parses to None and a bare scalar to a str/number; fail here
    # with a clear message rather than later on the first key lookup.
    if not isinstance(params, dict):
        raise ValueError(f"Configuration file {configpath} does not contain a mapping of parameters")

    return params

def _read_region_definitions():
    """
    Reads "name:value" lines from standard input until an empty line is entered.

    Lines without a ':' are reported and skipped. Whitespace around the name
    and the value is trimmed.

    Returns:
        A dict mapping the text before the first ':' to the text after it.
    """
    definitions = {}
    while True:
        line = input().strip()
        if not line:
            return definitions
        name, separator, value = line.partition(':')
        if not separator:
            # str.find would return -1 here and silently chop the last
            # character off the key; reject the line instead.
            print(f"Ignoring '{line}': expected the form name:value")
            continue
        definitions[name.strip()] = value.strip()


def get_user_input():
    """
    Initialisation procedure for pipeline.

    Interactively prompts for every run parameter, saves the answers through
    save_configuration and returns them.

    Returns:
        A dict with the keys raw_structure, mfold_command, population_size,
        mutation_rate, iterations, boltzmann_factor, input_sequence_definitions,
        fixed_regions and outfile. Scalar settings are kept as the entered strings.
    Raises:
        ValueError: if the number of initial sequences is not an integer.
    """
    params = {}

    print("Enter your desired shape (for example: a25 B25, b25 C25, c25 D25, d25 A25)")
    params["raw_structure"] = input().strip()
    # Parsing here only echoes the shape back; the raw text is what gets stored.
    print(f"Given desired shape: {parse_raw_structure(params['raw_structure'])}\n")

    params["mfold_command"] = consume_input('the path to Mfold executable', '~/.local/bin/mfold_quik')
    params["population_size"] = consume_input('population size', '25')
    params["mutation_rate"] = consume_input('mutation rate', '100')
    params["iterations"] = consume_input('number of iterations', '100')
    params["boltzmann_factor"] = consume_input('Boltzmann scaling factor', '1')
    num_init_seq = int(consume_input('number of initial sequences', '0'))

    params["input_sequence_definitions"] = []
    for sequence_number in range(1, num_init_seq + 1):
        print(f"Enter each region definition of sequence #{sequence_number} on a new line followed by an empty line")
        params["input_sequence_definitions"].append(_read_region_definitions())

    print('Enter all fixed regions followed by an empty line')
    params["fixed_regions"] = _read_region_definitions()

    print("Enter the file name for the output plot of fitness and diversity history: (default: history.png)")
    # Stripped so a whitespace-only answer selects the default file name.
    params["outfile"] = input().strip() or "history.png"
    print(f"Output plot will be saved to: {params['outfile']}\n")
    save_configuration(params)
    return params

if __name__ == '__main__':
    # A configuration file path on the command line skips the interactive prompts.
    if len(sys.argv) > 1:
        params = load_configuration(sys.argv[1])
    else:
        params = get_user_input()

    structure = parse_raw_structure(params["raw_structure"])
    gen_alg = GeneticAlgorithm(
        structure,
        mfold_command=params["mfold_command"],
        population_size=int(params["population_size"]),
        iterations=int(params["iterations"]),
        mutation_rate=int(params["mutation_rate"]),
        boltzmann_factor=float(params["boltzmann_factor"]),
        initial_sequences=[Sequence(definition, structure) for definition in params["input_sequence_definitions"]],
        fixed_regions=params['fixed_regions']
    )
    try:
        gen_alg.run()
    finally:
        # Runs even when run() raises, so the history gathered so far is still
        # reported and written to disk before the exception propagates.
        print(len(gen_alg.diversity_history) - 1, " iterations completed")
        print("Diversity history: ", gen_alg.diversity_history)
        print("Fitness history: ", gen_alg.fitness_history)
        with open("diversity.dat", "w") as outfile:
            for diversity in gen_alg.diversity_history:
                outfile.write(str(diversity) + '\n')
        with open("fitness.dat", "w") as outfile:
            for fitness in gen_alg.fitness_history:
                outfile.write(str(fitness) + '\n')
        gen_alg.print_population()
        print("Best Sequences: \n")
        gen_alg.best_child.print()

    # Per-entry summary statistics of the recorded fitness values.
    best = [min(iteration) for iteration in gen_alg.fitness_history]
    worst = [max(iteration) for iteration in gen_alg.fitness_history]
    std = [statistics.stdev(iteration) for iteration in gen_alg.fitness_history]

    # Derive each x-axis from the data actually recorded rather than from the
    # configured iteration count: plot() requires x and y of equal length, and
    # the histories need not have exactly params["iterations"] entries.
    fitness_steps = range(len(gen_alg.fitness_history))
    diversity_steps = range(len(gen_alg.diversity_history))

    plt.rcParams["figure.figsize"] = [10, 15]
    fig, axs = plt.subplots(3, 1)

    axs[0].plot(fitness_steps, best, 'r', label='Best solution')
    axs[0].plot(fitness_steps, worst, 'b', label='Worst solution')
    axs[0].set_xlabel('Iteration')
    axs[0].set_ylabel('Norm')
    axs[0].grid(True)
    axs[0].legend()
    axs[0].set_title('Norms of best and worst solutions per iteration')
    axs[0].set_ylim([0.8*min(best), 1.2*max(worst)])
    axs[0].set_xlim([0, len(fitness_steps) - 1])

    axs[1].plot(fitness_steps, std)
    axs[1].set_xlabel('Iteration')
    axs[1].set_ylabel('Standard deviation')
    axs[1].grid(True)
    axs[1].set_title('Standard deviation of norms in population per iteration')
    axs[1].set_ylim([0, 1.2*max(std)])
    axs[1].set_xlim([0, len(fitness_steps) - 1])

    axs[2].plot(diversity_steps, gen_alg.diversity_history)
    # NOTE(review): 12.5 is a hard-coded reference level; its meaning is not
    # evident from this file - confirm and consider deriving it from params.
    axs[2].axhline(y=12.5, color='r', linestyle='-')
    axs[2].set_xlabel('Iteration')
    axs[2].set_ylabel('Diversity')
    axs[2].grid(True)
    axs[2].set_title('Diversity of population per iteration')
    axs[2].set_ylim([0, 1.2*max(gen_alg.diversity_history)])
    axs[2].set_xlim([0, len(diversity_steps) - 1])

    fig.tight_layout()
    plt.savefig(params["outfile"])

Functions

def consume_input(key, default)
Expand source code
def consume_input(key, default):
    """
    Prompts for one setting on standard input and echoes the value that will be used.

    Args:
        key: human-readable name of the setting, shown in the prompt and the echo.
        default: value used when the stripped answer is empty.
    Returns:
        The stripped text that was entered, or default when that text is empty.
    """
    print(f"Enter the {key}: (default: {default})")
    # An empty (or whitespace-only) answer selects the default.
    answer = input().strip() or default
    print(f"Given {key}: {answer}")
    return answer
def get_user_input()

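Initialisation procedure for the pipeline: interactively prompts for every run parameter, saves the answers to a configuration file, and returns them as a dictionary.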

Expand source code
def _read_region_definitions():
    """
    Reads "name:value" lines from standard input until an empty line is entered.

    Lines without a ':' are reported and skipped. Whitespace around the name
    and the value is trimmed.

    Returns:
        A dict mapping the text before the first ':' to the text after it.
    """
    definitions = {}
    while True:
        line = input().strip()
        if not line:
            return definitions
        name, separator, value = line.partition(':')
        if not separator:
            # str.find would return -1 here and silently chop the last
            # character off the key; reject the line instead.
            print(f"Ignoring '{line}': expected the form name:value")
            continue
        definitions[name.strip()] = value.strip()


def get_user_input():
    """
    Initialisation procedure for pipeline.

    Interactively prompts for every run parameter, saves the answers through
    save_configuration and returns them.

    Returns:
        A dict with the keys raw_structure, mfold_command, population_size,
        mutation_rate, iterations, boltzmann_factor, input_sequence_definitions,
        fixed_regions and outfile. Scalar settings are kept as the entered strings.
    Raises:
        ValueError: if the number of initial sequences is not an integer.
    """
    params = {}

    print("Enter your desired shape (for example: a25 B25, b25 C25, c25 D25, d25 A25)")
    params["raw_structure"] = input().strip()
    # Parsing here only echoes the shape back; the raw text is what gets stored.
    print(f"Given desired shape: {parse_raw_structure(params['raw_structure'])}\n")

    params["mfold_command"] = consume_input('the path to Mfold executable', '~/.local/bin/mfold_quik')
    params["population_size"] = consume_input('population size', '25')
    params["mutation_rate"] = consume_input('mutation rate', '100')
    params["iterations"] = consume_input('number of iterations', '100')
    params["boltzmann_factor"] = consume_input('Boltzmann scaling factor', '1')
    num_init_seq = int(consume_input('number of initial sequences', '0'))

    params["input_sequence_definitions"] = []
    for sequence_number in range(1, num_init_seq + 1):
        print(f"Enter each region definition of sequence #{sequence_number} on a new line followed by an empty line")
        params["input_sequence_definitions"].append(_read_region_definitions())

    print('Enter all fixed regions followed by an empty line')
    params["fixed_regions"] = _read_region_definitions()

    print("Enter the file name for the output plot of fitness and diversity history: (default: history.png)")
    # Stripped so a whitespace-only answer selects the default file name.
    params["outfile"] = input().strip() or "history.png"
    print(f"Output plot will be saved to: {params['outfile']}\n")
    save_configuration(params)
    return params
def load_configuration(configpath)

Allows users to load input configurations saved to file

Args

configpath
path to file of configurations
Expand source code
def load_configuration(configpath):
    """
    Allows users to load input configurations saved to file

    Args:
        configpath: path to file of configurations
    Returns:
        The mapping of configuration values parsed from the YAML file.
    Raises:
        ValueError: if the file does not contain a YAML mapping (for example,
            when it is empty).
    """
    print(f"Automatically using inputs from configuration file {configpath}")

    with open(configpath, "r") as configfile:
        # NOTE(review): the saved parameters appear to be plain strings, lists
        # and dicts, so yaml.safe_load would likely suffice - confirm before
        # loading configuration files from untrusted sources.
        params = yaml.load(configfile, Loader=yaml.FullLoader)

    # An empty file parses to None and a bare scalar to a str/number; fail here
    # with a clear message rather than later on the first key lookup.
    if not isinstance(params, dict):
        raise ValueError(f"Configuration file {configpath} does not contain a mapping of parameters")

    return params
def parse_raw_structure(raw_structure)

Parses a set of constraints that determine which sections of strands are complementary.

Args

raw_structure
the constraints to parse. sample input: (a25 B25, b25 C25, c25 D25, d25 A25).

Returns

A list of strands, where each strand is the list of Region objects parsed from its whitespace-separated region tokens.

Expand source code
def parse_raw_structure(raw_structure):
    """
    Parses a set of constraints that determine which sections of strands are complementary.

    Strands are separated by commas and the regions inside a strand by
    whitespace. Every region token is split into its first run of non-digit
    characters and its first run of digits; both are handed to ``Region``,
    the digits converted to ``int``.

    Args:
        raw_structure: the constraints to parse. sample input: a25 B25, b25 C25, c25 D25, d25 A25
    Returns:
        A list with one entry per strand; each entry is the list of Region
        objects built from that strand's tokens, in input order.
    Raises:
        IndexError: if a region token has no non-digit part or no digit part.
    """
    structure = []
    for raw_strand in raw_structure.split(','):
        strand = []
        # split() without arguments already discards surrounding whitespace.
        for token in raw_strand.split():
            # Raw strings keep the regex escapes from being read as (invalid)
            # string escape sequences.
            letter_part = re.findall(r'\D+', token)[0]
            number_part = int(re.findall(r'\d+', token)[0])
            strand.append(Region(letter_part, number_part))
        structure.append(strand)
    return structure
def save_configuration(params)

Saves input configurations. Default filename is config.dat.

Args

params
configurations to save
Expand source code
def save_configuration(params):
    """
    Saves input configurations. Default filename is config.dat.

    Prompts on standard input for the destination file name, then writes
    params to that file as block-style YAML and prints how to reuse it.

    Args:
        params: configurations to save
    """
    print("Enter the file name to save your input configurations: (default: config.dat)")
    # Strip the answer so a whitespace-only reply falls back to the default
    # instead of creating a file with a blank name (same rule consume_input uses).
    configpath = input().strip() or "config.dat"
    with open(configpath, "w") as configfile:
        yaml.dump(params, configfile, default_flow_style=False)
    print(f"Configuration file saved to {configpath}.")
    print(f"You can edit the configuration file directly and run `python3 cli.py {configpath}` next time to skip the manual setup steps.")