Source code for compchem_si.si_generation

"""si_generation
Module to take Gaussian output files and generate SI pdf pages

"""
import os

# pylint:disable=import-error
import subprocess

import cclib
import numpy as np
from reportlab.lib import colors

# from reportlab.lib.pagesizes import letter
from reportlab.lib.units import inch
from reportlab.platypus import (
    Image,
    PageBreak,
    Paragraph,
    SimpleDocTemplate,
    Spacer,
    Table,
)

no_to_symb = {
    1: "H",
    2: "He",
    3: "Li",
    4: "Be",
    5: "B",
    6: "C",
    7: "N",
    8: "O",
    9: "F",
    10: "Ne",
    11: "Na",
    12: "Mg",
    13: "Al",
    14: "Si",
    15: "P",
    16: "S",
    17: "Cl",
    18: "Ar",
    19: "K",
    20: "Ca",
    21: "Sc",
    22: "Ti",
    23: "V",
    24: "Cr",
    25: "Mn",
    26: "Fe",
    27: "Co",
    28: "Ni",
    29: "Cu",
    30: "Zn",
    31: "Ga",
    32: "Ge",
    33: "As",
    34: "Se",
    35: "Br",
    36: "Kr",
    37: "Rb",
    38: "Sr",
    39: "Y",
    40: "Zr",
    41: "Nb",
    42: "Mo",
    43: "Tc",
    44: "Ru",
    45: "Rh",
    46: "Pd",
    47: "Ag",
    48: "Cd",
    49: "In",
    50: "Sn",
    51: "Sb",
    52: "Te",
    53: "I",
    54: "Xe",
    55: "Cs",
    56: "Ba",
    57: "La",
    58: "Ce",
    59: "Pr",
    60: "Nd",
    61: "Pm",
    62: "Sm",
    63: "Eu",
    64: "Gd",
    65: "Tb",
    66: "Dy",
    67: "Ho",
    68: "Er",
    69: "Tm",
    70: "Yb",
    71: "Lu",
    72: "Hf",
    73: "Ta",
    74: "W",
    75: "Re",
    76: "Os",
    77: "Ir",
    78: "Pt",
    79: "Au",
    80: "Hg",
    81: "Tl",
    82: "Pb",
    83: "Bi",
    84: "Po",
    85: "At",
    86: "Rn",
    87: "Fr",
    88: "Ra",
    89: "Ac",
    90: "Th",
    91: "Pa",
    92: "U",
    93: "Np",
    94: "Pu",
    95: "Am",
    96: "Cm",
    97: "Bk",
    98: "Cf",
    99: "Es",
    100: "Fm",
    101: "Md",
    102: "No",
    103: "Lr",
    104: "Rf",
    105: "Db",
    106: "Sg",
    107: "Bh",
    108: "Hs",
    109: "Mt",
    110: "Ds",
    111: "Rg",
    112: "Cn",
    113: "Nh",
    114: "Fl",
    115: "Mc",
    116: "Lv",
    117: "Ts",
    118: "Og",
}


def _get_geom(data, calc_type):
    """Gets the geometry of the optimized structure from cclib parsed data

    Args:
        data: cclib data
        calc_type: the calculation type of the .log file, from _determine_calctype
    Returns:
        geometry as a list of lists with symbols and xyz coordinates

    """
    if calc_type not in ["freq", "opt", "opt+freq", "singlepoint"]:
        raise ValueError(
            f"Calculation type {calc_type} passed to _get_geom is not a supported type."
        )
    if calc_type in ["freq", "singlepoint"]:
        opt_geom = data.atomcoords[0]
    else:
        opt_geom = data.atomcoords[-1]
    atom_symb = [no_to_symb[no] for no in data.atomnos]
    mol_formula = _get_molformula(atom_symb)
    geom_array = np.c_[np.array(atom_symb), opt_geom]
    geom_list = list(list(el) for el in geom_array)
    return geom_list, mol_formula


def _write_image(data, log_file):
    """Create an .xyz file for the final geometry and use that and openbabel to create an image

    Args:
        data: data of log_file parsed by cclib.io.ccread
        loog_file: name of log file

    Returns:
        None, but writes image to file

    """
    if not os.path.exists("png_files/"):
        os.mkdir("png_files")
    if not os.path.exists("xyz_files/"):
        os.mkdir("xyz_files")

    xyz_file = log_file.replace(".log", ".xyz")
    png_file = log_file.replace(".log", ".png")
    cclib.io.ccwrite(
        data, "xyz", f"xyz_files/{xyz_file}", indices=len(data.atomcoords) - 1
    )
    subprocess.check_output(
        [
            "obabel",
            f"xyz_files/{xyz_file}",
            "-O",
            f"png_files/{png_file}",
            "-xp",
            "600",
            "-x0",
            "molfile",
        ],
        text=True,
    )


def _determine_calctype(data):
    """Takes cclib data and checks its keys to determine what type of calculation was run

    Args:
        data: ouput of cclib.io.ccread

    Returns:
        one of the supported calculation types(opt, freq, opt+freq, singlepoint, modredundant)

    """
    dict_data = data.getattributes()
    if "vibfreqs" in dict_data and "geotargets" in dict_data:
        return "opt+freq"
    if "vibfreqs" in dict_data:
        return "freq"
    if "scancoords" in dict_data:
        return "modredundant"
    if "geotargets" in dict_data:
        return "opt"
    return "singlepoint"


def _get_molformula(at_symbs):
    """Converts list of atomic symbols to molecular formula"""
    out_dict = {s: at_symbs.count(s) for s in set(at_symbs)}
    out_str = ""
    # sort alphabetically by key
    sorted_dict = dict(sorted(out_dict.items()))
    for key, value in sorted_dict.items():
        if value != 1:
            out_str += f"{key}{value}"
        else:
            # 1 atom, don't put the number
            out_str += f"{key}"
    return out_str


[docs] def parse_log_file(log_file): """Take a log file and get the geometry and energy values Args: log_file: the name of a .log file calc_type: type of the calculation Returns: the energy and geometry of the molecule in the .log file, and an image written to log_file.png """ data = cclib.io.ccread(log_file) calc_type = _determine_calctype(data) if calc_type != "modredundant": geom_array, mol_formula = _get_geom(data, calc_type) if calc_type in ["opt", "opt+freq"]: energy = data.scfenergies[-1] else: energy = data.scfenergies[0] _write_image(data, log_file) if len(list(set(data.metadata["methods"]))) > 1: raise Warning( f"Multiple different electronic structure methods detected for {log_file}, only returning first" ) out_dict = { "energy": energy, "geom": geom_array, "method": data.metadata["methods"][0], "basis": data.metadata["basis_set"], "calc_type": calc_type, "mol_formula": mol_formula, } return out_dict
[docs] def construct_si(log_file_list="dir", out_name="SupplementaryInformation.pdf"): """Construct the Supplementary Information Args: log_file_list: either 'dir' to run for all .log files in the current directory or a list of .log files to run out_name: name for the resulting document including the .pdf extension Returns: None, but writes generated SI to out_name """ if log_file_list == "dir": log_file_list = [file for file in os.listdir() if file.endswith(".log")] doc = SimpleDocTemplate(out_name) story = [] for log_file in log_file_list: story = construct_si_page(log_file, story) doc.build(story)
[docs] def construct_si_page(log_file, story): """For a given molecule, construct the SI page Args: log_file: log file name story: list containing platypus flowables from reportlab for the document being built Returns: the story input updated with the current page being built """ print(log_file) cc_dict = parse_log_file(log_file) story = _supporting_info_page(story, cc_dict, log_file) return story
def _supporting_info_page(story, cc_dict, log_file): """Construct a SI page for a given molecule Args: story: list containing platypus flowables from reportlab for the document being built cc_dict: dictionary containing information on the calculation, from parse_log_file Returns: the story input updated with the current page being built """ p = Paragraph("<font size=20>Structure</font>") story.append(p) story.append(Spacer(1, 0.2 * inch)) I = Image(f'png_files/{log_file.replace(".log", ".png")}') I.drawHeight = 2 * inch I.drawWidth = 2 * inch story.append(I) story.append(Spacer(1, 0.2 * inch)) data = [ ["Method:", cc_dict["method"], "Geometry"], ["Basis Set:", cc_dict["basis"], Table(cc_dict["geom"])], ["Calculation Type:", cc_dict["calc_type"], ""], ["Energy:", cc_dict["energy"], ""], ["Molecular Formula:", cc_dict["mol_formula"], ""], ] t = Table( data, style=[ ("GRID", (0, 0), (-1, -1), 0.5, colors.grey), ("SPAN", (-1, 1), (-1, -1)), ], ) story.append(t) story.append(PageBreak()) return story if __name__ == "__main__": log_files = [file for file in os.listdir() if file.endswith(".log")] construct_si(log_files)