Source code for abipy.abio.abivars

"""This module contains lookup table with the name of the ABINIT variables."""
import os
import warnings
import numpy as np

from pprint import pformat #, pprint
from monty.string import is_string, boxed
from monty.functools import lazy_property
from monty.termcolor import cprint
from pymatgen.core.units import bohr_to_ang
from abipy.core.structure import Structure, dataframes_from_structures
from abipy.core.mixins import Has_Structure, TextFile, NotebookWriter
from abipy.abio.abivar_database.variables import get_codevars

__all__ = [
    "is_abivar",
    "is_abiunit",
    "AbinitInputFile",
    "AbinitInputParser",
    "structure_from_abistruct_fmt",
]


def is_anaddb_var(varname):
    """True if varname is a valid Anaddb variable."""
    return varname in get_codevars()["anaddb"]


def is_abivar(varname):
    """True if varname is a valid ABINIT variable."""
    # Add include statement.
    # FIXME: These variables should be added to the database.
    extra = ["include", "xyzfile"]
    return varname in get_codevars()["abinit"] or varname in extra

# TODO: Move to new directory
ABI_OPERATORS = set(["sqrt", ])

ABI_UNIT_NAMES = {s.lower() for s in (
    "au", "nm",
    "Angstr", "Angstrom", "Angstroms", "Bohr", "Bohrs", "eV", "Ha",
    "Hartree", "Hartrees", "K", "Ry", "Rydberg", "Rydbergs",
    "T", "Tesla",)}

def is_abiunit(s):
    """
    True if string is one of the units supported by the ABINIT parser.
    """
    if not is_string(s):
        return False
    return s.lower() in ABI_UNIT_NAMES

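# Illustrative usage sketch (not part of the upstream module): how the lookup
# helpers above are typically queried. The function name `_example_variable_lookup`
# is hypothetical; the variable/unit names are standard Abinit keywords.
def _example_variable_lookup():
    # Known Abinit variables (and the extra names such as "xyzfile") return True.
    assert is_abivar("ecut") and is_abivar("xyzfile")
    assert not is_abivar("not_a_real_variable")
    # Unit names are matched case-insensitively; non-strings are rejected.
    assert is_abiunit("eV") and is_abiunit("Bohr")
    assert not is_abiunit(123)
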
def expand_star_syntax(s):
    """
    Evaluate star syntax. Return new string.
    Remember that Abinit does not accept white spaces around `*`, e.g. `typat 2 * 1` is not valid.

    >>> assert expand_star_syntax("3*2") == '2 2 2'
    >>> assert expand_star_syntax("2 *1") == '1 1'
    >>> assert expand_star_syntax("1 2*2") == '1 2 2'
    >>> assert expand_star_syntax("*2") == '*2'
    """
    s = s.strip()
    if "*" not in s:
        return s
    else:
        # Handle e.g `pawecutdg*`
        if s[0].isalpha() and s[-1] == "*":
            return s

    s = s.replace("*", " * ").strip()
    tokens = s.split()

    # Handle "*2" case i.e. return "*2"
    if len(tokens) == 2 and tokens[0] == "*":
        assert tokens[1] != "*"
        return "".join(tokens)

    l = []
    while tokens:
        c = tokens.pop(0)
        if c == "*":
            num = int(l.pop(-1))
            val = tokens.pop(0)
            l.extend(num * [val])
        else:
            l.append(c)

    return " ".join(l)


def str2array_bohr(obj):
    if not is_string(obj):
        return np.asarray(obj)

    # Treat e.g. acell 3 * 1.0
    obj = expand_star_syntax(obj)
    # Numpy does not understand "0.00d0 0.00d0"
    obj = obj.lower().replace("d", "e")

    tokens = obj.split()
    if not tokens[-1].isalpha():
        # No unit
        return np.fromstring(obj, sep=" ")

    unit = tokens[-1]
    if unit in ("angstr", "angstrom", "angstroms"):
        return np.fromstring(" ".join(tokens[:-1]), sep=" ") / bohr_to_ang
    elif unit in ("bohr", "bohrs", "au"):
        return np.fromstring(" ".join(tokens[:-1]), sep=" ")
    else:
        raise ValueError("Don't know how to handle unit: %s" % str(unit))


def str2array(obj, dtype=float):
    if not is_string(obj):
        return np.asarray(obj)
    if obj.startswith("*"):
        raise ValueError("This case should be treated by the caller: %s" % str(obj))

    s = expand_star_syntax(obj)
    # Numpy does not understand "0.00d0 0.00d0"
    s = s.lower().replace("d", "e")
    return np.fromstring(s, sep=" ", dtype=dtype)


def eval_abinit_operators(tokens):
    """
    Receive a list of strings, find the occurrences of the operators supported
    in the input file (e.g. sqrt), evaluate the expressions and return a new list of strings.

    .. note::

        This function is not recursive hence expressions like sqrt(1/2) are not supported.
    """
    import math  # noqa: F401
    import re
    re_sqrt = re.compile(r"[+|-]?sqrt\((.+)\)")

    values = []
    for tok in tokens:
        if tok.startswith("'") or tok.startswith('"'):
            values.append(tok)
        else:
            m = re_sqrt.match(tok)
            if m:
                tok = tok.replace("sqrt", "math.sqrt")
                tok = str(eval(tok))
            if "/" in tok:
                tok = str(eval(tok))
            values.append(tok)

    return values


def abi_tokenize(string):
    string = string.strip()
    # stack = [(char, [start, end]), ...] where char is " or '
    stack = []
    for i, c in enumerate(string):
        if c in ("'", '"'):
            if stack and stack[-1][1][1] == -1:
                # Close quotation mark. Note that the algo is not very robust
                # as nested quotation marks are not supported!
                entry = stack[-1]
                if c != entry[0]:
                    raise ValueError("Found unclosed quotation marks in: %s" % string)
                stack[-1][1][1] = i
            else:
                # First hit.
                stack.append((c, [i, -1]))

    if not stack:
        # No ' or " found in string.
        return string.split()

    # Unroll the stack and build tokens so that e.g. "foo bar" is treated as a single token.
    tokens = []
    base = 0
    for item in stack:
        start, stop = item[1][0], item[1][1]
        # Consistency check:
        if stop == -1:
            raise ValueError("Found unclosed quotation mark in: %s" % string)
        if start > 0:
            tokens.extend(string[base:start-1].strip().split())
        base = stop + 1
        tokens.append(string[start:stop + 1])

    if base < len(string):
        tokens.append(string[base:])

    return tokens


class Dataset(dict, Has_Structure):

    @lazy_property
    def structure(self):
        """The initial structure associated with the dataset."""
        # First of all check whether the structure is defined through an external file.
        if "structure" in self:
            s = self["structure"].replace('"', "")
            filetype, path = s.split(":")
            from abipy import abilab
            if filetype == "poscar":
                return abilab.Structure.from_file(path)
            elif filetype == "abivars":
                with open(path, "rt") as fh:
                    return structure_from_abistruct_fmt(fh.read())
            else:
                try:
                    with abilab.abiopen(path) as abifile:
                        if hasattr(abifile, "final_structure"):
                            return abifile.final_structure
                        else:
                            return abifile.structure
                except Exception as exc:
                    raise RuntimeError("Error while opening file: `%s`:\n%s" % (path, exc))

        # Get lattice.
        kwargs = {}
        if "angdeg" in self:
            if "rprim" in self:
                raise ValueError("rprim and angdeg cannot be used together!")
            angdeg = str2array(self["angdeg"])
            angdeg.shape = (3)
            kwargs["angdeg"] = angdeg
        else:
            # Handle structure specified with rprim.
            kwargs["rprim"] = str2array_bohr(self.get("rprim", "1.0 0 0 0 1 0 0 0 1"))

        # Default value for acell.
        acell = str2array_bohr(self.get("acell", "1.0 1.0 1.0"))

        # Get important dimensions.
        ntypat = int(self.get("ntypat", 1))
        natom = int(self.get("natom", 1))

        # znucl(npsp)
        znucl = self["znucl"]
        if znucl.startswith("*"):
            i = znucl.find("*")
            znucl_size = natom if "npsp" not in self else int(self["npsp"])
            znucl = znucl_size * [float(znucl[i+1:])]
        else:
            znucl = str2array(self["znucl"])

        # v67mbpt/Input/t12.in
        typat = self["typat"]
        if typat.startswith("*"):
            i = typat.find("*")
            typat = np.array(natom * [int(typat[i+1:])], dtype=int)
        else:
            typat = str2array(self["typat"], dtype=int)

        # Extract atomic positions.
        # Select first natom entries (needed if multidatasets with different natom)
        # v3/Input/t05.in
        typat = typat[:natom]

        for k in ("xred", "xcart", "xangst"):
            toarray = str2array_bohr if k == "xcart" else str2array
            if k in self:
                arr = np.reshape(toarray(self[k]), (-1, 3))
                kwargs[k] = arr[:natom]
                break
        else:
            raise ValueError("xred|xcart|xangst must be given in input")

        try:
            return Structure.from_abivars(acell=acell, znucl=znucl, typat=typat, **kwargs)
        except Exception as exc:
            print("Wrong inputs passed to Structure.from_abivars:")
            print("acell:", acell, "znucl:", znucl, "typat:", typat, "kwargs:", kwargs, sep="\n")
            raise exc

    def get_vars(self):
        """
        Return a dictionary with the variables.
        The variables describing the crystalline structure are removed from the output dictionary.
        """
        geovars = {"acell", "angdeg", "rprim", "ntypat", "natom", "znucl",
                   "typat", "xred", "xcart", "xangst"}
        return {k: self[k] for k in self if k not in geovars}

    def __str__(self):
        return self.to_string()

    def to_string(self, post=None, mode="text", verbose=0):
        """
        String representation.

        Args:
            post: String that will be appended to the name of the variables.
            mode: Either `text` or `html` if HTML output with links is wanted.
            verbose: Verbosity level.
        """
        post = post if post is not None else ""
        if mode == "html":
            from abipy.abio.abivars_db import get_abinit_variables
            var_database = get_abinit_variables()

        lines = []
        app = lines.append
        for k in sorted(list(self.keys())):
            vname = k + post
            if mode == "html":
                vname = var_database[k].html_link(label=vname)
            app("%s %s" % (vname, str(self[k])))

        return "\n".join(lines) if mode == "text" else "\n".join(lines).replace("\n", "<br>")

    def _repr_html_(self):
        """Integration with jupyter_ notebooks."""
        return self.to_string(mode="html")

class AbinitInputFile(TextFile, Has_Structure, NotebookWriter):
    """
    This object parses the Abinit input file, stores the variables in dict-like objects (Datasets)
    and builds `Structure` objects from the input variables.
    Mainly used for inspecting the structure declared in the Abinit input file.
    """

    @classmethod
    def from_string(cls, string):
        """Build the object from string."""
        import tempfile
        _, filename = tempfile.mkstemp(suffix=".abi", text=True)
        with open(filename, "wt") as fh:
            fh.write(string)

        return cls(filename)

    def __init__(self, filepath):
        super().__init__(filepath)
        with open(filepath, "rt") as fh:
            self.string = fh.read()

        self.datasets = AbinitInputParser().parse(self.string)
        self.ndtset = len(self.datasets)

    def __str__(self):
        return self.to_string()

    def to_string(self, verbose=0):
        """String representation."""
        lines = []
        app = lines.append
        header = 10 * "=" + " Input File " + 10 * "="
        app(header)
        app(self.string)
        app(len(header) * "=" + "\n")

        # Print info on the structure(s).
        if self.structure is not None:
            app(self.structure.spget_summary())
        else:
            structures = [dt.structure for dt in self.datasets]
            app("Input file contains %d structures:" % len(structures))
            for i, structure in enumerate(structures):
                app(boxed("Dataset: %d" % (i+1)))
                app(structure.spget_summary())
                app("")

            dfs = dataframes_from_structures(structures, index=[i+1 for i in range(self.ndtset)])
            app(boxed("Tabular view (each row corresponds to a dataset structure)"))
            app("")
            app("Lattice parameters:")
            app(str(dfs.lattice))
            app("")
            app("Atomic positions:")
            app(str(dfs.coords))

        return "\n".join(lines)

    @lazy_property
    def has_multi_structures(self):
        """True if the input defines multiple structures."""
        return self.structure is None

    def _repr_html_(self):
        """Integration with jupyter notebooks."""
        from abipy.abio.abivars_db import repr_html_from_abinit_string
        return repr_html_from_abinit_string(self.string)

    def close(self):
        """NOP, required by ABC."""

    @lazy_property
    def structure(self):
        """
        The structure defined in the input file.

        If the input file contains multiple datasets **AND** the datasets have different structures,
        this property returns None. In this case, one has to access the structure of the individual
        datasets. For example:

            input.datasets[0].structure

        gives the structure of the first dataset.
        """
        for dt in self.datasets[1:]:
            if dt.structure != self.datasets[0].structure:
                warnings.warn("Datasets have different structures. Returning None. "
                              "Use input.datasets[i].structure")
                return None

        return self.datasets[0].structure

    def yield_figs(self, **kwargs):  # pragma: no cover
        """
        This function *generates* a predefined list of matplotlib figures with minimal input from the user.
        """
        if not self.has_multi_structures:
            yield self.structure.plot(show=False)
            yield self.structure.plot_bz(show=False)
        else:
            for dt in self.datasets:
                yield dt.structure.plot(show=False)
                yield dt.structure.plot_bz(show=False)

    def write_notebook(self, nbpath=None):
        """
        Write an ipython notebook to nbpath. If nbpath is None, a temporary file in the current
        working directory is created. Return path to the notebook.
        """
        nbformat, nbv, nb = self.get_nbformat_nbv_nb(title=None)

        nb.cells.extend([
            nbv.new_code_cell("abinp = abilab.abiopen('%s')" % self.filepath),
            nbv.new_code_cell("print(abinp)"),
        ])

        if self.has_multi_structures:
            nb.cells.extend([
                nbv.new_code_cell("""\
for dataset in abinp.datasets:
    print(dataset.structure)"""),
            ])

        if self.ndtset > 1:
            nb.cells.extend([
                nbv.new_code_cell("""\
for dataset in abinp.datasets:
    print(dataset)"""),
            ])

        return self._write_nb_nbpath(nb, nbpath)

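# Minimal usage sketch (an assumption about typical client code, not part of
# the upstream module): build an AbinitInputFile from a string and inspect the
# structure and the per-dataset variables. The input snippet uses standard
# Abinit keywords; the single Si atom in a cubic cell is purely illustrative.
def _example_abinit_input_file():
    inp = AbinitInputFile.from_string("""
acell 3*10.0
rprim 1 0 0  0 1 0  0 0 1
ntypat 1
znucl 14
natom 1
typat 1
xred 0 0 0
ecut 10
""")
    print(inp.ndtset)        # number of datasets found in the input
    print(inp.structure)     # Structure built from the geometry variables
    for dataset in inp.datasets:
        print(dataset.get_vars())  # variables without the geometry keywords
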
class AbinitInputParser(object):

    verbose = 0

    def parse(self, s):
        """
        This function receives a string `s` with the Abinit input and returns
        a list of :class:`Dataset` objects.
        """
        # Remove "=" and comments from lines.
        lines = []
        s = s.replace("=", " ")
        for line in s.splitlines():
            line = line.strip()
            i = line.find("#")
            if i != -1: line = line[:i]
            i = line.find("!")
            if i != -1: line = line[:i]
            if line: lines.append(line)

        # 1) Build string of the form `var1 value1 var2 value2`
        # 2) Split string in tokens.
        # 3) Evaluate star syntax i.e. "3*2" ==> '2 2 2'
        # 4) Evaluate operators e.g. sqrt(0.75)
        #
        # NB: Step 3 is needed because we are going to use python to evaluate the operators and
        # in abinit `2*sqrt(0.75)` means `sqrt(0.75) sqrt(0.75)` and not math multiplication!
        tokens = abi_tokenize(" ".join(lines))
        if self.verbose: print("tokens", tokens)

        # Steps 3-4.
        new_tokens = []
        for t in tokens:
            if t.startswith("'") or t.startswith('"'):
                new_tokens.append(t)
            else:
                new_tokens.extend(expand_star_syntax(t).split())

        tokens = new_tokens
        if self.verbose: print("new_tokens", new_tokens)

        tokens = self.eval_abinit_operators(tokens)

        varpos = []
        for pos, tok in enumerate(tokens):
            if tok[0].isalpha():
                # Either a new variable or a string defining the unit or an operator e.g. sqrt
                if is_abiunit(tok) or tok in ABI_OPERATORS or "?" in tok:
                    continue

                # Got new variable.
                if tok[-1].isdigit():
                    # Handle dataset index.
                    l = []
                    for i, c in enumerate(tok[::-1]):
                        if c.isalpha(): break
                        l.append(c)
                    else:
                        raise ValueError("Cannot find dataset index in token: %s" % tok)
                    l.reverse()

                varpos.append(pos)

        varpos.append(len(tokens))

        # Build dict {varname --> value_string}
        dvars = {}
        for i, pos in enumerate(varpos[:-1]):
            varname = tokens[pos]
            if pos + 2 == len(tokens):
                dvars[varname] = tokens[-1]
            else:
                dvars[varname] = " ".join(tokens[pos+1: varpos[i+1]])

        err_lines = []
        for k, v in dvars.items():
            if not v:
                err_lines.append("key `%s` was not parsed correctly (empty value)" % k)
        if err_lines:
            raise RuntimeError("\n".join(err_lines))

        # Get value of ndtset.
        ndtset = int(dvars.pop("ndtset", 1))
        udtset = dvars.pop("udtset", None)
        jdtset = dvars.pop("jdtset", None)
        if udtset is not None:
            raise NotImplementedError("udtset is not supported")

        # Build list of datasets.
        datasets = [Dataset() for i in range(ndtset)]

        # Treat all variables without a dataset index.
        kv_list = list(dvars.items())
        for k, v in kv_list:
            if k[-1].isdigit() or any(c in k for c in ("?", ":", "+", "*")): continue
            for d in datasets:
                d[k] = v
            dvars.pop(k)

        # Treat all variables with a dataset index except those with "?", ":", "+"
        kv_list = list(dvars.items())
        for k, v in kv_list:
            if any(c in k for c in ("?", ":", "+", "*")): continue
            varname, idt = self.varname_dtindex(k)
            dvars.pop(k)

            if idt > ndtset:
                if self.verbose: print("Ignoring key: %s because ndtset: %d" % (k, ndtset))
                continue
            datasets[idt-1][varname] = v

        # Now treat series e.g. ecut: 10 ecut+ 5 (NB: ? is not treated here)
        kv_list = list(dvars.items())
        for k, v in kv_list:
            if "?" in k: continue
            if ":" not in k: continue
            # TODO units
            vname = k[:-1]
            start = str2array(dvars.pop(k))

            # Handle ecut+ or ecut*
            incr = dvars.pop(vname + "+", None)
            if incr is not None:
                incr = str2array(incr)
                for dt in datasets:
                    dt[vname] = start.copy()
                    start += incr
            else:
                mult = dvars.pop(vname + "*")
                mult = str2array(mult)
                for dt in datasets:
                    dt[vname] = start.copy()
                    start *= mult

        # Consistency checks.
        # 1) dvars should be empty.
        if dvars:
            raise ValueError("Don't know how to handle variables in:\n%s" % pformat(dvars, indent=4))

        # 2) Keys in datasets should be valid Abinit input variables.
        wrong = []
        for i, dt in enumerate(datasets):
            wlist = [k for k in dt if not is_abivar(k)]
            if wlist: wrong.extend(("dataset %d" % i, wlist))
        if wrong:
            raise ValueError("Found variables that are not registered in the abipy database:\n%s"
                             % pformat(wrong, indent=4))

        # 3) We don't support the spg builder: dataset.structure will fail or, even worse,
        #    spglib will segfault so it's better to raise here!
        for dt in datasets:
            if "spgroup" in dt or "nobj" in dt:
                raise NotImplementedError(
                    "Abinit spgroup builder is not supported. Structure must be given explicitly!")

        if jdtset is not None:
            # Return the datasets selected by jdtset.
            datasets = [datasets[i-1] for i in np.fromstring(jdtset, sep=" ", dtype=int)]

        return datasets

    @staticmethod
    def eval_abinit_operators(tokens):
        return eval_abinit_operators(tokens)

    @staticmethod
    def varname_dtindex(tok):
        """
        >>> p = AbinitInputParser()
        >>> assert p.varname_dtindex("acell1") == ("acell", 1)
        >>> assert p.varname_dtindex("fa1k2") == ("fa1k", 2)
        """
        l = []
        for i, c in enumerate(tok[::-1]):
            if c.isalpha(): break
            l.append(c)
        else:
            raise ValueError("Cannot find dataset index in: %s" % tok)
        assert i > 0
        l.reverse()

        dtidx = int("".join(l))
        varname = tok[:len(tok)-i]

        return varname, dtidx

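# Illustrative sketch (hypothetical example function, not part of the upstream
# module): how AbinitInputParser handles the multidataset syntax. `ecut1`,
# `ecut2` and `nband` are standard Abinit keywords; values are stored as strings.
def _example_multidataset_parsing():
    datasets = AbinitInputParser().parse("ndtset 2  ecut1 10  ecut2 20  nband 8")
    assert len(datasets) == 2
    # Variables with a dataset index go to the corresponding dataset ...
    assert datasets[0]["ecut"] == "10" and datasets[1]["ecut"] == "20"
    # ... while variables without an index are replicated in all datasets.
    assert all(dt["nband"] == "8" for dt in datasets)
    return datasets
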
def structure_from_abistruct_fmt(string):
    """
    Parse the geometrical information given in the structure:abivars format
    and return a Structure object.

    A typical input file in "structure:abivars" format looks like::

        # MgB2 lattice structure.
        natom   3
        acell   2*3.086  3.523 Angstrom
        rprim   0.866025403784439  0.5  0.0
               -0.866025403784439  0.5  0.0
                0.0                0.0  1.0

        # Atomic positions
        xred_symbols
           0.0  0.0  0.0  Mg
           1/3  2/3  0.5  B
           2/3  1/3  0.5  B

    i.e. the znucl, typat and ntypat vars are implicitly defined via the xred_symbols table.
    """
    # Algorithm is as follows:
    # 1) Parse the xred_symbols section using a record-based parser.
    # 2) Compute xred, znucl, ntypat and typat from the xred_symbols section.
    # 3) Add these values in string format to the head to get a "standard" Abinit input file.
    # 4) Init structure from the "standard" Abinit input file.
    found = 0
    head = []
    xred, symbols = [], []
    for line in string.split("\n"):
        line = line.strip()
        if line.lower() == "xred_symbols":
            found += 1
            continue
        if not found:
            head.append(line)
        if found and line:
            tokens = line.split()
            xred.append([float(t) for t in eval_abinit_operators(tokens[:3])])
            symbols.append(tokens[3])

    if not xred:
        raise ValueError("Cannot find xred_symbols section in string:\n %s" % string)

    xred = np.reshape(xred, (-1, 3))
    # Set small values to zero. This usually happens when the CIF file
    # does not give structure parameters with enough digits.
    xred = np.where(np.abs(xred) > 1e-8, xred, 0.0)

    # Compute ntypat, typat and znucl from xred and symbols.
    natom = len(xred)
    ntypat = 1
    typat = [0]
    for iatom in range(1, natom):
        found = -1
        for jj in range(ntypat):
            if symbols[iatom] == symbols[typat[jj]]:
                found = jj
                break
        if found == -1:
            ntypat += 1
            typat.append(ntypat - 1)
        else:
            typat.append(found)

    from pymatgen.core.periodic_table import Element
    znucl = np.empty(ntypat)
    for iatom in range(natom):
        itypat = typat[iatom]
        znucl[itypat] = Element(symbols[iatom]).Z

    # Convert C indexing to Fortran.
    typat = np.array(typat) + 1

    # Build standard Abinit input string.
    lines = []
    app = lines.append
    app("ntypat %d" % ntypat)
    app("typat " + " ".join("%d" % t for t in typat))
    app("znucl " + " ".join("%f" % z for z in znucl))
    app("xred ")
    for iatom in range(natom):
        v = xred[iatom]
        lines.append("%.16f %.16f %.16f" % (v[0], v[1], v[2]))

    s = "\n".join(head) + "\n" + "\n".join(lines)

    return AbinitInputFile.from_string(s).structure

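# Usage sketch (an assumption about typical usage, not part of the upstream
# module): build a Structure from a string in the "structure:abivars" format
# documented above. The MgB2-like cell is taken from the docstring example.
def _example_structure_from_abivars_fmt():
    structure = structure_from_abistruct_fmt("""
natom 3
acell 2*3.086 3.523 Angstrom
rprim   0.866025403784439  0.5  0.0
       -0.866025403784439  0.5  0.0
        0.0                0.0  1.0
# Atomic positions
xred_symbols
   0.0  0.0  0.0  Mg
   1/3  2/3  0.5  B
   2/3  1/3  0.5  B
""")
    # znucl, typat and ntypat are inferred from the chemical symbols.
    print(structure.formula)
    return structure
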
def validate_input_parser(abitests_dir=None, input_files=None):
    """
    Validate/test the AbinitInput parser.

    Args:
        abitests_dir: Abinit tests directory.
        input_files: List of Abinit input files.

    Return: Exit code.
    """
    def is_abinit_input(path):
        """
        True if path is one of the input files used in the Abinit Test suite.
        """
        if path.endswith(".abi"): return True
        if not path.endswith(".in"): return False

        with open(path, "rt") as fh:
            for line in fh:
                if "executable" in line and "abinit" in line: return True
            return False

    # Files are collected in paths.
    paths = []
    if abitests_dir is not None:
        print("Analyzing directory %s for input files" % abitests_dir)

        for dirpath, dirnames, filenames in os.walk(abitests_dir):
            for fname in filenames:
                path = os.path.join(dirpath, fname)
                if is_abinit_input(path): paths.append(path)

    if input_files is not None:
        print("Analyzing files:", str(input_files))
        for arg in input_files:
            if is_abinit_input(arg): paths.append(arg)

    nfiles = len(paths)
    if nfiles == 0:
        cprint("Empty list of input files.", "red")
        return 0

    print("Found %d Abinit input files" % len(paths))
    errpaths = []
    for path in paths:
        print(path + ": ", end="")
        try:
            inp = AbinitInputFile.from_file(path)
            s = str(inp)
            cprint("OK", "green")
        except Exception as exc:
            if not isinstance(exc, NotImplementedError):
                cprint("FAILED", "red")
                errpaths.append(path)
                import traceback
                print(traceback.format_exc())
            else:
                cprint("NOTIMPLEMENTED", "magenta")

    if errpaths:
        cprint("failed: %d/%d [%.1f%%]" % (len(errpaths), nfiles, 100 * len(errpaths) / nfiles), "red")
        for i, epath in enumerate(errpaths):
            cprint("[%d] %s" % (i, epath), "red")
    else:
        cprint("All input files successfully parsed!", "green")

    return len(errpaths)

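# Usage sketch (illustrative, not part of the upstream module): run the
# validator on a list of input files or on a checkout of the Abinit test suite.
# The paths below are placeholders, not real locations.
def _example_validate_parser():
    # Either point the validator at the Abinit tests directory ...
    #   validate_input_parser(abitests_dir="/path/to/abinit/tests")
    # ... or pass an explicit list of input files (placeholder name):
    return validate_input_parser(input_files=["si_scf.abi"])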