Source code for abipy.core.restapi

"""
This module provides interfaces with the Materials Project REST API v2 to enable
the creation of data structures and pymatgen objects using Materials Project data.
"""
from __future__ import annotations

import sys
import pandas as pd

from collections import OrderedDict
from pprint import pprint
from functools import cached_property
from monty.string import marquee

from pymatgen.ext.matproj import MPRester, MPRestError
from abipy.tools.printing import print_dataframe
from abipy.core.mixins import NotebookWriter



# Keys read from each Materials Project data dictionary to build the dataframe columns.
# Dotted names (e.g. "spacegroup.symbol") address values stored in nested dictionaries.
# Keys listed in the past but currently disabled:
#   "unit_cell_formula", "icsd_id", "icsd_ids", "cif", "tags", "elasticity"
MP_KEYS_FOR_DATAFRAME = (
    "pretty_formula",
    "e_above_hull",
    "energy_per_atom",
    "formation_energy_per_atom",
    "nsites",
    "volume",
    "spacegroup.symbol",
    "spacegroup.number",
    "band_gap",
    "total_magnetization",
    "material_id",
)



[docs]
def get_mprester():
    """
    Build a pymatgen ``MPRester`` object without passing any explicit argument.

    Return: New ``MPRester`` instance.
    """
    return MPRester()



#class MyMPRester(MPRester):
#    """
#    Subclass Materials project Rester.
#    See :cite:`Jain2013,Ong2015`.
#
#    .. rubric:: Inheritance Diagram
#    .. inheritance-diagram:: MyMPRester
#    """
#    Error = MPRestError
#
#    def get_phasediagram_results(self, elements) -> PhaseDiagramResults:
#        """
#        Contact the materials project database, fetch entries and build :class:``PhaseDiagramResults`` instance.
#
#        Args:
#            elements: List of chemical elements.
#        """
#        entries = self.get_entries_in_chemsys(elements, inc_structure="final")
#        return PhaseDiagramResults(entries)


#class PhaseDiagramResults:
#    """
#    Simplified interface to phase-diagram pymatgen API.
#
#    Inspired by:
#
#        https://anaconda.org/matsci/plotting-and-analyzing-a-phase-diagram-using-the-materials-api/notebook
#
#    See also: :cite:`Ong2008,Ong2010`
#    """
#    def __init__(self, entries):
#        self.entries = entries
#        # Convert pymatgen structure to Abipy.
#        from abipy.core.structure import Structure
#        for e in entries:
#            e.structure.__class__ = Structure
#
#        self.structures = [e.structure for e in entries]
#        self.mpids = [e.entry_id for e in entries]
#
#        # Create phase diagram.
#        from pymatgen.analysis.phase_diagram import PhaseDiagram
#        self.phasediagram = PhaseDiagram(self.entries)
#
#    def plot(self, show_unstable=True, show=True):
#        """
#        Plot phase diagram.
#
#        Args:
#            show_unstable (float): Whether unstable phases will be plotted as
#                well as red crosses. If a number > 0 is entered, all phases with
#                ehull < show_unstable will be shown.
#            show: True to show plot.
#
#        Return: plotter object.
#        """
#        from pymatgen.analysis.phase_diagram import PDPlotter
#        plotter = PDPlotter(self.phasediagram, show_unstable=show_unstable)
#        if show:
#            plotter.show()
#        return plotter
#
#    @cached_property
#    def dataframe(self) -> pd.DataFrame:
#        """Pandas dataframe with the most important results."""
#        rows = []
#        for e in self.entries:
#            d = e.structure.get_dict4pandas(with_spglib=True)
#            decomp, ehull = self.phasediagram.get_decomp_and_e_above_hull(e)
#
#            rows.append(OrderedDict([
#                ("Materials ID", e.entry_id),
#                ("spglib_symb", d["spglib_symb"]), ("spglib_num", d["spglib_num"]),
#                ("Composition", e.composition.reduced_formula),
#                ("Ehull", ehull), # ("Equilibrium_reaction_energy", pda.get_equilibrium_reaction_energy(e)),
#                ("Decomposition", " + ".join(["%.2f %s" % (v, k.composition.formula) for k, v in decomp.items()])),
#            ]))
#
#        import pandas as pd
#        return pd.DataFrame(rows, columns=list(rows[0].keys()) if rows else None)
#
#    def print_dataframes(self, with_spglib=False, file=sys.stdout, verbose=0) -> None:
#        """
#        Print pandas dataframe to file `file`.
#
#        Args:
#            with_spglib: True to compute spacegroup with spglib.
#            file: Output stream.
#            verbose: Verbosity level.
#        """
#        print_dataframe(self.dataframe, file=file)
#        if verbose:
#            from abipy.core.structure import dataframes_from_structures
#            dfs = dataframes_from_structures(self.structures, index=self.mpids, with_spglib=with_spglib)
#            print_dataframe(dfs.lattice, title="Lattice parameters:", file=file)
#            if verbose > 1:
#                print_dataframe(dfs.coords, title="Atomic positions (columns give the site index):", file=file)



[docs]
class DatabaseStructures(NotebookWriter):
    """
    Store the results of a query to a database of crystalline structures.

    ``print_results`` reads ``self.dbname`` and ``self.dataframe``: these attributes
    are not defined here and must be provided by the subclass.
    This object is meant to be immutable, use add_entry to create a new instance.
    """

    def __init__(self, structures, ids, data=None):
        """
        Args:
            structures: List of structure objects
            ids: List of database ids.
            data: List of dictionaries with data associated to the structures (optional).
        """
        from abipy.core.structure import Structure
        self.structures = list(map(Structure.as_structure, structures))
        # Keep private list copies: add_entry concatenates with `+ [...]`, which requires
        # lists, and a copy prevents the caller from changing this object behind our back.
        self.ids = list(ids)
        self.data = None if data is None else list(data)
        assert len(self.structures) == len(self.ids), "structures and ids must have the same length"
        if self.data is not None:
            assert len(self.structures) == len(self.data), "structures and data must have the same length"

    def __bool__(self):
        """bool(self). True if at least one structure is stored."""
        return bool(self.structures)

    __nonzero__ = __bool__  # py2

    def filter_by_spgnum(self, spgnum: int) -> DatabaseStructures:
        """
        Filter structures by space group number.
        Return new object of the same class as self.
        """
        # Convert once instead of at every iteration.
        spgnum = int(spgnum)
        inds = [i for i, s in enumerate(self.structures) if s.get_space_group_info()[1] == spgnum]
        new_data = None if self.data is None else [self.data[i] for i in inds]
        return self.__class__([self.structures[i] for i in inds], [self.ids[i] for i in inds], data=new_data)

    def add_entry(self, structure, entry_id, data_dict=None) -> DatabaseStructures:
        """
        Add new entry, return new object. self is not modified.

        Args:
           structure: New structure object.
           entry_id: ID associated to new structure.
           data_dict: Optional dictionary with metadata. An empty dictionary is used
                if None and the other entries have metadata.
        """
        if data_dict is None:
            new_data = None if self.data is None else self.data + [{}]
        else:
            assert self.data is not None, "Cannot add metadata to an object created without data"
            new_data = self.data + [data_dict]

        return self.__class__(self.structures + [structure], self.ids + [entry_id], data=new_data)

    @property
    def lattice_dataframe(self) -> pd.DataFrame:
        """pandas DataFrame with lattice parameters."""
        return self.structure_dataframes.lattice

    @property
    def coords_dataframe(self) -> pd.DataFrame:
        """pandas DataFrame with atomic positions."""
        return self.structure_dataframes.coords

    @cached_property
    def structure_dataframes(self):
        """Pandas dataframes constructed from self.structures."""
        from abipy.core.structure import dataframes_from_structures
        return dataframes_from_structures(self.structures, index=self.ids, with_spglib=True)

    def print_results(self, fmt="abivars", verbose=0, file=sys.stdout) -> None:
        """
        Print pandas dataframe, structures using format `fmt`, and data to file `file`.
        `fmt` is automatically set to `cif` if structure is disordered.
        Set fmt to None or empty string to disable structure output.

        Args:
            fmt: Output format for the structures.
            verbose: Verbosity level. If non-zero, self.data is printed as well.
            file: Output stream.
        """
        summary = "\n# Found %s structures in %s database (use `verbose` to get further info)\n" % (
            len(self.structures), self.dbname)
        print(summary, file=file)

        if self.dataframe is not None: print_dataframe(self.dataframe, file=file)
        if verbose and self.data is not None: pprint(self.data, stream=file)

        # Print structures unless fmt is None, the string "None" or an empty string.
        if fmt and str(fmt) != "None":
            for entry_id, structure in zip(self.ids, self.structures):
                # Disordered structures are always written in CIF format.
                out_fmt = fmt if structure.is_ordered else "cif"
                print(" ", file=file)
                print(marquee("%s input for %s" % (out_fmt, entry_id), mark="#"), file=file)
                print("# " + structure.spget_summary(verbose=verbose).replace("\n", "\n# ") + "\n", file=file)
                print(structure.convert(fmt=out_fmt), file=file)
                print(2 * "\n", file=file)

        if len(self.structures) > 10:
            # Print info again as the first message is likely out of sight at this point.
            print(summary, file=file)

    def yield_figs(self, **kwargs):  # pragma: no cover
        """NOP required by NotebookWriter protocol."""
        yield None

    def write_notebook(self, nbpath=None, title=None) -> str:
        """
        Write a jupyter notebook to nbpath. If nbpath is None, a temporary file in the current
        working directory is created. Return path to the notebook.
        """
        nbformat, nbv, nb = self.get_nbformat_nbv_nb(title=title)

        # Use pickle files for data persistence.
        tmpfile = self.pickle_dump()

        nb.cells.extend([
            #nbv.new_markdown_cell("# This is a markdown cell"),
            nbv.new_code_cell("dbs = abilab.restapi.DatabaseStructures.pickle_load('%s')" % tmpfile),
            nbv.new_code_cell("import qgrid"),
            nbv.new_code_cell("# dbs.print_results(fmt='cif', verbose=0)"),
            nbv.new_code_cell("# qgrid.show_grid(dbs.lattice_dataframe)"),
            nbv.new_code_cell("# qgrid.show_grid(dbs.coords_dataframe)"),
            nbv.new_code_cell("qgrid.show_grid(dbs.dataframe)"),
        ])

        return self._write_nb_nbpath(nb, nbpath)





[docs]
class MpStructures(DatabaseStructures):
    """
    Store the results of a query to the Materials Project database.

    .. inheritance-diagram:: MpStructures
    """
    dbname = "Materials Project"

    @cached_property
    def dataframe(self) -> pd.DataFrame | None:
        """
        Pandas dataframe constructed from self.data. None if data is not available.

        Each row contains the entries listed in MP_KEYS_FOR_DATAFRAME (None if the key
        is missing) followed by the lattice parameters of the structure.
        The dataframe is indexed by self.ids.
        """
        if not self.data: return None

        lattice_keys = ("a", "b", "c", "alpha", "beta", "gamma")
        rows = []
        for entry, structure in zip(self.data, self.structures):
            # Dotdict gives access to nested values via dotted keys such as "spacegroup.symbol".
            entry = Dotdict(entry)
            row = OrderedDict((k, entry.dotget(k, default=None)) for k in MP_KEYS_FOR_DATAFRAME)
            # Add lattice parameters.
            for k in lattice_keys:
                row[k] = getattr(structure.lattice, k)
            rows.append(row)

        return pd.DataFrame(rows, index=self.ids, columns=list(rows[0].keys()))

    def open_browser(self, browser=None, limit=10):
        """
        Open the Materials Project web page of each structure in a new browser tab.

        Args:
            browser: Open webpage in ``browser``. Use default if $BROWSER if None.
            limit: Max number of tabs opened in browser. None for no limit.
        """
        import webbrowser
        # cgi.escape was removed in Python 3.8. The id is part of the URL path so
        # percent-encoding is the appropriate way to escape it.
        from urllib.parse import quote

        for i, mpid in enumerate(self.ids):
            if limit is not None and i >= limit:
                print("Found %d structures. Won't open more than %d tabs" % (len(self.ids), limit))
                break
            # https://materialsproject.org/materials/mp-2172/
            url = "https://materialsproject.org/materials/%s/" % quote(str(mpid), safe="")
            webbrowser.get(browser).open_new_tab(url)





[docs]
class CodStructures(DatabaseStructures):
    """
    Store the results of a query to the COD_ database :cite:`Grazulis2011`.

    .. inheritance-diagram:: CodStructures
    """
    dbname = "COD"

    @cached_property
    def dataframe(self) -> pd.DataFrame:
        """
        |pandas-Dataframe| with the lattice parameters of the structures plus the ``cod_sg``
        column with the space group stored in the ``sg`` entry of self.data (blanks removed).
        ``cod_sg`` is set to an empty string if the information is not available.
        """
        df = self.lattice_dataframe.copy()
        # data is optional in the constructor: use empty entries so that the column is always defined.
        entries = self.data if self.data is not None else [{}] * len(self.structures)
        # Add space group from COD. `or ""` also covers entries in which sg is None.
        df["cod_sg"] = [(d.get("sg") or "").replace(" ", "") for d in entries]
        return df




[docs]
class Dotdict(dict):
    """
    Dictionary whose nested values can be retrieved with dotted keys via ``dotget``.
    """

    def dotget(self, key, default=None):
        """
        Return the value associated to ``key``, ``default`` if it cannot be found.

        d.dotget("foo.bar") --> d["foo"]["bar"] if "foo.bar" not in self

        At each nesting level the remaining key is first looked up as is, and
        only afterwards split at its first dot. Any number of levels is supported.

        Args:
            key: Key or dotted path to a nested value. Keys that are not strings
                are only looked up at the top level.
            default: Value returned if the key is missing or an intermediate
                value is not a mapping.
        """
        from collections.abc import Mapping

        node, rest = self, key
        while True:
            # Exact match has precedence, this also covers keys that contain dots.
            if rest in node:
                return node[rest]

            # Only strings can be interpreted as dotted paths.
            if not isinstance(rest, str):
                return default

            root, dot, rest = rest.partition(".")
            if not dot or root not in node:
                return default

            node = node[root]
            if not isinstance(node, Mapping):
                # Cannot descend into a value that is not a mapping.
                return default