Source code for aiida_quantumespresso.workflows.functions.get_xspectra_structures

# -*- coding: utf-8 -*-
"""CalcFunction to analyse the symmetry properties of a crystal structure and its atomic sites.

Returns a supercell with a marked absorbing atom for each symmetrically non-equivalent site in the system.
"""
from aiida import orm
from aiida.common import ValidationError
from aiida.common.constants import elements
from aiida.engine import calcfunction
from aiida.orm.nodes.data.structure import Kind, Site, StructureData
from aiida.tools import spglib_tuple_to_structure, structure_to_spglib_tuple
import numpy as np
import spglib

from aiida_quantumespresso.utils.hubbard import HubbardStructureData, HubbardUtils


@calcfunction
[docs]def get_xspectra_structures(structure, **kwargs):  # pylint: disable=too-many-statements
    """Read a StructureData object using spglib for its symmetry data and return structures for XSpectra calculations.

    Takes an incoming StructureData node and prepares structures suitable for calculation with
    xspectra.x. The basic criteria for a suitable structure is for the cell parameters to be
    sufficiently large to reduce spurious interactions between localised core-holes in
    neighbouring periodic images and for one atomic site to be marked as the absorbing atom
    site.

    The default setting for the cell size is 8.0 angstrom, which can be changed by including
    'supercell_min_parameter' as a Float node in the keyword arguments. Accepted keyword
    arguments are:

        - abs_atom_marker: a Str node defining the name of the absorbing atom Kind. The
                           absorbing Kind will be labelled 'X' if no input is given.
        - supercell_min_parameter: a Float node defining the minimum cell length in
                                   angstrom for the resulting supercell, and thus all output
                                   structures. The default value of 8.0 angstrom will be used
                                   if no input is given. Setting this value to 0.0 will
                                   instruct the CF to not scale up the input structure.
        - standardize_structure: a Bool object defining if the input structure should
                                 standardized using spglib. The input structure will be
                                 standardized if no input is given, or if the crystal system
                                 is triclinic.
        - absorbing_elements_list: a List object defining the list of elements to consider
                                   when producing structures. All elements in the structure
                                   will be considered if no input is given.
        - is_molecule_input: a Bool object to define for the function that the input structure is
                    a molecule and not a periodic solid system. Required in order to instruct
                    the CF to use Pymatgen rather than spglib to determine the symmetry. The
                    CF will assume the structure to be a periodic solid if no input is given.
        - use_element_types: a Bool object to indicate that symmetry analysis should consider all
                             sites of the same element to be equal and ignore any special Kind names
                             from the parent structure. For instance, use_element_types = True would
                             consider sites for Kinds 'Si' and 'Si1' to be equivalent if both are sites
                             containing silicon. Defaults to True.
        - spglib_settings: an optional Dict object containing overrides for the symmetry
                            tolerance parameters used by spglib (symmprec, angle_tolerance).
        - pymatgen_settings: an optional Dict object containing overrides for the symmetry
                             tolerance parameters used by Pymatgen when processing molecular
                             systems (tolerance, eigen_tolerance, matrix_tolerance).

    :param structure: the StructureData object to be analysed
    :returns: StructureData objects for the standardized crystal structure, the supercell, and
              all generated structure and associated symmetry data
    """

    is_molecule_input = False
    elements_present = [kind.symbol for kind in structure.kinds]
    unwrapped_kwargs = {key: node for key, node in kwargs.items() if isinstance(node, orm.Data)}
    if 'abs_atom_marker' in unwrapped_kwargs.keys():
        abs_atom_marker = unwrapped_kwargs['abs_atom_marker'].value
        if abs_atom_marker in elements_present:
            raise ValidationError(
                f'The marker given for the absorbing atom ("{abs_atom_marker}") should not match an existing Kind in '
                f'the input structure ({elements_present}.'
            )
        unwrapped_kwargs.pop('abs_atom_marker')
    else:
        abs_atom_marker = 'X'
    if 'supercell_min_parameter' in unwrapped_kwargs.keys():
        supercell_min_parameter = unwrapped_kwargs.pop('supercell_min_parameter').value
        if supercell_min_parameter < 0:
            raise ValueError(
                f'The requested minimum supercell parameter ({supercell_min_parameter}) should not be'
                ' less than 0.'
            )
        elif supercell_min_parameter == 0:  # useful if no core-hole treatment is required
            scale_unit_cell = False
        else:
            scale_unit_cell = True
    else:
        supercell_min_parameter = 8.0  # angstroms
        scale_unit_cell = True
    if 'standardize_structure' in unwrapped_kwargs.keys():
        standardize_structure = unwrapped_kwargs['standardize_structure'].value
        unwrapped_kwargs.pop('standardize_structure')
    else:
        standardize_structure = True
    if 'absorbing_elements_list' in unwrapped_kwargs.keys():
        abs_elements_list = unwrapped_kwargs['absorbing_elements_list'].get_list()
        # confirm that the elements requested are actually in the input structure
        for req_element in abs_elements_list:
            if req_element not in elements_present:
                raise ValidationError(
                    f'Requested elements {abs_elements_list} do not match those in the given structure:'
                    f' {elements_present}'
                )
    else:
        abs_elements_list = []
        for kind in structure.kinds:
            if kind.symbol not in abs_elements_list:
                abs_elements_list.append(kind.symbol)

    if 'is_molecule_input' in unwrapped_kwargs.keys():
        is_molecule_input = unwrapped_kwargs['is_molecule_input'].value
        # If we are working with a molecule, check for pymatgen_settings
        if 'pymatgen_settings' in unwrapped_kwargs.keys():
            pymatgen_settings_dict = unwrapped_kwargs.keys()['pymatgen_settings'].get_dict()
            valid_keys = ['tolerance', 'eigen_tolerance', 'matrix_tolerance']
            pymatgen_kwargs = {key: value for key, value in pymatgen_settings_dict.items() if key in valid_keys}
        else:
            pymatgen_kwargs = {}

    if 'spglib_settings' in unwrapped_kwargs.keys():
        spglib_settings_dict = unwrapped_kwargs['spglib_settings'].get_dict()
        valid_keys = ['symprec', 'angle_tolerance']
        spglib_kwargs = {key: value for key, value in spglib_settings_dict.items() if key in valid_keys}
    else:
        spglib_kwargs = {}

    if 'use_element_types' in unwrapped_kwargs.keys():
        use_element_types = unwrapped_kwargs['use_element_types'].value
    else:
        use_element_types = True

    if isinstance(structure, HubbardStructureData):
        is_hubbard_structure = True
        if standardize_structure:
            raise ValidationError(
                'Incoming structure set to be standardized, but hubbard data has been found. '
                'Please set ``standardize_structure`` to false in ``**kwargs`` to preserve the hubbard data.'
            )
    else:
        is_hubbard_structure = False

    output_params = {}
    result = {}

    output_params['absorbing_elements_list'] = abs_elements_list

    incoming_structure_size = len(structure.sites)
    incoming_structure_cell = structure.cell
    incoming_structure_params = structure.cell_lengths

    output_params['input_structure_num_sites'] = incoming_structure_size
    output_params['input_structure_cell_matrix'] = incoming_structure_cell
    output_params['input_structure_cell_lengths'] = incoming_structure_params

    # Process a non-periodic system
    if is_molecule_input:
        from pymatgen.symmetry.analyzer import PointGroupAnalyzer

        pymatgen_molecule = structure.get_pymatgen_molecule()
        centered_molecule = pymatgen_molecule.get_centered_molecule()
        centered_positions = centered_molecule.cart_coords

        analyzer_data = PointGroupAnalyzer(pymatgen_molecule, **pymatgen_kwargs)
        eq_atoms_data = analyzer_data.get_equivalent_atoms()['eq_sets']
        atomic_nos = pymatgen_molecule.atomic_numbers
        point_group = str(analyzer_data.get_pointgroup())
        output_params['molecule_point_group'] = point_group

        equivalency_dict = {}

        for key in eq_atoms_data:
            site_symbol = elements[atomic_nos[key]]['symbol']
            if site_symbol in abs_elements_list:
                equivalency_dict[f'site_{key}'] = {}
                atom_no_set = eq_atoms_data[key]
                equivalency_dict[f'site_{key}']['site_index'] = key
                equivalency_dict[f'site_{key}']['equivalent_sites_list'] = atom_no_set
                equivalency_dict[f'site_{key}']['multiplicity'] = len(atom_no_set)
                equivalency_dict[f'site_{key}']['symbol'] = site_symbol

        output_params['equivalent_sites_data'] = equivalency_dict

        x_pos = []
        y_pos = []
        z_pos = []
        for site in structure.sites:
            x_pos.append(site.position[0])
            y_pos.append(site.position[1])
            z_pos.append(site.position[2])

        # We assume that the Martyna-Tuckerman approach will be used in the core-hole SCF
        # thus, at a minimum, the new box for the molecule needs to be 2x the extent of the
        # molecule in x, y, and z. We therefore use the larger of either the MT distance or
        # `supercell_min_parameter` for each cell parameter.
        high_x = x_pos[(np.argmax(x_pos))]
        high_y = y_pos[(np.argmax(y_pos))]
        high_z = z_pos[(np.argmax(z_pos))]

        low_x = x_pos[(np.argmin(x_pos))]
        low_y = y_pos[(np.argmin(y_pos))]
        low_z = z_pos[(np.argmin(z_pos))]

        box_a = max((high_x - low_x) * 2, supercell_min_parameter)
        box_b = max((high_y - low_y) * 2, supercell_min_parameter)
        box_c = max((high_z - low_z) * 2, supercell_min_parameter)

        new_supercell = StructureData()
        new_supercell.set_cell(value=([box_a, 0., 0.], [0., box_b, 0.], [0., 0., box_c]))

        for kind in structure.kinds:
            new_supercell.append_kind(kind)

        for site in structure.sites:
            new_supercell.append_site(site)

        # Reset the positions so that the centre of mass is at the centre of the new box
        new_supercell.reset_sites_positions(centered_positions + (np.array(new_supercell.cell_lengths) / 2))

        output_params['supercell_cell_matrix'] = new_supercell.cell
        output_params['supercell_cell_lengths'] = new_supercell.cell_lengths
        output_params['supercell_num_sites'] = len(new_supercell.sites)
        result['supercell'] = new_supercell

    # Process a periodic system
    else:
        incoming_structure_tuple = structure_to_spglib_tuple(structure)
        spglib_tuple = incoming_structure_tuple[0]
        types_order = spglib_tuple[-1]
        kinds_information = incoming_structure_tuple[1]
        kinds_list = incoming_structure_tuple[2]

        # We need a way to reliably convert type number into element, so we
        # first create a mapping of assigned number to kind name then a mapping
        # of assigned number to ``Kind``

        type_name_mapping = {str(value): key for key, value in kinds_information.items()}
        type_mapping_dict = {}

        for key, value in type_name_mapping.items():
            for kind in kinds_list:
                if value == kind.name:
                    type_mapping_dict[key] = kind

        # By default, `structure_to_spglib_tuple` gives different
        # ``Kinds`` of the same element a distinct atomic number by
        # multiplying the normal atomic number by 1000, then adding
        # 100 for each distinct duplicate. if we want to treat all sites
        # of the same element as equal, then we must therefore briefly
        # operate on a "cleaned" version of the structure tuple where this
        # new label is reduced to its normal element number.
        if use_element_types:
            cleaned_structure_tuple = (spglib_tuple[0], spglib_tuple[1], [])
            for i in spglib_tuple[2]:
                if i >= 1000:
                    new_i = int(np.trunc(i / 1000))
                else:
                    new_i = i
                cleaned_structure_tuple[2].append(new_i)
            symmetry_dataset = spglib.get_symmetry_dataset(cleaned_structure_tuple, **spglib_kwargs)
        else:
            symmetry_dataset = spglib.get_symmetry_dataset(spglib_tuple, **spglib_kwargs)

        # if there is no symmetry to exploit, or no standardization is desired, then we just use
        # the input structure in the following steps. This is done to account for the case where
        # the user has submitted an improper crystal for calculation work and doesn't want it to
        # be changed.
        if symmetry_dataset['number'] in [1, 2] or not standardize_structure:
            standardized_structure_node = spglib_tuple_to_structure(spglib_tuple, kinds_information, kinds_list)
            structure_is_standardized = False
        else:  # otherwise, we proceed with the standardized structure.
            standardized_structure_tuple = spglib.standardize_cell(spglib_tuple, **spglib_kwargs)
            standardized_structure_node = spglib_tuple_to_structure(
                standardized_structure_tuple, kinds_information, kinds_list
            )
            # if we are standardizing the structure, then we need to update the symmetry
            # information for the standardized structure
            symmetry_dataset = spglib.get_symmetry_dataset(standardized_structure_tuple, **spglib_kwargs)
            structure_is_standardized = True

        equivalent_atoms_array = symmetry_dataset['equivalent_atoms']

        if structure_is_standardized:
            element_types = symmetry_dataset['std_types']
        else:  # convert the 'std_types' from standardized to primitive cell
            # we generate the type-specific data on-the-fly since we need to
            # know which type (and thus kind) *should* be at each site
            # even if we "cleaned" the structure previously
            non_cleaned_dataset = spglib.get_symmetry_dataset(spglib_tuple, **spglib_kwargs)
            spglib_std_types = non_cleaned_dataset['std_types']
            spglib_map_to_prim = non_cleaned_dataset['mapping_to_primitive']
            spglib_std_map_to_prim = non_cleaned_dataset['std_mapping_to_primitive']

            map_std_pos_to_type = {}
            for position, atom_type in zip(spglib_std_map_to_prim, spglib_std_types):
                map_std_pos_to_type[str(position)] = atom_type
            primitive_types = []
            for position in spglib_map_to_prim:
                atom_type = map_std_pos_to_type[str(position)]
                primitive_types.append(atom_type)
            element_types = primitive_types

        equivalency_dict = {}
        for index, symmetry_types in enumerate(zip(equivalent_atoms_array, element_types)):
            symmetry_value, element_type = symmetry_types
            if type_mapping_dict[str(element_type)].symbol in abs_elements_list:
                if f'site_{symmetry_value}' in equivalency_dict:
                    equivalency_dict[f'site_{symmetry_value}']['equivalent_sites_list'].append(index)
                else:
                    equivalency_dict[f'site_{symmetry_value}'] = {
                        'kind_name': type_mapping_dict[str(element_type)].name,
                        'symbol': type_mapping_dict[str(element_type)].symbol,
                        'site_index': symmetry_value,
                        'equivalent_sites_list': [symmetry_value]
                    }

        for value in equivalency_dict.values():
            value['multiplicity'] = len(value['equivalent_sites_list'])

        output_params['equivalent_sites_data'] = equivalency_dict

        output_params['spacegroup_number'] = symmetry_dataset['number']
        output_params['international_symbol'] = symmetry_dataset['international']

        output_params['structure_is_standardized'] = structure_is_standardized
        if structure_is_standardized:
            result['standardized_structure'] = standardized_structure_node
            output_params['standardized_structure_num_sites'] = len(standardized_structure_node.sites)
            output_params['standardized_structure_cell_matrix'] = standardized_structure_node.cell
            output_params['standardized_structure_params'] = standardized_structure_node.cell_lengths

        standard_pbc = standardized_structure_node.cell_lengths

        multiples = [1, 1, 1]
        if scale_unit_cell:
            multiples = []
            for length in standard_pbc:
                multiple = np.ceil(supercell_min_parameter / length)
                multiples.append(int(multiple))

        ase_structure = standardized_structure_node.get_ase()
        ase_supercell = ase_structure * multiples

        # if there are hubbard data to apply, we re-construct
        # the supercell site-by-site to keep the correct ordering
        if is_hubbard_structure:
            blank_supercell = StructureData(ase=ase_supercell)
            new_supercell = StructureData()
            new_supercell.set_cell(blank_supercell.cell)
            num_extensions = np.product(multiples)
            supercell_types_order = []
            # For each supercell extension, loop over each site.
            # This way, the original pattern-ordering of sites is
            # preserved.
            for i in range(0, num_extensions):  # pylint: disable=unused-variable
                for type_number in types_order:
                    supercell_types_order.append(type_number)

            for site, type_number in zip(blank_supercell.sites, supercell_types_order):
                kind_present = type_mapping_dict[str(type_number)]
                if kind_present.name not in [kind.name for kind in new_supercell.kinds]:
                    new_supercell.append_kind(kind_present)
                new_site = Site(kind_name=kind_present.name, position=site.position)
                new_supercell.append_site(new_site)
        else:  # otherwise, simply re-construct the supercell with ASE
            new_supercell = StructureData(ase=ase_supercell)

        if is_hubbard_structure:  # Scale up the hubbard parameters to match and return the HubbardStructureData
            # we can exploit the fact that `get_hubbard_for_supercell` will return a HubbardStructureData node
            # with the same hubbard parameters in the case where the input structure and the supercell are the
            # same (i.e. multiples == [1, 1, 1])
            hubbard_manip = HubbardUtils(structure)
            new_hubbard_supercell = hubbard_manip.get_hubbard_for_supercell(new_supercell)
            new_supercell = new_hubbard_supercell
            supercell_hubbard_params = new_supercell.hubbard
            result['supercell'] = new_supercell
        else:
            result['supercell'] = new_supercell
        output_params['supercell_factors'] = multiples
        output_params['supercell_num_sites'] = len(new_supercell.sites)
        output_params['supercell_cell_matrix'] = new_supercell.cell
        output_params['supercell_cell_lengths'] = new_supercell.cell_lengths

    for value in equivalency_dict.values():
        target_site = value['site_index']
        marked_structure = StructureData()
        marked_structure.set_cell(new_supercell.cell)
        new_kind_names = [kind.name for kind in new_supercell.kinds]

        for index, site in enumerate(new_supercell.sites):
            kind_at_position = new_supercell.kinds[new_kind_names.index(site.kind_name)]
            if index == target_site:
                absorbing_kind = Kind(name=abs_atom_marker, symbols=kind_at_position.symbol)
                absorbing_site = Site(kind_name=absorbing_kind.name, position=site.position)
                marked_structure.append_kind(absorbing_kind)
                marked_structure.append_site(absorbing_site)
            else:
                if kind_at_position.name not in [kind.name for kind in marked_structure.kinds]:
                    marked_structure.append_kind(kind_at_position)
                new_site = Site(kind_name=site.kind_name, position=site.position)
                marked_structure.append_site(new_site)

        if is_hubbard_structure:
            marked_hubbard_structure = HubbardStructureData.from_structure(marked_structure)
            marked_hubbard_structure.hubbard = supercell_hubbard_params
            result[f'site_{target_site}_{value["symbol"]}'] = marked_hubbard_structure
        else:
            result[f'site_{target_site}_{value["symbol"]}'] = marked_structure

    output_params['is_molecule_input'] = is_molecule_input
    result['output_parameters'] = orm.Dict(dict=output_params)

    return result