# -*- coding: utf-8 -*-
"""CalcFunction to analyse the symmetry properties of a crystal structure and its atomic sites.
Returns a supercell with a marked absorbing atom for each symmetrically non-equivalent site in the system.
"""
from aiida import orm
from aiida.common import ValidationError
from aiida.common.constants import elements
from aiida.engine import calcfunction
from aiida.orm.nodes.data.structure import Kind, Site, StructureData
from aiida.tools import spglib_tuple_to_structure, structure_to_spglib_tuple
import numpy as np
import spglib
from aiida_quantumespresso.utils.hubbard import HubbardStructureData, HubbardUtils
@calcfunction
[docs]def get_xspectra_structures(structure, **kwargs): # pylint: disable=too-many-statements
"""Read a StructureData object using spglib for its symmetry data and return structures for XSpectra calculations.
Takes an incoming StructureData node and prepares structures suitable for calculation with
xspectra.x. The basic criteria for a suitable structure is for the cell parameters to be
sufficiently large to reduce spurious interactions between localised core-holes in
neighbouring periodic images and for one atomic site to be marked as the absorbing atom
site.
The default setting for the cell size is 8.0 angstrom, which can be changed by including
'supercell_min_parameter' as a Float node in the keyword arguments. Accepted keyword
arguments are:
- abs_atom_marker: a Str node defining the name of the absorbing atom Kind. The
absorbing Kind will be labelled 'X' if no input is given.
- supercell_min_parameter: a Float node defining the minimum cell length in
angstrom for the resulting supercell, and thus all output
structures. The default value of 8.0 angstrom will be used
if no input is given. Setting this value to 0.0 will
instruct the CF to not scale up the input structure.
- standardize_structure: a Bool object defining if the input structure should
standardized using spglib. The input structure will be
standardized if no input is given, or if the crystal system
is triclinic.
- absorbing_elements_list: a List object defining the list of elements to consider
when producing structures. All elements in the structure
will be considered if no input is given.
- is_molecule_input: a Bool object to define for the function that the input structure is
a molecule and not a periodic solid system. Required in order to instruct
the CF to use Pymatgen rather than spglib to determine the symmetry. The
CF will assume the structure to be a periodic solid if no input is given.
- use_element_types: a Bool object to indicate that symmetry analysis should consider all
sites of the same element to be equal and ignore any special Kind names
from the parent structure. For instance, use_element_types = True would
consider sites for Kinds 'Si' and 'Si1' to be equivalent if both are sites
containing silicon. Defaults to True.
- spglib_settings: an optional Dict object containing overrides for the symmetry
tolerance parameters used by spglib (symmprec, angle_tolerance).
- pymatgen_settings: an optional Dict object containing overrides for the symmetry
tolerance parameters used by Pymatgen when processing molecular
systems (tolerance, eigen_tolerance, matrix_tolerance).
:param structure: the StructureData object to be analysed
:returns: StructureData objects for the standardized crystal structure, the supercell, and
all generated structure and associated symmetry data
"""
is_molecule_input = False
elements_present = [kind.symbol for kind in structure.kinds]
unwrapped_kwargs = {key: node for key, node in kwargs.items() if isinstance(node, orm.Data)}
if 'abs_atom_marker' in unwrapped_kwargs.keys():
abs_atom_marker = unwrapped_kwargs['abs_atom_marker'].value
if abs_atom_marker in elements_present:
raise ValidationError(
f'The marker given for the absorbing atom ("{abs_atom_marker}") should not match an existing Kind in '
f'the input structure ({elements_present}.'
)
unwrapped_kwargs.pop('abs_atom_marker')
else:
abs_atom_marker = 'X'
if 'supercell_min_parameter' in unwrapped_kwargs.keys():
supercell_min_parameter = unwrapped_kwargs.pop('supercell_min_parameter').value
if supercell_min_parameter < 0:
raise ValueError(
f'The requested minimum supercell parameter ({supercell_min_parameter}) should not be'
' less than 0.'
)
elif supercell_min_parameter == 0: # useful if no core-hole treatment is required
scale_unit_cell = False
else:
scale_unit_cell = True
else:
supercell_min_parameter = 8.0 # angstroms
scale_unit_cell = True
if 'standardize_structure' in unwrapped_kwargs.keys():
standardize_structure = unwrapped_kwargs['standardize_structure'].value
unwrapped_kwargs.pop('standardize_structure')
else:
standardize_structure = True
if 'absorbing_elements_list' in unwrapped_kwargs.keys():
abs_elements_list = unwrapped_kwargs['absorbing_elements_list'].get_list()
# confirm that the elements requested are actually in the input structure
for req_element in abs_elements_list:
if req_element not in elements_present:
raise ValidationError(
f'Requested elements {abs_elements_list} do not match those in the given structure:'
f' {elements_present}'
)
else:
abs_elements_list = []
for kind in structure.kinds:
if kind.symbol not in abs_elements_list:
abs_elements_list.append(kind.symbol)
if 'is_molecule_input' in unwrapped_kwargs.keys():
is_molecule_input = unwrapped_kwargs['is_molecule_input'].value
# If we are working with a molecule, check for pymatgen_settings
if 'pymatgen_settings' in unwrapped_kwargs.keys():
pymatgen_settings_dict = unwrapped_kwargs.keys()['pymatgen_settings'].get_dict()
valid_keys = ['tolerance', 'eigen_tolerance', 'matrix_tolerance']
pymatgen_kwargs = {key: value for key, value in pymatgen_settings_dict.items() if key in valid_keys}
else:
pymatgen_kwargs = {}
if 'spglib_settings' in unwrapped_kwargs.keys():
spglib_settings_dict = unwrapped_kwargs['spglib_settings'].get_dict()
valid_keys = ['symprec', 'angle_tolerance']
spglib_kwargs = {key: value for key, value in spglib_settings_dict.items() if key in valid_keys}
else:
spglib_kwargs = {}
if 'use_element_types' in unwrapped_kwargs.keys():
use_element_types = unwrapped_kwargs['use_element_types'].value
else:
use_element_types = True
if isinstance(structure, HubbardStructureData):
is_hubbard_structure = True
if standardize_structure:
raise ValidationError(
'Incoming structure set to be standardized, but hubbard data has been found. '
'Please set ``standardize_structure`` to false in ``**kwargs`` to preserve the hubbard data.'
)
else:
is_hubbard_structure = False
output_params = {}
result = {}
output_params['absorbing_elements_list'] = abs_elements_list
incoming_structure_size = len(structure.sites)
incoming_structure_cell = structure.cell
incoming_structure_params = structure.cell_lengths
output_params['input_structure_num_sites'] = incoming_structure_size
output_params['input_structure_cell_matrix'] = incoming_structure_cell
output_params['input_structure_cell_lengths'] = incoming_structure_params
# Process a non-periodic system
if is_molecule_input:
from pymatgen.symmetry.analyzer import PointGroupAnalyzer
pymatgen_molecule = structure.get_pymatgen_molecule()
centered_molecule = pymatgen_molecule.get_centered_molecule()
centered_positions = centered_molecule.cart_coords
analyzer_data = PointGroupAnalyzer(pymatgen_molecule, **pymatgen_kwargs)
eq_atoms_data = analyzer_data.get_equivalent_atoms()['eq_sets']
atomic_nos = pymatgen_molecule.atomic_numbers
point_group = str(analyzer_data.get_pointgroup())
output_params['molecule_point_group'] = point_group
equivalency_dict = {}
for key in eq_atoms_data:
site_symbol = elements[atomic_nos[key]]['symbol']
if site_symbol in abs_elements_list:
equivalency_dict[f'site_{key}'] = {}
atom_no_set = eq_atoms_data[key]
equivalency_dict[f'site_{key}']['site_index'] = key
equivalency_dict[f'site_{key}']['equivalent_sites_list'] = atom_no_set
equivalency_dict[f'site_{key}']['multiplicity'] = len(atom_no_set)
equivalency_dict[f'site_{key}']['symbol'] = site_symbol
output_params['equivalent_sites_data'] = equivalency_dict
x_pos = []
y_pos = []
z_pos = []
for site in structure.sites:
x_pos.append(site.position[0])
y_pos.append(site.position[1])
z_pos.append(site.position[2])
# We assume that the Martyna-Tuckerman approach will be used in the core-hole SCF
# thus, at a minimum, the new box for the molecule needs to be 2x the extent of the
# molecule in x, y, and z. We therefore use the larger of either the MT distance or
# `supercell_min_parameter` for each cell parameter.
high_x = x_pos[(np.argmax(x_pos))]
high_y = y_pos[(np.argmax(y_pos))]
high_z = z_pos[(np.argmax(z_pos))]
low_x = x_pos[(np.argmin(x_pos))]
low_y = y_pos[(np.argmin(y_pos))]
low_z = z_pos[(np.argmin(z_pos))]
box_a = max((high_x - low_x) * 2, supercell_min_parameter)
box_b = max((high_y - low_y) * 2, supercell_min_parameter)
box_c = max((high_z - low_z) * 2, supercell_min_parameter)
new_supercell = StructureData()
new_supercell.set_cell(value=([box_a, 0., 0.], [0., box_b, 0.], [0., 0., box_c]))
for kind in structure.kinds:
new_supercell.append_kind(kind)
for site in structure.sites:
new_supercell.append_site(site)
# Reset the positions so that the centre of mass is at the centre of the new box
new_supercell.reset_sites_positions(centered_positions + (np.array(new_supercell.cell_lengths) / 2))
output_params['supercell_cell_matrix'] = new_supercell.cell
output_params['supercell_cell_lengths'] = new_supercell.cell_lengths
output_params['supercell_num_sites'] = len(new_supercell.sites)
result['supercell'] = new_supercell
# Process a periodic system
else:
incoming_structure_tuple = structure_to_spglib_tuple(structure)
spglib_tuple = incoming_structure_tuple[0]
types_order = spglib_tuple[-1]
kinds_information = incoming_structure_tuple[1]
kinds_list = incoming_structure_tuple[2]
# We need a way to reliably convert type number into element, so we
# first create a mapping of assigned number to kind name then a mapping
# of assigned number to ``Kind``
type_name_mapping = {str(value): key for key, value in kinds_information.items()}
type_mapping_dict = {}
for key, value in type_name_mapping.items():
for kind in kinds_list:
if value == kind.name:
type_mapping_dict[key] = kind
# By default, `structure_to_spglib_tuple` gives different
# ``Kinds`` of the same element a distinct atomic number by
# multiplying the normal atomic number by 1000, then adding
# 100 for each distinct duplicate. if we want to treat all sites
# of the same element as equal, then we must therefore briefly
# operate on a "cleaned" version of the structure tuple where this
# new label is reduced to its normal element number.
if use_element_types:
cleaned_structure_tuple = (spglib_tuple[0], spglib_tuple[1], [])
for i in spglib_tuple[2]:
if i >= 1000:
new_i = int(np.trunc(i / 1000))
else:
new_i = i
cleaned_structure_tuple[2].append(new_i)
symmetry_dataset = spglib.get_symmetry_dataset(cleaned_structure_tuple, **spglib_kwargs)
else:
symmetry_dataset = spglib.get_symmetry_dataset(spglib_tuple, **spglib_kwargs)
# if there is no symmetry to exploit, or no standardization is desired, then we just use
# the input structure in the following steps. This is done to account for the case where
# the user has submitted an improper crystal for calculation work and doesn't want it to
# be changed.
if symmetry_dataset['number'] in [1, 2] or not standardize_structure:
standardized_structure_node = spglib_tuple_to_structure(spglib_tuple, kinds_information, kinds_list)
structure_is_standardized = False
else: # otherwise, we proceed with the standardized structure.
standardized_structure_tuple = spglib.standardize_cell(spglib_tuple, **spglib_kwargs)
standardized_structure_node = spglib_tuple_to_structure(
standardized_structure_tuple, kinds_information, kinds_list
)
# if we are standardizing the structure, then we need to update the symmetry
# information for the standardized structure
symmetry_dataset = spglib.get_symmetry_dataset(standardized_structure_tuple, **spglib_kwargs)
structure_is_standardized = True
equivalent_atoms_array = symmetry_dataset['equivalent_atoms']
if structure_is_standardized:
element_types = symmetry_dataset['std_types']
else: # convert the 'std_types' from standardized to primitive cell
# we generate the type-specific data on-the-fly since we need to
# know which type (and thus kind) *should* be at each site
# even if we "cleaned" the structure previously
non_cleaned_dataset = spglib.get_symmetry_dataset(spglib_tuple, **spglib_kwargs)
spglib_std_types = non_cleaned_dataset['std_types']
spglib_map_to_prim = non_cleaned_dataset['mapping_to_primitive']
spglib_std_map_to_prim = non_cleaned_dataset['std_mapping_to_primitive']
map_std_pos_to_type = {}
for position, atom_type in zip(spglib_std_map_to_prim, spglib_std_types):
map_std_pos_to_type[str(position)] = atom_type
primitive_types = []
for position in spglib_map_to_prim:
atom_type = map_std_pos_to_type[str(position)]
primitive_types.append(atom_type)
element_types = primitive_types
equivalency_dict = {}
for index, symmetry_types in enumerate(zip(equivalent_atoms_array, element_types)):
symmetry_value, element_type = symmetry_types
if type_mapping_dict[str(element_type)].symbol in abs_elements_list:
if f'site_{symmetry_value}' in equivalency_dict:
equivalency_dict[f'site_{symmetry_value}']['equivalent_sites_list'].append(index)
else:
equivalency_dict[f'site_{symmetry_value}'] = {
'kind_name': type_mapping_dict[str(element_type)].name,
'symbol': type_mapping_dict[str(element_type)].symbol,
'site_index': symmetry_value,
'equivalent_sites_list': [symmetry_value]
}
for value in equivalency_dict.values():
value['multiplicity'] = len(value['equivalent_sites_list'])
output_params['equivalent_sites_data'] = equivalency_dict
output_params['spacegroup_number'] = symmetry_dataset['number']
output_params['international_symbol'] = symmetry_dataset['international']
output_params['structure_is_standardized'] = structure_is_standardized
if structure_is_standardized:
result['standardized_structure'] = standardized_structure_node
output_params['standardized_structure_num_sites'] = len(standardized_structure_node.sites)
output_params['standardized_structure_cell_matrix'] = standardized_structure_node.cell
output_params['standardized_structure_params'] = standardized_structure_node.cell_lengths
standard_pbc = standardized_structure_node.cell_lengths
multiples = [1, 1, 1]
if scale_unit_cell:
multiples = []
for length in standard_pbc:
multiple = np.ceil(supercell_min_parameter / length)
multiples.append(int(multiple))
ase_structure = standardized_structure_node.get_ase()
ase_supercell = ase_structure * multiples
# if there are hubbard data to apply, we re-construct
# the supercell site-by-site to keep the correct ordering
if is_hubbard_structure:
blank_supercell = StructureData(ase=ase_supercell)
new_supercell = StructureData()
new_supercell.set_cell(blank_supercell.cell)
num_extensions = np.product(multiples)
supercell_types_order = []
# For each supercell extension, loop over each site.
# This way, the original pattern-ordering of sites is
# preserved.
for i in range(0, num_extensions): # pylint: disable=unused-variable
for type_number in types_order:
supercell_types_order.append(type_number)
for site, type_number in zip(blank_supercell.sites, supercell_types_order):
kind_present = type_mapping_dict[str(type_number)]
if kind_present.name not in [kind.name for kind in new_supercell.kinds]:
new_supercell.append_kind(kind_present)
new_site = Site(kind_name=kind_present.name, position=site.position)
new_supercell.append_site(new_site)
else: # otherwise, simply re-construct the supercell with ASE
new_supercell = StructureData(ase=ase_supercell)
if is_hubbard_structure: # Scale up the hubbard parameters to match and return the HubbardStructureData
# we can exploit the fact that `get_hubbard_for_supercell` will return a HubbardStructureData node
# with the same hubbard parameters in the case where the input structure and the supercell are the
# same (i.e. multiples == [1, 1, 1])
hubbard_manip = HubbardUtils(structure)
new_hubbard_supercell = hubbard_manip.get_hubbard_for_supercell(new_supercell)
new_supercell = new_hubbard_supercell
supercell_hubbard_params = new_supercell.hubbard
result['supercell'] = new_supercell
else:
result['supercell'] = new_supercell
output_params['supercell_factors'] = multiples
output_params['supercell_num_sites'] = len(new_supercell.sites)
output_params['supercell_cell_matrix'] = new_supercell.cell
output_params['supercell_cell_lengths'] = new_supercell.cell_lengths
for value in equivalency_dict.values():
target_site = value['site_index']
marked_structure = StructureData()
marked_structure.set_cell(new_supercell.cell)
new_kind_names = [kind.name for kind in new_supercell.kinds]
for index, site in enumerate(new_supercell.sites):
kind_at_position = new_supercell.kinds[new_kind_names.index(site.kind_name)]
if index == target_site:
absorbing_kind = Kind(name=abs_atom_marker, symbols=kind_at_position.symbol)
absorbing_site = Site(kind_name=absorbing_kind.name, position=site.position)
marked_structure.append_kind(absorbing_kind)
marked_structure.append_site(absorbing_site)
else:
if kind_at_position.name not in [kind.name for kind in marked_structure.kinds]:
marked_structure.append_kind(kind_at_position)
new_site = Site(kind_name=site.kind_name, position=site.position)
marked_structure.append_site(new_site)
if is_hubbard_structure:
marked_hubbard_structure = HubbardStructureData.from_structure(marked_structure)
marked_hubbard_structure.hubbard = supercell_hubbard_params
result[f'site_{target_site}_{value["symbol"]}'] = marked_hubbard_structure
else:
result[f'site_{target_site}_{value["symbol"]}'] = marked_structure
output_params['is_molecule_input'] = is_molecule_input
result['output_parameters'] = orm.Dict(dict=output_params)
return result