Source code for aiida_quantumespresso.parsers.parse_xml.parse

# -*- coding: utf-8 -*-
from urllib.error import URLError

import numpy as np
from packaging.version import Version
from qe_tools import CONSTANTS
from xmlschema import XMLSchema

from aiida_quantumespresso.utils.mapping import get_logging_container

from .exceptions import XMLParseError
from .versions import get_default_schema_filepath, get_schema_filepath


[docs]def raise_parsing_error(message):
    raise XMLParseError(message)


[docs]def parser_assert(condition, message, log_func=raise_parsing_error):
    if not condition:
        log_func(message)


[docs]def parser_assert_equal(val1, val2, message, log_func=raise_parsing_error):
    if not (val1 == val2):
        msg = f'Violated assertion: {val1} == {val2}'
        if message:
            msg += ' - '
            msg += message
        log_func(msg)


[docs]def cell_volume(a1, a2, a3):
    r"""Returns the volume of the primitive cell: :math:`|\vec a_1\cdot(\vec a_2\cross \vec a_3)|`"""
    a_mid_0 = a2[1] * a3[2] - a2[2] * a3[1]
    a_mid_1 = a2[2] * a3[0] - a2[0] * a3[2]
    a_mid_2 = a2[0] * a3[1] - a2[1] * a3[0]

    return abs(float(a1[0] * a_mid_0 + a1[1] * a_mid_1 + a1[2] * a_mid_2))


[docs]def parse_xml_post_6_2(xml):
    """Parse the content of XML output file written by `pw.x` and `cp.x` with the new schema-based XML format.

    :param xml: parsed XML
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively
    """
    e_bohr2_to_coulomb_m2 = 57.214766  # e/a0^2 to C/m^2 (electric polarization) from Wolfram Alpha

    logs = get_logging_container()

    schema_filepath = get_schema_filepath(xml)

    try:
        xsd = XMLSchema(schema_filepath)
    except URLError:

        # If loading the XSD file specified in the XML file fails, we try the default
        schema_filepath_default = get_default_schema_filepath()

        try:
            xsd = XMLSchema(schema_filepath_default)
        except URLError:
            raise XMLParseError(
                f'Could not open or parse the XSD files {schema_filepath} and {schema_filepath_default}'
            )
        else:
            schema_filepath = schema_filepath_default

    # Validate XML document against the schema
    # Returned dictionary has a structure where, if tag ['key'] is "simple", xml_dictionary['key'] returns its content.
    # Otherwise, the following keys are available:
    #
    #  xml_dictionary['key']['$'] returns its content
    #  xml_dictionary['key']['@attr'] returns its attribute 'attr'
    #  xml_dictionary['key']['nested_key'] goes one level deeper.

    # Fix a bug of QE 6.8: the output XML is not consistent with schema, see
    # https://github.com/aiidateam/aiida-quantumespresso/pull/717
    xml_creator = xml.find('./general_info/creator')
    if xml_creator is not None and 'VERSION' in xml_creator.attrib:
        creator_version = xml_creator.attrib['VERSION']
        if creator_version == '6.8':
            root = xml.getroot()
            timing_info = root.find('./timing_info')
            partial_pwscf = timing_info.find("partial[@label='PWSCF'][@calls='0']")
            try:
                timing_info.remove(partial_pwscf)
            except (TypeError, ValueError):
                pass

    xml_dictionary, errors = xsd.to_dict(xml, validation='lax')
    if errors:
        logs.error.append(f'{len(errors)} XML schema validation error(s) schema: {schema_filepath}:')
        for err in errors:
            logs.error.append(str(err))

    xml_version = Version(xml_dictionary['general_info']['xml_format']['@VERSION'])
    inputs = xml_dictionary.get('input', {})
    outputs = xml_dictionary['output']

    lattice_vectors = [
        [x * CONSTANTS.bohr_to_ang for x in outputs['atomic_structure']['cell']['a1']],
        [x * CONSTANTS.bohr_to_ang for x in outputs['atomic_structure']['cell']['a2']],
        [x * CONSTANTS.bohr_to_ang for x in outputs['atomic_structure']['cell']['a3']],
    ]

    has_electric_field = inputs.get('electric_field', {}).get('electric_potential', None) == 'sawtooth_potential'
    has_dipole_correction = inputs.get('electric_field', {}).get('dipole_correction', False)

    if 'occupations' in inputs.get('bands', {}):
        try:
            occupations = inputs['bands']['occupations']['$']  # yapf: disable
        except TypeError:  # "string indices must be integers" -- might have attribute 'nspin'
            occupations = inputs['bands']['occupations']
    else:
        occupations = None

    starting_magnetization = []
    magnetization_angle1 = []
    magnetization_angle2 = []

    for specie in outputs['atomic_species']['species']:
        starting_magnetization.append(specie.get('starting_magnetization', 0.0))
        magnetization_angle1.append(specie.get('magnetization_angle1', 0.0))
        magnetization_angle2.append(specie.get('magnetization_angle2', 0.0))

    constraint_mag = 0
    spin_constraints = inputs.get('spin_constraints', {}).get('spin_constraints', None)
    if spin_constraints == 'atomic':
        constraint_mag = 1
    elif spin_constraints == 'atomic direction':
        constraint_mag = 2
    elif spin_constraints == 'total':
        constraint_mag = 3
    elif spin_constraints == 'total direction':
        constraint_mag = 6

    lsda = inputs.get('spin', {}).get('lsda', False)
    spin_orbit_calculation = inputs.get('spin', {}).get('spinorbit', False)
    non_colinear_calculation = outputs.get('magnetization', {}).get('noncolin', False)
    do_magnetization = outputs.get('magnetization', {}).get('do_magnetization', False)

    # Time reversal symmetry of the system
    if non_colinear_calculation and do_magnetization:
        time_reversal = False
    else:
        time_reversal = True

    # If no specific tags are present, the default is 1
    if non_colinear_calculation or spin_orbit_calculation:
        nspin = 4
    elif lsda:
        nspin = 2
    else:
        nspin = 1

    symmetries = []
    lattice_symmetries = []  # note: will only contain lattice symmetries that are NOT crystal symmetries
    inversion_symmetry = False

    # See also PW/src/setup.f90
    nsym = outputs.get('symmetries', {}).get('nsym', None)  # crystal symmetries
    nrot = outputs.get('symmetries', {}).get('nrot', None)  # lattice symmetries

    for symmetry in outputs.get('symmetries', {}).get('symmetry', []):

        # There are two types of symmetries, lattice and crystal. The pure inversion (-I) is always a lattice symmetry,
        # so we don't care. But if the pure inversion is also a crystal symmetry, then then the system as a whole
        # has (by definition) inversion symmetry; so we set the global property inversion_symmetry = True.
        symmetry_type = symmetry['info']['$']
        symmetry_name = symmetry['info']['@name']
        if symmetry_type == 'crystal_symmetry' and symmetry_name.lower() == 'inversion':
            inversion_symmetry = True

        sym = {
            'rotation': [
                symmetry['rotation']['$'][0:3],
                symmetry['rotation']['$'][3:6],
                symmetry['rotation']['$'][6:9],
            ],
            'name': symmetry_name,
        }

        try:
            sym['t_rev'] = '1' if symmetry['info']['@time_reversal'] else '0'
        except KeyError:
            sym['t_rev'] = '0'

        try:
            sym['equivalent_atoms'] = symmetry['equivalent_atoms']['$']
        except KeyError:
            pass

        try:
            sym['fractional_translation'] = symmetry['fractional_translation']
        except KeyError:
            pass

        if symmetry_type == 'crystal_symmetry':
            symmetries.append(sym)
        elif symmetry_type == 'lattice_symmetry':
            lattice_symmetries.append(sym)
        else:
            raise XMLParseError(f'Unexpected type of symmetry: {symmetry_type}')

    if (nsym != len(symmetries)) or (nrot != len(symmetries) + len(lattice_symmetries)):
        logs.warning.append(
            'Inconsistent number of symmetries: nsym={}, nrot={}, len(symmetries)={}, len(lattice_symmetries)={}'.
            format(nsym, nrot, len(symmetries), len(lattice_symmetries))
        )

    xml_data = {
        #'pp_check_flag': True, # Currently not printed in the new format.
        # Signals whether the XML file is complete
        # and can be used for post-processing. Everything should be in the XML now, but in
        # any case, the new XML schema should mostly protect from incomplete files.
        'lkpoint_dir': False,  # Currently not printed in the new format.
        # Signals whether kpt-data are written in sub-directories.
        # Was generally true in the old format, but now all the eigenvalues are
        # in the XML file, under output / band_structure, so this is False.
        'charge_density': './charge-density.dat',  # A file name. Not printed in the new format.
        # The filename and path are considered fixed: <outdir>/<prefix>.save/charge-density.dat
        # TODO: change to .hdf5 if output format is HDF5 (issue #222)
        'rho_cutoff_units': 'eV',
        'wfc_cutoff_units': 'eV',
        'fermi_energy_units': 'eV',
        'k_points_units': '1 / angstrom',
        'symmetries_units': 'crystal',
        'constraint_mag': constraint_mag,
        'magnetization_angle2': magnetization_angle2,
        'magnetization_angle1': magnetization_angle1,
        'starting_magnetization': starting_magnetization,
        'has_electric_field': has_electric_field,
        'has_dipole_correction': has_dipole_correction,
        'lda_plus_u_calculation': 'dftU' in outputs,
        'format_name': xml_dictionary['general_info']['xml_format']['@NAME'],
        'format_version': xml_dictionary['general_info']['xml_format']['@VERSION'],
        # TODO: check that format version: a) matches the XSD schema version; b) is updated as well
        #       See line 43 in Modules/qexsd.f90
        'creator_name': xml_dictionary['general_info']['creator']['@NAME'].lower(),
        'creator_version': xml_dictionary['general_info']['creator']['@VERSION'],
        'non_colinear_calculation': non_colinear_calculation,
        'do_magnetization': do_magnetization,
        'time_reversal_flag': time_reversal,
        'symmetries': symmetries,
        'lattice_symmetries': lattice_symmetries,
        'do_not_use_time_reversal': inputs.get('symmetry_flags', {}).get('noinv', None),
        'spin_orbit_domag': do_magnetization,
        'fft_grid': [value for _, value in sorted(outputs['basis_set']['fft_grid'].items())],
        'lsda': lsda,
        'number_of_spin_components': nspin,
        'no_time_rev_operations': inputs.get('symmetry_flags', {}).get('no_t_rev', None),
        'inversion_symmetry':
        inversion_symmetry,  # the old tag was INVERSION_SYMMETRY and was set to (from the code): "invsym    if true the system has inversion symmetry"
        'number_of_bravais_symmetries': nrot,  # lattice symmetries
        'number_of_symmetries': nsym,  # crystal symmetries
        'wfc_cutoff': inputs.get('basis', {}).get('ecutwfc', -1.0) * CONSTANTS.hartree_to_ev,
        'rho_cutoff': outputs['basis_set']['ecutrho'] * CONSTANTS.hartree_to_ev,  # not always printed in input->basis
        'smooth_fft_grid': [value for _, value in sorted(outputs['basis_set']['fft_smooth'].items())],
        'dft_exchange_correlation': inputs.get('dft', {}).get('functional',
                                                              None),  # TODO: also parse optional elements of 'dft' tag
        # WARNING: this is different between old XML and new XML
        'spin_orbit_calculation': spin_orbit_calculation,
        'q_real_space': outputs['algorithmic_info']['real_space_q'],
    }

    # alat is technically an optional attribute according to the schema,
    # but I don't know what to do if it's missing. atomic_structure is mandatory.
    output_alat_bohr = outputs['atomic_structure']['@alat']
    output_alat_angstrom = output_alat_bohr * CONSTANTS.bohr_to_ang

    # Band structure
    if 'band_structure' in outputs:
        band_structure = outputs['band_structure']

        smearing_xml = None

        if 'smearing' in outputs['band_structure']:
            smearing_xml = outputs['band_structure']['smearing']
        elif 'smearing' in inputs:
            smearing_xml = inputs['smearing']

        if smearing_xml:
            degauss = smearing_xml['@degauss']

            # Versions below 19.03.04 (Quantum ESPRESSO<=6.4.1) incorrectly print degauss in Ry instead of Hartree
            if xml_version < Version('19.03.04'):
                degauss *= CONSTANTS.ry_to_ev
            else:
                degauss *= CONSTANTS.hartree_to_ev

            xml_data['degauss'] = degauss
            xml_data['smearing_type'] = smearing_xml['$']

        num_k_points = band_structure['nks']
        num_electrons = band_structure['nelec']
        num_atomic_wfc = band_structure['num_of_atomic_wfc']
        num_bands = band_structure.get('nbnd', None)
        num_bands_up = band_structure.get('nbnd_up', None)
        num_bands_down = band_structure.get('nbnd_dw', None)

        if num_bands is None and num_bands_up is None and num_bands_down is None:
            raise XMLParseError('None of `nbnd`, `nbnd_up` or `nbdn_dw` could be parsed.')

        # If both channels are `None` we are dealing with a non spin-polarized or non-collinear calculation
        elif num_bands_up is None and num_bands_down is None:
            spins = False

        # If only one of the channels is `None` we raise, because that is an inconsistent result
        elif num_bands_up is None or num_bands_down is None:
            raise XMLParseError('Only one of `nbnd_up` and `nbnd_dw` could be parsed')

        # Here it is a spin-polarized calculation, where for pw.x the number of bands in each channel should be identical.
        else:
            spins = True
            if num_bands_up != num_bands_down:
                raise XMLParseError(f'different number of bands for spin channels: {num_bands_up} and {num_bands_down}')

            if num_bands is not None and num_bands != num_bands_up + num_bands_down:
                raise XMLParseError(
                    'Inconsistent number of bands: nbnd={}, nbnd_up={}, nbnd_down={}'.format(
                        num_bands, num_bands_up, num_bands_down
                    )
                )

            if num_bands is None:
                num_bands = num_bands_up + num_bands_down  # backwards compatibility;

        k_points = []
        k_points_weights = []
        ks_states = band_structure['ks_energies']
        for ks_state in ks_states:
            k_points.append([kp * 2 * np.pi / output_alat_angstrom for kp in ks_state['k_point']['$']])
            k_points_weights.append(ks_state['k_point']['@weight'])

        if not spins:
            band_eigenvalues = [[]]
            band_occupations = [[]]
            for ks_state in ks_states:
                band_eigenvalues[0].append(ks_state['eigenvalues']['$'])
                band_occupations[0].append(ks_state['occupations']['$'])
        else:
            band_eigenvalues = [[], []]
            band_occupations = [[], []]
            for ks_state in ks_states:
                band_eigenvalues[0].append(ks_state['eigenvalues']['$'][0:num_bands_up])
                band_eigenvalues[1].append(ks_state['eigenvalues']['$'][num_bands_up:num_bands])
                band_occupations[0].append(ks_state['occupations']['$'][0:num_bands_up])
                band_occupations[1].append(ks_state['occupations']['$'][num_bands_up:num_bands])

        band_eigenvalues = np.array(band_eigenvalues) * CONSTANTS.hartree_to_ev
        band_occupations = np.array(band_occupations)

        if not spins:
            parser_assert_equal(
                band_eigenvalues.shape, (1, num_k_points, num_bands), 'Unexpected shape of band_eigenvalues'
            )
            parser_assert_equal(
                band_occupations.shape, (1, num_k_points, num_bands), 'Unexpected shape of band_occupations'
            )
        else:
            parser_assert_equal(
                band_eigenvalues.shape, (2, num_k_points, num_bands_up), 'Unexpected shape of band_eigenvalues'
            )
            parser_assert_equal(
                band_occupations.shape, (2, num_k_points, num_bands_up), 'Unexpected shape of band_occupations'
            )

        if not spins:
            xml_data['number_of_bands'] = num_bands
        else:
            # For collinear spin-polarized calculations `spins=True` and `num_bands` is sum of both channels. To get the
            # actual number of bands, we divide by two using integer division
            xml_data['number_of_bands'] = num_bands // 2

        for key, value in [('number_of_bands_up', num_bands_up), ('number_of_bands_down', num_bands_down)]:
            if value is not None:
                xml_data[key] = value

        if 'fermi_energy' in band_structure:
            xml_data['fermi_energy'] = band_structure['fermi_energy'] * CONSTANTS.hartree_to_ev

        if 'two_fermi_energies' in band_structure:
            xml_data['fermi_energy_up'], xml_data['fermi_energy_down'] = [
                energy * CONSTANTS.hartree_to_ev for energy in band_structure['two_fermi_energies']
            ]

        bands_dict = {
            'occupations': band_occupations,
            'bands': band_eigenvalues,
            'bands_units': 'eV',
        }

        xml_data['number_of_atomic_wfc'] = num_atomic_wfc
        xml_data['number_of_k_points'] = num_k_points
        xml_data['number_of_electrons'] = num_electrons
        xml_data['k_points'] = k_points
        xml_data['k_points_weights'] = k_points_weights
        xml_data['bands'] = bands_dict

    try:
        monkhorst_pack = inputs['k_points_IBZ']['monkhorst_pack']
    except KeyError:
        pass  # not using Monkhorst pack
    else:
        xml_data['monkhorst_pack_grid'] = [monkhorst_pack[attr] for attr in ['@nk1', '@nk2', '@nk3']]
        xml_data['monkhorst_pack_offset'] = [monkhorst_pack[attr] for attr in ['@k1', '@k2', '@k3']]

    if occupations is not None:
        xml_data['occupations'] = occupations

    if 'boundary_conditions' in outputs and 'assume_isolated' in outputs['boundary_conditions']:
        xml_data['assume_isolated'] = outputs['boundary_conditions']['assume_isolated']

    # This is not printed by QE 6.3, but will be re-added before the next version
    if 'real_space_beta' in outputs['algorithmic_info']:
        xml_data['beta_real_space'] = outputs['algorithmic_info']['real_space_beta']

    conv_info = {}
    conv_info_scf = {}
    conv_info_opt = {}
    # NOTE: n_scf_steps refers to the number of SCF steps in the *last* loop only.
    # To get the total number of SCF steps in the run you should sum up the individual steps.
    # TODO: should we parse 'steps' too? Are they already added in the output trajectory?
    for key in ['convergence_achieved', 'n_scf_steps', 'scf_error']:
        try:
            conv_info_scf[key] = outputs['convergence_info']['scf_conv'][key]
        except KeyError:
            pass
    for key in ['convergence_achieved', 'n_opt_steps', 'grad_norm']:
        try:
            conv_info_opt[key] = outputs['convergence_info']['opt_conv'][key]
        except KeyError:
            pass
    if conv_info_scf:
        conv_info['scf_conv'] = conv_info_scf
    if conv_info_opt:
        conv_info['opt_conv'] = conv_info_opt
    if conv_info:
        xml_data['convergence_info'] = conv_info

    if 'status' in xml_dictionary:
        xml_data['exit_status'] = xml_dictionary['status']
        # 0 = convergence reached;
        # -1 = SCF convergence failed;
        # 3 = ionic convergence failed
        # These might be changed in the future. Also see PW/src/run_pwscf.f90

    try:
        berry_phase = outputs['electric_field']['BerryPhase']
    except KeyError:
        pass
    else:
        # This is what I would like to do, but it's not retro-compatible
        # xml_data['berry_phase'] = {}
        # xml_data['berry_phase']['total_phase']         = berry_phase['totalPhase']['$']
        # xml_data['berry_phase']['total_phase_modulus'] = berry_phase['totalPhase']['@modulus']
        # xml_data['berry_phase']['total_ionic_phase']      = berry_phase['totalPhase']['@ionic']
        # xml_data['berry_phase']['total_electronic_phase'] = berry_phase['totalPhase']['@electronic']
        # xml_data['berry_phase']['total_polarization']           = berry_phase['totalPolarization']['polarization']['$']
        # xml_data['berry_phase']['total_polarization_modulus']   = berry_phase['totalPolarization']['modulus']
        # xml_data['berry_phase']['total_polarization_units']     = berry_phase['totalPolarization']['polarization']['@Units']
        # xml_data['berry_phase']['total_polarization_direction'] = berry_phase['totalPolarization']['direction']
        # parser_assert_equal(xml_data['berry_phase']['total_phase_modulus'].lower(), '(mod 2)',
        #                    "Unexpected modulus for total phase")
        # parser_assert_equal(xml_data['berry_phase']['total_polarization_units'].lower(), 'e/bohr^2',
        #                    "Unsupported units for total polarization")
        # Retro-compatible keys:
        polarization = berry_phase['totalPolarization']['polarization']['$']
        polarization_units = berry_phase['totalPolarization']['polarization']['@Units']
        polarization_modulus = berry_phase['totalPolarization']['modulus']
        parser_assert(
            polarization_units in ['e/bohr^2', 'C/m^2'],
            f"Unsupported units '{polarization_units}' of total polarization"
        )
        if polarization_units == 'e/bohr^2':
            polarization *= e_bohr2_to_coulomb_m2
            polarization_modulus *= e_bohr2_to_coulomb_m2

        xml_data['total_phase'] = berry_phase['totalPhase']['$']
        xml_data['total_phase_units'] = '2pi'
        xml_data['ionic_phase'] = berry_phase['totalPhase']['@ionic']
        xml_data['ionic_phase_units'] = '2pi'
        xml_data['electronic_phase'] = berry_phase['totalPhase']['@electronic']
        xml_data['electronic_phase_units'] = '2pi'
        xml_data['polarization'] = polarization
        xml_data['polarization_module'] = polarization_modulus  # should be called "modulus"
        xml_data['polarization_units'] = 'C / m^2'
        xml_data['polarization_direction'] = berry_phase['totalPolarization']['direction']
        # TODO: add conversion for (e/Omega).bohr (requires to know Omega, the volume of the cell)
        # TODO (maybe): Not parsed:
        # - individual ionic phases
        # - individual electronic phases and weights

    # TODO: We should put the `non_periodic_cell_correction` string in (?)
    atoms = [[atom['@name'], [coord * CONSTANTS.bohr_to_ang
                              for coord in atom['$']]]
             for atom in outputs['atomic_structure']['atomic_positions']['atom']]
    species = outputs['atomic_species']['species']
    structure_data = {
        'atomic_positions_units':
        'Angstrom',
        'direct_lattice_vectors_units':
        'Angstrom',
        # ??? 'atoms_if_pos_list': [[1, 1, 1], [1, 1, 1]],
        'number_of_atoms':
        outputs['atomic_structure']['@nat'],
        'lattice_parameter':
        output_alat_angstrom,
        'reciprocal_lattice_vectors': [
            outputs['basis_set']['reciprocal_lattice']['b1'], outputs['basis_set']['reciprocal_lattice']['b2'],
            outputs['basis_set']['reciprocal_lattice']['b3']
        ],
        'atoms':
        atoms,
        'cell': {
            'lattice_vectors': lattice_vectors,
            'volume': cell_volume(*lattice_vectors),
            'atoms': atoms,
        },
        'lattice_parameter_xml':
        output_alat_bohr,
        'number_of_species':
        outputs['atomic_species']['@ntyp'],
        'species': {
            'index': [i + 1 for i, specie in enumerate(species)],
            'pseudo': [specie['pseudo_file'] for specie in species],
            'mass': [specie['mass'] for specie in species],
            'type': [specie['@name'] for specie in species]
        },
    }

    xml_data['structure'] = structure_data

    return xml_data, logs