Source code for aiida_quantumespresso.parsers.parse_raw.pw

# -*- coding: utf-8 -*-
"""A collection of function that are used to parse the output of Quantum Espresso PW.

The function that needs to be called from outside is parse_raw_output(). The functions mostly work without aiida
specific functionalities. The parsing will try to convert whatever it can in some dictionary, which by operative
decision doesn't have much structure encoded, [the values are simple ]
"""
import re

import numpy
from qe_tools import CONSTANTS

from aiida_quantumespresso.parsers import QEOutputParsingError
from aiida_quantumespresso.parsers.parse_raw import convert_qe_time_to_sec
from aiida_quantumespresso.utils.mapping import get_logging_container

[docs]lattice_tolerance = 1.e-5
[docs]units_suffix = '_units'
[docs]default_charge_units = 'e'
[docs]default_dipole_units = 'Debye'
[docs]default_energy_units = 'eV'
[docs]default_force_units = 'ev / angstrom'
[docs]default_k_points_units = '1 / angstrom'
[docs]default_length_units = 'Angstrom'
[docs]default_magnetization_units = 'Bohrmag / cell'
[docs]default_polarization_units = 'C / m^2'
[docs]default_stress_units = 'GPascal'
[docs]def reduce_symmetries(parsed_parameters, parsed_structure, logger): """Reduce the symmetry information parsed from the output to save space. In the standard output, each symmetry operation print two rotation matrices: * S_cryst^T: matrix in crystal coordinates, transposed * S_cart: matrix in cartesian coordinates, The XML files only print one matrix: * S_cryst: matrix in crystal coordinates The raw parsed symmetry information from the XML is large and will load the database heavily if stored as is for each calculation. Instead, we will map these dictionaries onto a static dictionary of rotation matrices generated by the _get_qe_symmetry_list static method. This dictionary will return the rotation matrices in cartesian coordinates, i.e. S_cart. In order to compare the raw matrices from the XML to these static matrices we have to convert S_cryst into S_cart. We derive here how that is done: S_cryst * v_cryst = v_cryst' where v_cryst' is the rotated vector v_cryst under S_cryst We define `cell` where cell vectors are rows. Converting a vector from crystal to cartesian coordinates is defined as: cell^T * v_cryst = v_cart The inverse of this operation is defined as v_cryst = cell^Tinv * v_cart Replacing the last equation into the first we find: S_cryst * cell^Tinv * v_cart = cell^Tinv * v_cart' Multiply on the left with cell^T gives: cell^T * S_cryst * cell^Tinv * v_cart = v_cart' which can be rewritten as: S_cart * v_cart = v_cart' where: S_cart = cell^T * S_cryst * cell^Tinv We compute here the transpose and its inverse of the structure cell basis, which is needed to transform the parsed rotation matrices, which are in crystal coordinates, to cartesian coordinates, which are the matrices that are returned by the _get_qe_symmetry_list staticmethod """ from aiida_quantumespresso.parsers.parse_raw.pw import get_symmetry_mapping from aiida_quantumespresso.utils.linalg import are_matrices_equal cell = parsed_structure['cell']['lattice_vectors'] cell_T = numpy.transpose(cell) cell_Tinv = numpy.linalg.inv(cell_T) possible_symmetries = get_symmetry_mapping() for symmetry_type in ['symmetries', 'lattice_symmetries']: # crystal vs. lattice symmetries if symmetry_type in list(parsed_parameters.keys()): try: old_symmetries = parsed_parameters[symmetry_type] new_symmetries = [] for this_sym in old_symmetries: name = this_sym['name'].strip() for i, this in enumerate(possible_symmetries): # Since we do an exact comparison we strip the string name from whitespace # and as soon as it is matched, we break to prevent it from matching another if name == this['name'].strip(): index = i break else: index = None logger.error(f'Symmetry {name} not found') new_dict = {} if index is not None: # The raw parsed rotation matrix is in crystal coordinates, whereas the mapped rotation # in possible_symmetries is in cartesian coordinates. To allow them to be compared # to make sure we matched the correct rotation symmetry, we first convert the parsed matrix # to cartesian coordinates. For explanation of the method, see comment above. rotation_cryst = this_sym['rotation'] rotation_cart_new = possible_symmetries[index]['matrix'] rotation_cart_old = numpy.dot(cell_T, numpy.dot(rotation_cryst, cell_Tinv)) inversion = possible_symmetries[index]['inversion'] if not are_matrices_equal(rotation_cart_old, rotation_cart_new, swap_sign_matrix_b=inversion): logger.error( 'Mapped rotation matrix {} does not match the original rotation {}'.format( rotation_cart_new, rotation_cart_old ) ) new_dict['all_symmetries'] = this_sym else: # Note: here I lose the information about equivalent ions and fractional_translation # since I don't copy them to new_dict (but they can be reconstructed). new_dict['t_rev'] = this_sym['t_rev'] new_dict['symmetry_number'] = index else: new_dict['all_symmetries'] = this_sym new_symmetries.append(new_dict) parsed_parameters[symmetry_type] = new_symmetries # and overwrite the old one except KeyError: logger.warning(f"key '{symmetry_type}' is not present in raw output dictionary") else: # backwards-compatiblity: 'lattice_symmetries' is not created in older versions of the parser if symmetry_type != 'lattice_symmetries': logger.warning(f"key '{symmetry_type}' is not present in raw output dictionary")
[docs]def get_symmetry_mapping(): """Hard coded names and rotation matrices + inversion from QE v 5.0.2 function for Parser class usage only. :return: a list of dictionaries, each containing name (string), inversion (boolean) and matrix (list of lists) """ sin3 = 0.866025403784438597 cos3 = 0.5 msin3 = -0.866025403784438597 mcos3 = -0.5 # 32 rotations that are checked + inversion taken from symm_base.f90 from the QE source code # They are in Fortran format and therefore transposed with respect to the default python format transposed_matrices_cartesian = [ [[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]], [[-1., 0., 0.], [0., -1., 0.], [0., 0., 1.]], [[-1., 0., 0.], [0., 1., 0.], [0., 0., -1.]], [[1., 0., 0.], [0., -1., 0.], [0., 0., -1.]], [[0., 1., 0.], [1., 0., 0.], [0., 0., -1.]], [[0., -1., 0.], [-1., 0., 0.], [0., 0., -1.]], [[0., -1., 0.], [1., 0., 0.], [0., 0., 1.]], [[0., 1., 0.], [-1., 0., 0.], [0., 0., 1.]], [[0., 0., 1.], [0., -1., 0.], [1., 0., 0.]], [[0., 0., -1.], [0., -1., 0.], [-1., 0., 0.]], [[0., 0., -1.], [0., 1., 0.], [1., 0., 0.]], [[0., 0., 1.], [0., 1., 0.], [-1., 0., 0.]], [[-1., 0., 0.], [0., 0., 1.], [0., 1., 0.]], [[-1., 0., 0.], [0., 0., -1.], [0., -1., 0.]], [[1., 0., 0.], [0., 0., -1.], [0., 1., 0.]], [[1., 0., 0.], [0., 0., 1.], [0., -1., 0.]], [[0., 0., 1.], [1., 0., 0.], [0., 1., 0.]], [[0., 0., -1.], [-1., 0., 0.], [0., 1., 0.]], [[0., 0., -1.], [1., 0., 0.], [0., -1., 0.]], [[0., 0., 1.], [-1., 0., 0.], [0., -1., 0.]], [[0., 1., 0.], [0., 0., 1.], [1., 0., 0.]], [[0., -1., 0.], [0., 0., -1.], [1., 0., 0.]], [[0., -1., 0.], [0., 0., 1.], [-1., 0., 0.]], [[0., 1., 0.], [0., 0., -1.], [-1., 0., 0.]], [[cos3, sin3, 0.], [msin3, cos3, 0.], [0., 0., 1.]], [[cos3, msin3, 0.], [sin3, cos3, 0.], [0., 0., 1.]], [[mcos3, sin3, 0.], [msin3, mcos3, 0.], [0., 0., 1.]], [[mcos3, msin3, 0.], [sin3, mcos3, 0.], [0., 0., 1.]], [[cos3, msin3, 0.], [msin3, mcos3, 0.], [0., 0., -1.]], [[cos3, sin3, 0.], [sin3, mcos3, 0.], [0., 0., -1.]], [[mcos3, msin3, 0.], [msin3, cos3, 0.], [0., 0., -1.]], [[mcos3, sin3, 0.], [sin3, cos3, 0.], [0., 0., -1.]], ] # Names for the 32 matrices, with and without inversion matrices_name = [ 'identity ', '180 deg rotation - cart. axis [0,0,1] ', '180 deg rotation - cart. axis [0,1,0] ', '180 deg rotation - cart. axis [1,0,0] ', '180 deg rotation - cart. axis [1,1,0] ', '180 deg rotation - cart. axis [1,-1,0] ', ' 90 deg rotation - cart. axis [0,0,-1] ', ' 90 deg rotation - cart. axis [0,0,1] ', '180 deg rotation - cart. axis [1,0,1] ', '180 deg rotation - cart. axis [-1,0,1] ', ' 90 deg rotation - cart. axis [0,1,0] ', ' 90 deg rotation - cart. axis [0,-1,0] ', '180 deg rotation - cart. axis [0,1,1] ', '180 deg rotation - cart. axis [0,1,-1] ', ' 90 deg rotation - cart. axis [-1,0,0] ', ' 90 deg rotation - cart. axis [1,0,0] ', '120 deg rotation - cart. axis [-1,-1,-1] ', '120 deg rotation - cart. axis [-1,1,1] ', '120 deg rotation - cart. axis [1,1,-1] ', '120 deg rotation - cart. axis [1,-1,1] ', '120 deg rotation - cart. axis [1,1,1] ', '120 deg rotation - cart. axis [-1,1,-1] ', '120 deg rotation - cart. axis [1,-1,-1] ', '120 deg rotation - cart. axis [-1,-1,1] ', ' 60 deg rotation - cryst. axis [0,0,1] ', ' 60 deg rotation - cryst. axis [0,0,-1] ', '120 deg rotation - cryst. axis [0,0,1] ', '120 deg rotation - cryst. axis [0,0,-1] ', '180 deg rotation - cryst. axis [1,-1,0] ', '180 deg rotation - cryst. axis [2,1,0] ', '180 deg rotation - cryst. axis [0,1,0] ', '180 deg rotation - cryst. axis [1,1,0] ', 'inversion ', 'inv. 180 deg rotation - cart. axis [0,0,1] ', 'inv. 180 deg rotation - cart. axis [0,1,0] ', 'inv. 180 deg rotation - cart. axis [1,0,0] ', 'inv. 180 deg rotation - cart. axis [1,1,0] ', 'inv. 180 deg rotation - cart. axis [1,-1,0] ', 'inv. 90 deg rotation - cart. axis [0,0,-1] ', 'inv. 90 deg rotation - cart. axis [0,0,1] ', 'inv. 180 deg rotation - cart. axis [1,0,1] ', 'inv. 180 deg rotation - cart. axis [-1,0,1] ', 'inv. 90 deg rotation - cart. axis [0,1,0] ', 'inv. 90 deg rotation - cart. axis [0,-1,0] ', 'inv. 180 deg rotation - cart. axis [0,1,1] ', 'inv. 180 deg rotation - cart. axis [0,1,-1] ', 'inv. 90 deg rotation - cart. axis [-1,0,0] ', 'inv. 90 deg rotation - cart. axis [1,0,0] ', 'inv. 120 deg rotation - cart. axis [-1,-1,-1]', 'inv. 120 deg rotation - cart. axis [-1,1,1] ', 'inv. 120 deg rotation - cart. axis [1,1,-1] ', 'inv. 120 deg rotation - cart. axis [1,-1,1] ', 'inv. 120 deg rotation - cart. axis [1,1,1] ', 'inv. 120 deg rotation - cart. axis [-1,1,-1] ', 'inv. 120 deg rotation - cart. axis [1,-1,-1] ', 'inv. 120 deg rotation - cart. axis [-1,-1,1] ', 'inv. 60 deg rotation - cryst. axis [0,0,1] ', 'inv. 60 deg rotation - cryst. axis [0,0,-1] ', 'inv. 120 deg rotation - cryst. axis [0,0,1] ', 'inv. 120 deg rotation - cryst. axis [0,0,-1] ', 'inv. 180 deg rotation - cryst. axis [1,-1,0] ', 'inv. 180 deg rotation - cryst. axis [2,1,0] ', 'inv. 180 deg rotation - cryst. axis [0,1,0] ', 'inv. 180 deg rotation - cryst. axis [1,1,0] ' ] rotations = [] for key, value in zip(matrices_name[:len(transposed_matrices_cartesian)], transposed_matrices_cartesian): rotations.append({'name': key, 'matrix': numpy.transpose(value), 'inversion': False}) for key, value in zip(matrices_name[len(transposed_matrices_cartesian):], transposed_matrices_cartesian): rotations.append({'name': key, 'matrix': numpy.transpose(value), 'inversion': True}) return rotations
[docs]REG_ERROR_NPOOLS_TOO_HIGH = re.compile(r'\s+some nodes have no k-points.*')
[docs]def detect_important_message(logs, line): message_map = { 'error': { 'Maximum CPU time exceeded': 'ERROR_OUT_OF_WALLTIME', 'convergence NOT achieved after': 'ERROR_ELECTRONIC_CONVERGENCE_NOT_REACHED', 'history already reset at previous step: stopping': 'ERROR_IONIC_CYCLE_BFGS_HISTORY_FAILURE', 'problems computing cholesky': 'ERROR_DIAGONALIZATION_CHOLESKY_DECOMPOSITION', 'charge is wrong': 'ERROR_CHARGE_IS_WRONG', 'not orthogonal operation': 'ERROR_SYMMETRY_NON_ORTHOGONAL_OPERATION', 'problems computing cholesky': 'ERROR_COMPUTING_CHOLESKY', 'dexx is negative': 'ERROR_DEXX_IS_NEGATIVE', 'too many bands are not converged': 'ERROR_DIAGONALIZATION_TOO_MANY_BANDS_NOT_CONVERGED', 'S matrix not positive definite': 'ERROR_S_MATRIX_NOT_POSITIVE_DEFINITE', 'zhegvd failed': 'ERROR_ZHEGVD_FAILED', '[Q, R] = qr(X, 0) failed': 'ERROR_QR_FAILED', 'probably because G_par is NOT a reciprocal lattice vector': 'ERROR_G_PAR', 'eigenvectors failed to converge': 'ERROR_EIGENVECTOR_CONVERGENCE', 'Error in routine broyden': 'ERROR_BROYDEN_FACTORIZATION', 'Not enough space allocated for radial FFT: try restarting with a larger cell_factor': 'ERROR_RADIAL_FFT_SIGNIFICANT_VOLUME_CONTRACTION', REG_ERROR_NPOOLS_TOO_HIGH: 'ERROR_NPOOLS_TOO_HIGH', }, 'warning': { 'Warning:': None, 'DEPRECATED:': None, } } # Match any known error and warning messages for marker, message in message_map['error'].items(): # Replace with isinstance(marker, re.Pattern) once Python 3.6 is dropped if hasattr(marker, 'search'): if marker.match(line): if message is None: message = line logs.error.append(message) else: if marker in line: if message is None: message = line logs.error.append(message) for marker, message in message_map['warning'].items(): if marker in line: if message is None: message = line logs.warning.append(message)
[docs]def parse_stdout(stdout, input_parameters, parser_options=None, parsed_xml=None, crash_file=None): """Parses the stdout content of a Quantum ESPRESSO `pw.x` calculation. :param stdout: the stdout content as a string :param input_parameters: dictionary with the input parameters :param crash_file: the content of the ``CRASH`` file as a string if it was written, ``None`` otherwise. :param parser_options: the parser options from the settings input parameter node :param parsed_xml: dictionary with data parsed from the XML output file :returns: tuple of two dictionaries, with the parsed data and log messages, respectively """ if parser_options is None: parser_options = {} if parsed_xml is None: parsed_xml = {} # Separate the input string into separate lines data_lines = stdout.split('\n') logs = get_logging_container() parsed_data = {} vdw_correction = False bands_data = parsed_xml.pop('bands', {}) structure_data = parsed_xml.pop('structure', {}) trajectory_data = {} maximum_ionic_steps = None marker_bfgs_converged = False # First check whether the `JOB DONE` message was written, otherwise the job was interrupted for line in data_lines: if 'JOB DONE' in line: break else: if crash_file is None: logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE') # Determine whether the input switched on an electric field lelfield = input_parameters.get('CONTROL', {}).get('lelfield', False) # Find some useful quantities. if not parsed_xml.get('number_of_bands', None): try: for line in stdout.split('\n'): if 'lattice parameter (alat)' in line: alat = float(line.split('=')[1].split('a.u')[0]) elif 'number of atoms/cell' in line: nat = int(line.split('=')[1]) elif 'number of atomic types' in line: ntyp = int(line.split('=')[1]) elif 'unit-cell volume' in line: if '(a.u.)^3' in line: volume = float(line.split('=')[1].split('(a.u.)^3')[0]) else: # occurs in v5.3.0 volume = float(line.split('=')[1].split('a.u.^3')[0]) elif 'number of Kohn-Sham states' in line: nbnd = int(line.split('=')[1]) elif 'number of k points' in line: nk = int(line.split('=')[1].split()[0]) if input_parameters.get('SYSTEM', {}).get('nspin', 1) > 1: # QE counts twice each k-point in spin-polarized calculations nk /= 2 elif 'Dense grid' in line: FFT_grid = [int(g) for g in line.split('(')[1].split(')')[0].split(',')] elif 'Smooth grid' in line: smooth_FFT_grid = [int(g) for g in line.split('(')[1].split(')')[0].split(',')] break alat *= CONSTANTS.bohr_to_ang volume *= CONSTANTS.bohr_to_ang**3 parsed_data['lattice_parameter_initial'] = alat parsed_data['number_of_bands'] = nbnd try: parsed_data['number_of_k_points'] = nk parsed_data['fft_grid'] = FFT_grid parsed_data['smooth_fft_grid'] = smooth_FFT_grid except NameError: # these are not crucial, so parsing does not fail if they are not found pass except NameError: # nat or other variables where not found, and thus not initialized # Try to get some error messages lines = stdout.split('\n') if crash_file is None else crash_file.split('\n') for line_number, line in enumerate(lines): # Compare the line to the known set of error and warning messages and add them to the log container detect_important_message(logs, line) if len(logs.error) or len(logs.warning) > 0: parsed_data['trajectory'] = trajectory_data return parsed_data, logs # did not find any error message -> raise an Error and do not return anything raise QEOutputParsingError('Parser cannot load basic info.') else: nat = structure_data['number_of_atoms'] ntyp = structure_data['number_of_species'] alat = structure_data['lattice_parameter_xml'] volume = structure_data['cell']['volume'] # NOTE: lattice_parameter_xml is the lattice parameter of the xml file # in the units used by the code. lattice_parameter instead in angstroms. # Save these two quantities in the parsed_data, because they will be # useful for queries (maybe), and structure_data will not be stored as a Dict parsed_data['number_of_atoms'] = nat parsed_data['number_of_species'] = ntyp parsed_data['volume'] = volume c_bands_error = False # now grep quantities that can be considered isolated informations. for count, line in enumerate(data_lines): # Compare the line to the known set of error and warning messages and add them to the log container detect_important_message(logs, line) # to be used for later if 'Non-local correlation energy' in line: vdw_correction = True elif 'Cartesian axes' in line: # this is the part when initial positions and chemical # symbols are printed (they do not change during a run) i = count + 1 while i < count + 10 and not ('site n.' in data_lines[i] and 'atom' in data_lines[i]): i += 1 if 'site n.' in data_lines[i] and 'atom' in data_lines[i]: trajectory_data['atomic_species_name'] = [data_lines[i + 1 + j].split()[1] for j in range(nat)] # parse the initialization time (take only first occurence) elif ('init_wall_time_seconds' not in parsed_data and 'total cpu time spent up to now is' in line): init_time = float(line.split('total cpu time spent up to now is')[1].split('secs')[0]) parsed_data['init_wall_time_seconds'] = init_time # parse dynamical RAM estimates elif 'Estimated max dynamical RAM per process' in line: value = line.split('>')[-1] match = re.match(r'\s+([+-]?\d+(\.\d*)?|\.\d+([eE][+-]?\d+)?)\s*(Mb|MB|GB)', value) if match: try: parsed_data['estimated_ram_per_process'] = float(match.group(1)) parsed_data[f'estimated_ram_per_process{units_suffix}'] = match.group(4) except (IndexError, ValueError): pass # parse dynamical RAM estimates elif 'Estimated total dynamical RAM' in line: value = line.split('>')[-1] match = re.match(r'\s+([+-]?\d+(\.\d*)?|\.\d+([eE][+-]?\d+)?)\s*(Mb|MB|GB)', value) if match: try: parsed_data['estimated_ram_total'] = float(match.group(1)) parsed_data[f'estimated_ram_total{units_suffix}'] = match.group(4) except (IndexError, ValueError): pass # parse the global file, for informations that are written only once elif 'PWSCF' in line and 'WALL' in line: try: time = line.split('CPU')[1].split('WALL')[0] parsed_data['wall_time'] = time except Exception: logs.warning.append('Error while parsing wall time.') try: parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(time) except ValueError: raise QEOutputParsingError('Unable to convert wall_time in seconds.') # for later control on relaxation-dynamics convergence elif 'nstep' in line and '=' in line: maximum_ionic_steps = int(line.split()[2]) elif 'bfgs converged in' in line: marker_bfgs_converged = True elif 'number of bfgs steps' in line: try: parsed_data['number_ionic_steps'] += 1 except KeyError: parsed_data['number_ionic_steps'] = 1 elif 'A final scf calculation at the relaxed structure' in line: parsed_data['final_scf'] = True elif 'point group' in line: if 'k-point group' not in line: try: # Split line in components delimited by either space(s) or # parenthesis and filter out empty strings line_elems = [_f for _f in re.split(r' +|\(|\)', line) if _f] pg_international = line_elems[-1] pg_schoenflies = line_elems[-2] parsed_data['pointgroup_international'] = pg_international parsed_data['pointgroup_schoenflies'] = pg_schoenflies except Exception: warning = f'Problem parsing point group, I found: {line.strip()}' logs.warning.append(warning) # special parsing of c_bands error elif 'c_bands' in line and 'eigenvalues not converged' in line: c_bands_error = True elif 'iteration #' in line: if 'Calculation restarted' not in line and 'Calculation stopped' not in line: try: parsed_data['total_number_of_scf_iterations'] += 1 except KeyError: parsed_data['total_number_of_scf_iterations'] = 1 if c_bands_error: # if there is another iteration, c_bands is not necessarily a problem # I put a warning only if c_bands error appears in the last iteration c_bands_error = False if c_bands_error: logs.warning.append('c_bands: at least 1 eigenvalues not converged') # I split the output text in the atomic SCF calculations. # the initial part should be things already contained in the xml. # (cell, initial positions, kpoints, ...) and I skip them. # In case, parse for them before this point. # Put everything in a trajectory_data dictionary relax_steps = stdout.split('Self-consistent Calculation')[1:] relax_steps = [i.split('\n') for i in relax_steps] # now I create a bunch of arrays for every step. for data_step in relax_steps: trajectory_frame = {} for count, line in enumerate(data_step): if 'CELL_PARAMETERS' in line: try: a1 = [float(s) for s in data_step[count + 1].split()] a2 = [float(s) for s in data_step[count + 2].split()] a3 = [float(s) for s in data_step[count + 3].split()] # try except indexerror for not enough lines lattice = line.split('(')[1].split(')')[0].split('=') if lattice[0].lower() not in ['alat', 'bohr', 'angstrom']: raise QEOutputParsingError( 'Error while parsing cell_parameters: ' + f'unsupported units {lattice[0]}' ) if 'alat' in lattice[0].lower(): a1 = [alat * CONSTANTS.bohr_to_ang * float(s) for s in a1] a2 = [alat * CONSTANTS.bohr_to_ang * float(s) for s in a2] a3 = [alat * CONSTANTS.bohr_to_ang * float(s) for s in a3] lattice_parameter_b = float(lattice[1]) if abs(lattice_parameter_b - alat) > lattice_tolerance: raise QEOutputParsingError( 'Lattice parameters mismatch! ' + f'{lattice_parameter_b} vs {alat}' ) elif 'bohr' in lattice[0].lower(): lattice_parameter_b *= CONSTANTS.bohr_to_ang a1 = [CONSTANTS.bohr_to_ang * float(s) for s in a1] a2 = [CONSTANTS.bohr_to_ang * float(s) for s in a2] a3 = [CONSTANTS.bohr_to_ang * float(s) for s in a3] trajectory_data.setdefault('lattice_vectors_relax', []).append([a1, a2, a3]) except Exception: logs.warning.append('Error while parsing relaxation cell parameters.') elif 'ATOMIC_POSITIONS' in line: try: this_key = 'atomic_positions_relax' # the inizialization of tau prevent parsed_data to be associated # to the pointer of the previous iteration metric = line.split('(')[1].split(')')[0] if metric == 'crystal': this_key = 'atomic_fractionals_relax' elif metric not in ['alat', 'bohr', 'angstrom']: raise QEOutputParsingError('Error while parsing atomic_positions: units not supported.') # TODO: check how to map the atoms in the original scheme positions = [] for i in range(nat): line2 = data_step[count + 1 + i].split() tau = [float(s) for s in line2[1:4]] if metric == 'alat': tau = [alat * float(s) for s in tau] elif metric == 'bohr': tau = [CONSTANTS.bohr_to_ang * float(s) for s in tau] positions.append(tau) trajectory_data.setdefault(this_key, []).append(positions) except Exception: logs.warning.append('Error while parsing relaxation atomic positions.') # NOTE: in the above, the chemical symbols are not those of AiiDA # since the AiiDA structure is different. So, I assume now that the # order of atoms is the same of the input atomic structure. # Computed dipole correction in slab geometries. # save dipole in debye units, only at last iteration of scf cycle elif 'Computed dipole along edir' in line: j = count + 3 line2 = data_step[j] try: units = line2.split()[-1] if default_dipole_units.lower() not in units.lower(): # only debye raise QEOutputParsingError( f'Error parsing the dipole correction. Units {units} are not supported.' ) value = float(line2.split()[-2]) except IndexError: # on units pass # save only the last dipole correction while 'Computed dipole along edir' not in line2: j += 1 try: line2 = data_step[j] except IndexError: # The dipole is also written at the beginning of a new bfgs iteration break if 'End of self-consistent calculation' in line2: trajectory_data.setdefault('dipole', []).append(value) parsed_data['dipole' + units_suffix] = default_dipole_units break # saving the SCF convergence accuracy for each SCF cycle # If for some step this line is not printed, the later check with the scf_accuracy array length should catch it elif 'estimated scf accuracy' in line: try: value = float(line.split()[-2]) * CONSTANTS.ry_to_ev trajectory_data.setdefault('scf_accuracy', []).append(value) except Exception: logs.warning.append('Error while parsing scf accuracy.') elif 'convergence has been achieved in' in line or 'convergence NOT achieved after' in line: try: value = int(line.split('iterations')[0].split()[-1]) trajectory_data.setdefault('scf_iterations', []).append(value) except Exception: logs.warning.append('Error while parsing scf iterations.') elif 'Calculation stopped in scf loop at iteration' in line: try: value = int(line.split()[-1]) trajectory_data.setdefault('scf_iterations', []).append(value) except Exception: logs.warning.append('Error while parsing scf iterations.') elif 'End of self-consistent calculation' in line: # parse energy threshold for diagonalization algorithm try: j = 0 while True: j -= 1 line2 = data_step[count + j] if 'ethr' in line2: value = float(line2.split('=')[1].split(',')[0]) break trajectory_data.setdefault('energy_threshold', []).append(value) except Exception: logs.warning.append('Error while parsing ethr.') # parse final magnetic moments, if present try: j = 0 while True: j -= 1 line2 = data_step[count + j] if 'Magnetic moment per site' in line2: break if 'iteration' in line2: raise QEOutputParsingError mag_moments = [] charges = [] while True: j += 1 line2 = data_step[count + j] if 'atom:' in line2: mag_moments.append(float(line2.split('magn:')[1].split()[0])) charges.append(float(line2.split('charge:')[1].split()[0])) # Alternate parsing for new format introduced in v6.8 elif 'atom' in line2: mag_moments.append(float(line2.split('magn=')[1].split()[0])) charges.append(float(line2.split('charge=')[1].split()[0])) if len(mag_moments) == nat: break trajectory_data.setdefault('atomic_magnetic_moments', []).append(mag_moments) trajectory_data.setdefault('atomic_charges', []).append(charges) parsed_data['atomic_magnetic_moments' + units_suffix] = default_magnetization_units parsed_data['atomic_charges' + units_suffix] = default_charge_units except QEOutputParsingError: pass # grep energy and possibly, magnetization elif '!' in line: try: En = float(line.split('=')[1].split('Ry')[0]) * CONSTANTS.ry_to_ev # Up till v6.5, the line after total energy would be the Harris-Foulkes estimate, followed by the # estimated SCF accuracy. However, pw.x v6.6 removed the HF estimate line. marker = 'estimated scf accuracy' for i in range(5): subline = data_step[count + i] if marker in subline: try: E_acc = float(subline.split('<')[1].split('Ry')[0]) * CONSTANTS.ry_to_ev except Exception: pass else: break else: raise KeyError(f'could not find and parse the line with `{marker}`') for key, value in [['energy', En], ['energy_accuracy', E_acc]]: trajectory_data.setdefault(key, []).append(value) parsed_data[key + units_suffix] = default_energy_units # TODO: decide units for magnetization. now bohr mag/cell j = 0 while True: j += 1 line2 = data_step[count + j] for string, key in [ ['one-electron contribution', 'energy_one_electron'], ['hartree contribution', 'energy_hartree'], ['xc contribution', 'energy_xc'], ['ewald contribution', 'energy_ewald'], ['smearing contrib.', 'energy_smearing'], ['one-center paw contrib.', 'energy_one_center_paw'], ['est. exchange err', 'energy_est_exchange'], ['Fock energy', 'energy_fock'], ['Hubbard energy', 'energy_hubbard'], # Add also ENVIRON specific contribution to the total energy ['solvation energy', 'energy_solvation'], ['cavitation energy', 'energy_cavitation'], ['PV energy', 'energy_pv'], ['periodic energy correct.', 'energy_pbc_correction'], ['ionic charge energy', 'energy_ionic_charge'], ['external charges energy', 'energy_external_charges'] ]: if string in line2: value = grep_energy_from_line(line2) trajectory_data.setdefault(key, []).append(value) parsed_data[key + units_suffix] = default_energy_units # magnetizations if 'total magnetization' in line2: this_m = line2.split('=')[1].split('Bohr')[0] try: # magnetization might be a scalar value = float(this_m) except ValueError: # but can also be a three vector component in non-collinear calcs value = [float(i) for i in this_m.split()] trajectory_data.setdefault('total_magnetization', []).append(value) parsed_data['total_magnetization' + units_suffix] = default_magnetization_units elif 'absolute magnetization' in line2: value = float(line2.split('=')[1].split('Bohr')[0]) trajectory_data.setdefault('absolute_magnetization', []).append(value) parsed_data['absolute_magnetization' + units_suffix] = default_magnetization_units # exit loop elif 'convergence' in line2: break if vdw_correction: j = 0 while True: j += -1 line2 = data_step[count + j] if 'Non-local correlation energy' in line2: value = grep_energy_from_line(line2) trajectory_data.setdefault('energy_vdw', []).append(value) break parsed_data['energy_vdw' + units_suffix] = default_energy_units except Exception as exception: import traceback traceback.print_exc() print(exception) logs.warning.append('Error while parsing for energy terms.') elif 'the Fermi energy is' in line: try: value = float(line.split('is')[1].split('ev')[0]) trajectory_data.setdefault('fermi_energy', []).append(value) parsed_data['fermi_energy' + units_suffix] = default_energy_units except Exception: logs.warning.append('Error while parsing Fermi energy from the output file.') elif 'Forces acting on atoms' in line: try: forces = [] j = 0 while True: j += 1 line2 = data_step[count + j] if 'atom ' in line2: line2 = line2.split('=')[1].split() # CONVERT FORCES IN eV/Ang vec = [float(s) * CONSTANTS.ry_to_ev / CONSTANTS.bohr_to_ang for s in line2] forces.append(vec) if len(forces) == nat: break trajectory_data.setdefault('forces', []).append(forces) parsed_data['forces' + units_suffix] = default_force_units except Exception: logs.warning.append('Error while parsing forces.') # TODO: adding the parsing support for the decomposition of the forces elif 'Total force =' in line: try: # note that I can't check the units: not written in output! value = float(line.split('=')[1].split('Total')[0]) * CONSTANTS.ry_to_ev / CONSTANTS.bohr_to_ang trajectory_data.setdefault('total_force', []).append(value) parsed_data['total_force' + units_suffix] = default_force_units except Exception: logs.warning.append('Error while parsing total force.') elif ('entering subroutine stress ...' in line) or ('Computing stress (Cartesian axis) and pressure' in line): try: stress = [] count2 = None for k in range(15): # Up to 15 lines later - more than 10 are needed if vdW is turned on if 'P=' in data_step[count + k + 1]: count2 = count + k + 1 if count2 is None: logs.warning.append( 'Error while parsing stress tensor: ' '"P=" not found within 15 lines from the start of the stress block' ) else: if '(Ry/bohr**3)' not in data_step[count2]: raise QEOutputParsingError('Error while parsing stress: unexpected units.') for k in range(3): line2 = data_step[count2 + k + 1].split() vec = [float(s) * 10**(-9) * CONSTANTS.ry_si / (CONSTANTS.bohr_si)**3 for s in line2[0:3]] stress.append(vec) trajectory_data.setdefault('stress', []).append(stress) parsed_data['stress' + units_suffix] = default_stress_units except Exception: import traceback logs.warning.append(f'Error while parsing stress tensor: {traceback.format_exc()}') # Electronic and ionic dipoles when 'lelfield' was set to True in input parameters elif lelfield is True: if 'Electronic Dipole per cell' in line: electronic_dipole = float(line.split()[-1]) trajectory_frame.setdefault('electronic_dipole_cell_average', []).append(electronic_dipole) elif 'Ionic Dipole per cell' in line: ionic_dipole = float(line.split()[-1]) trajectory_frame.setdefault('ionic_dipole_cell_average', []).append(ionic_dipole) elif 'Electronic Dipole on Cartesian axes' in line: electronic_dipole = [float(data_step[count + i + 1].split()[1]) for i in range(3)] trajectory_frame.setdefault('electronic_dipole_cartesian_axes', []).append(electronic_dipole) elif 'Ionic Dipole on Cartesian axes' in line: ionic_dipole = [float(data_step[count + i + 1].split()[1]) for i in range(3)] trajectory_frame.setdefault('ionic_dipole_cartesian_axes', []).append(ionic_dipole) # End of trajectory frame, only keep last entries for dipole related values if lelfield is True: # For every property only get the last entry if possible try: ed_cell = trajectory_frame['electronic_dipole_cell_average'].pop() except (IndexError, KeyError): ed_cell = None try: ed_axes = trajectory_frame['electronic_dipole_cartesian_axes'].pop() except (IndexError, KeyError): ed_axes = None try: id_cell = trajectory_frame['ionic_dipole_cell_average'].pop() except (IndexError, KeyError): id_cell = None try: id_axes = trajectory_frame['ionic_dipole_cartesian_axes'].pop() except (IndexError, KeyError): id_axes = None # Only add them if all four properties were successfully parsed if all([value is not None for value in [ed_cell, ed_axes, id_cell, id_axes]]): trajectory_data.setdefault('electronic_dipole_cell_average', []).append(ed_cell) trajectory_data.setdefault('electronic_dipole_cartesian_axes', []).append(ed_axes) trajectory_data.setdefault('ionic_dipole_cell_average', []).append(id_cell) trajectory_data.setdefault('ionic_dipole_cartesian_axes', []).append(id_axes) # check consistency of scf_accuracy and scf_iterations if 'scf_accuracy' in trajectory_data: if 'scf_iterations' in trajectory_data: if len(trajectory_data['scf_accuracy']) != sum(trajectory_data['scf_iterations']): logs.warning.append( 'the length of scf_accuracy does not match the sum of the elements of scf_iterations.' ) else: logs.warning.append('"the scf_accuracy array was parsed but the scf_iterations was not.') # If specified in the parser options, parse the atomic occupations parse_atomic_occupations = parser_options.get('parse_atomic_occupations', False) if parse_atomic_occupations: atomic_occupations = {} hubbard_blocks = stdout.split('LDA+U parameters') for line in hubbard_blocks[-1].split('\n'): if 'Tr[ns(na)]' in line: values = line.split('=') atomic_index = values[0].split()[1] occupations = values[1].split() if len(occupations) == 1: atomic_occupations[atomic_index] = {'total': occupations[0]} elif len(occupations) == 3: atomic_occupations[atomic_index] = { 'up': occupations[0], 'down': occupations[1], 'total': occupations[2] } else: continue parsed_data['atomic_occupations'] = atomic_occupations # Ionic calculations and BFGS algorithm did not print that calculation is converged if 'atomic_positions_relax' in trajectory_data and not marker_bfgs_converged: logs.error.append('ERROR_IONIC_CONVERGENCE_NOT_REACHED') # Ionic calculation that hit the maximum number of ionic steps. Note: does not necessarily mean that convergence was # not reached as it could have occurred in the last step. if maximum_ionic_steps is not None and maximum_ionic_steps == parsed_data.get('number_ionic_steps', None): logs.warning.append('ERROR_MAXIMUM_IONIC_STEPS_REACHED') parsed_data['bands'] = bands_data parsed_data['structure'] = structure_data parsed_data['trajectory'] = trajectory_data # Double check to detect error messages if CRASH is found if crash_file is not None: for line in crash_file.split('\n'): # Compare the line to the known set of error and warning messages and add them to the log container detect_important_message(logs, line) if len(logs.error) == len(logs.warning) == 0: raise QEOutputParsingError('Parser cannot load basic info.') # Remove duplicate log messages by turning it into a set. Then convert back to list as that is what is expected logs.error = list(set(logs.error)) logs.warning = list(set(logs.warning)) return parsed_data, logs
[docs]def grep_energy_from_line(line): try: return float(line.split('=')[1].split('Ry')[0]) * CONSTANTS.ry_to_ev except Exception: raise QEOutputParsingError('Error while parsing energy')