diff --git a/package/AUTHORS b/package/AUTHORS index 1cb4b5b5c39..9ff617de041 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -262,7 +262,9 @@ Chronological list of authors - Sirsha Ganguly - Amruthesh Thirumalaiswamy - Ch Zhang - - Raúl Lois-Cuns + - Raúl Lois-Cuns + - Jeremy M.G. Leung + External code ------------- diff --git a/package/CHANGELOG b/package/CHANGELOG index e009d480303..cae4e45d22a 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -14,23 +14,28 @@ The rules for this file: ------------------------------------------------------------------------------- -??/??/?? IAlibay +??/??/?? IAlibay, jeremyleung521 * 2.11.0 Fixes + * Enhancements + * Allow NCDF reader to read Amber Restart Files with extension `.ncrst` + (Issue #5030, PR #5031) Changes + * Deprecations + * 10/18/25 IAlibay, orbeckst, BHM-Bob, TRY-ER, Abdulrahman-PROG, pbuslaev, - yuxuanzhuang, yuyuan871111, tanishy7777, tulga-rdn, Gareth-elliott, - hmacdope, tylerjereddy, cbouy, talagayev, DrDomenicoMarson, amruthesht, - p-j-smith, gitzhangch, raulloiscuns + yuxuanzhuang, yuyuan871111, tanishy7777, jpkrowe, tulga-rdn, + Gareth-elliott, hmacdope, tylerjereddy, cbouy, talagayev, + DrDomenicoMarson, amruthesht, p-j-smith, gitzhangch, raulloiscuns * 2.10.0 @@ -103,7 +108,6 @@ Enhancements MDAnalysisTest.util (PR #5038) * Enables parallelization for `analysis.polymer.PersistenceLength` (Issue #4671, PR #5074) - Changes * Output precision for LAMMPS Data files set to 10 decimals (PR #5053) * Support for Python 3.10 was removed, in line with SPEC0 (PR #5121). @@ -114,7 +118,6 @@ Changes `analysis.lineardensity.LinearDensity.totalmass` (PR #5007) * Remove `default` channel from RTD conda env. (Issue # 5036, PR # 5037) - Deprecations * The RDKit converter parameter `NoImplicit` has been deprecated in favour of `implicit_hydrogens` and `inferrer` parameters. `max_iter` has been moved diff --git a/package/MDAnalysis/coordinates/INPCRD.py b/package/MDAnalysis/coordinates/INPCRD.py index 2755a5e163e..79d04a96c9d 100644 --- a/package/MDAnalysis/coordinates/INPCRD.py +++ b/package/MDAnalysis/coordinates/INPCRD.py @@ -22,29 +22,92 @@ # -"""INPCRD structure files in MDAnalysis --- :mod:`MDAnalysis.coordinates.INPCRD` +"""AMBER coordinate/restart files in MDAnalysis --- :mod:`MDAnalysis.coordinates.INPCRD` ================================================================================ -Read coordinates in Amber_ coordinate/restart file (suffix "inpcrd"). +AMBER_ can write :ref:`ASCII restart` ("inpcrd") and +:ref:`binary restart` ("ncrst") coordinate files. MDAnalysis +supports reading of both file formats. -.. _Amber: https://ambermd.org/FileFormats.php +.. rubric:: Units +AMBER restart files are assumed to be in the following units: -Classes -------- +* length in Angstrom (Å) +* time in ps +* velocity (NCRST only) in Å / ps +* force (NCRST only) in kcal / (mol * Å) + + +.. _ascii-restart: + +ASCII INPCRD restart files +-------------------------- + +ASCII AMBER_ INPCRD coordinate files (as defined in `AMBER INPCRD FORMAT`_) +are handled by the :class:`INPReader`. + +AMBER ASICC restart files are recognised by the suffix '.inpcrd', '.restrt', or +'.rst7' .. autoclass:: INPReader :members: + +.. _netcdf-restart: + +Binary NetCDF restart files +--------------------------- + +The `AMBER netcdf`_ restart format makes use of NetCDF_ (Network Common Data +Form) format. Such binary restart files are recognised in MDAnalysis by the +suffix '.ncrst', '.ncrestrt' or '.ncrst7' and read by the :class:`NCRSTReader`. + +Binary restart files can also contain velocity and force information, and can +record the simulation time step. Whilst the `AMBER netcdf`_ format details +default unit values of ångström and picoseconds, these can in theory occupy +any unit type. However, at the moment MDAnalysis only supports the default +types and will raise a :exc:`NotImplementedError` if anything else is detected. + +.. autoclass:: NCRSTReader + :members: + + +.. Links + +.. _AMBER: http://ambermd.org +.. _AMBER INPCRD FORMAT: https://ambermd.org/FileFormats.php#restart +.. _AMBER netcdf: http://ambermd.org/netcdf/nctraj.xhtml +.. _NetCDF: http://www.unidata.ucar.edu/software/netcdf + """ from . import base +import warnings +import logging +from scipy.io import netcdf_file + +from .timestep import Timestep +from ..lib.util import store_init_arguments +from .TRJ import NCDFPicklable, NCDFMixin + +logger = logging.getLogger("MDAnalysis.coordinates.AMBER") class INPReader(base.SingleFrameReaderBase): - """Reader for Amber restart files.""" + """Reader for Amber restart files. + + .. rubric:: Limitations + + * Box information is not read (or checked for). + * Velocities are currently *not supported*. + + .. versionchanged: 2.10.0 + Now automatically detects files with .rst7 extension. + + """ - format = ["INPCRD", "RESTRT"] + format = ["INPCRD", "RESTRT", "RST7"] units = {"length": "Angstrom"} def _read_first_frame(self): @@ -89,3 +152,117 @@ def parse_n_atoms(filename, **kwargs): f.readline() n_atoms = int(f.readline().split()[0]) return n_atoms + + +class NCRSTReader(base.SingleFrameReaderBase, NCDFMixin): + """Reader for `AMBER NETCDF format`_ (version 1.0 rev C) restart files. + + This reader is a :class:`SingleFrameReaderBase` adaptation of the + :class:`~MDAnalysis.coordinates.TRJ.NCDFReader` AMBER NETCDF trajectory reader. + + AMBER binary restart files are automatically recognised by the file + extensions ".ncrst", ".ncrestrt", and ".ncrst7". + + The number of atoms (`n_atoms`) does not have to be provided as it can + be read from the input NETCDF file. + + Current simulation time is autodetected and if available is read into the + :attr:`Timestep.time` attribute. + + Velocities are autodetected and read into the :attr:`Timestep._velocities` + attribute. + + Forces are autodetected and read into the :attr:`Timestep._forces` + attribute. + + Periodic unit cell information is detected and used to populate the + :attr:`Timestep.dimensions` attribute. (If no unit cell is available in + the restart file, then :attr:`Timestep.dimensions` will return + ``[0,0,0,0,0,0]``). + + Support for the *mmap* keyword is available as detailed + in :class:`~MDAnalysis.coordinates.TRJ.NCDFReader` and + :mod:`scipy.io.netcdf.netcdf_file`. The use of + ``mmap=True`` leads to around a 2x read speed improvement in a ~ 1 million + atom system (AMBER STMV benchmark). As per the :class:`NCDFReader`, the + default behaviour is ``mmap=None``, which means that the default behaviour + of :class:`scipy.io.netcdf.netcdf_file` prevails. + + The NCRST reader also uses a custom Timestep object with C-style memory + mapping in order to match the NCDFReader. + + .. rubric:: Limitations + + * Only NCRST files with time in ps, lengths in Angstroem and angles in + degree are processed. + * Restart files without coordinate information are not supported. + * Replica exchange variables are not supported. + + .. _AMBER NETCDF format: http://ambermd.org/netcdf/nctraj.xhtml + + See Also + -------- + :class:`~MDAnalysis.coordinates.TRJ.NCDFReader` + :class:`~MDAnalysis.coordinates.TRJ.NCDFWriter` + + + .. versionadded: 2.10.0 + """ + + format = ["NCRST", "NCRESTRT", "NCRST7"] + version = "1.0" + units = { + "time": "ps", + "length": "Angstrom", + "velocity": "Angstrom/ps", + "force": "kcal/(mol*Angstrom)", + } + + _Timestep = Timestep + + @store_init_arguments + def __init__( + self, filename, n_atoms=None, convert_units=None, mmap=None, **kwargs + ): + # Assign input mmap value + self._mmap = mmap + + super(NCRSTReader, self).__init__( + filename, convert_units, n_atoms, **kwargs + ) + + @staticmethod + def parse_n_atoms(filename, **kwargs): + with scipy.io.netcdf_file(filename, mmap=None) as f: + n_atoms = f.dimensions["atom"] + return n_atoms + + @staticmethod + def _verify_units(eval_units, expected_units): + if eval_units.decode("utf-8") != expected_units: + errmsg = ( + "NCRSTReader currently assumes that the trajectory " + "was written in units of {0} instead of {1}".format( + eval_units.decode("utf-8"), expected_units + ) + ) + raise NotImplementedError(errmsg) + + def _read_first_frame(self): + """Function to read NetCDF restart file and fill timestep""" + # Open netcdf file via context manager + # ensure maskandscale is off so we don't end up double scaling + + with NCDFPicklable( + self.filename, mode="r", mmap=self._mmap, maskandscale=False + ) as self.trjfile: + + self._check_conventions(n_atoms=self.n_atoms) + + self.n_frames = 1 + + self._read_values( + frame=() + ) # AMBERRESTART convention files have dimensionless datasets + + self.ts.frame = 0 # 0-indexed diff --git a/package/MDAnalysis/coordinates/TRJ.py b/package/MDAnalysis/coordinates/TRJ.py index e9799e4d945..46296f9afd8 100644 --- a/package/MDAnalysis/coordinates/TRJ.py +++ b/package/MDAnalysis/coordinates/TRJ.py @@ -129,20 +129,20 @@ .. _GitHub Discussions: https://github.com/MDAnalysis/mdanalysis/discussions """ -import scipy.io.netcdf import numpy as np -import warnings -import errno -import logging from math import isclose +from scipy.io import netcdf_file import MDAnalysis from .timestep import Timestep from . import base from ..lib import util from ..lib.util import store_init_arguments -logger = logging.getLogger("MDAnalysis.coordinates.AMBER") +import warnings +import errno +import logging +logger = logging.getLogger("MDAnalysis.coordinates.AMBER") try: import netCDF4 @@ -355,113 +355,28 @@ def close(self): self.trjfile = None -class NCDFReader(base.ReaderBase): - """Reader for `AMBER NETCDF format`_ (version 1.0). - - AMBER binary trajectories are automatically recognised by the - file extension ".ncdf". - - The number of atoms (`n_atoms`) does not have to be provided as it can - be read from the trajectory. The trajectory reader can randomly access - frames and therefore supports direct indexing (with 0-based frame - indices) and full-feature trajectory iteration, including slicing. - - Velocities are autodetected and read into the - :attr:`Timestep._velocities` attribute. - - Forces are autodetected and read into the - :attr:`Timestep._forces` attribute. - - Periodic unit cell information is detected and used to populate the - :attr:`Timestep.dimensions` attribute. (If no unit cell is available in - the trajectory, then :attr:`Timestep.dimensions` will return - ``[0,0,0,0,0,0]``). - - Current limitations: - - * only trajectories with time in ps and lengths in Angstroem are processed - - The NCDF reader uses :mod:`scipy.io.netcdf` and therefore :mod:`scipy` must - be installed. It supports the *mmap* keyword argument (when reading): - ``mmap=True`` is memory efficient and directly maps the trajectory on disk - to memory (using the :class:`~mmap.mmap`); ``mmap=False`` may consume large - amounts of memory because it loads the whole trajectory into memory but it - might be faster. The default is ``mmap=None`` and then default behavior of - :class:`scipy.io.netcdf_file` prevails, i.e. ``True`` when - *filename* is a file name, ``False`` when *filename* is a file-like object. - - .. _AMBER NETCDF format: http://ambermd.org/netcdf/nctraj.xhtml - - See Also - -------- - :class:`NCDFWriter` - - - .. versionadded: 0.7.6 - .. versionchanged:: 0.10.0 - Added ability to read Forces - .. versionchanged:: 0.11.0 - Frame labels now 0-based instead of 1-based. - kwarg `delta` renamed to `dt`, for uniformity with other Readers. - .. versionchanged:: 0.17.0 - Uses :mod:`scipy.io.netcdf` and supports the *mmap* kwarg. - .. versionchanged:: 0.20.0 - Now reads scale_factors for all expected AMBER convention variables. - Timestep variables now adhere standard MDAnalysis units, with lengths - of angstrom, time of ps, velocity of angstrom/ps and force of - kJ/(mol*Angstrom). It is noted that with 0.19.2 and earlier versions, - velocities would have often been reported in values of angstrom/AKMA - time units instead (Issue #2323). - .. versionchanged:: 1.0.0 - Support for reading `degrees` units for `cell_angles` has now been - removed (Issue #2327) - .. versionchanged:: 2.0.0 - Now use a picklable :class:`scipy.io.netcdf_file`-- - :class:`NCDFPicklable`. - Reading of `dt` now defaults to 1.0 ps if `dt` cannot be extracted from - the first two frames of the trajectory. - :meth:`Writer` now also sets `convert_units`, `velocities`, `forces` and - `scale_factor` information for the :class:`NCDFWriter`. - - """ - - format = ['NCDF', 'NC'] - multiframe = True - version = "1.0" - units = {'time': 'ps', - 'length': 'Angstrom', - 'velocity': 'Angstrom/ps', - 'force': 'kcal/(mol*Angstrom)'} - - _Timestep = Timestep - - @store_init_arguments - def __init__(self, filename, n_atoms=None, mmap=None, **kwargs): - - self._mmap = mmap - - super(NCDFReader, self).__init__(filename, **kwargs) - - # ensure maskandscale is off so we don't end up double scaling - self.trjfile = NCDFPicklable(self.filename, - mmap=self._mmap, - maskandscale=False) +class NCDFMixin(): + def _check_conventions(self, n_atoms=None): + convention = 'AMBERRESTART' if self.__class__.__name__ == 'NCRSTReader' else 'AMBER' + file_format = 'restart file' if self.__class__.__name__ == 'NCRSTReader' else 'trajectory' # AMBER NetCDF files should always have a convention try: conventions = self.trjfile.Conventions - if not ('AMBER' in conventions.decode('utf-8').split(',') or - 'AMBER' in conventions.decode('utf-8').split()): - errmsg = ("NCDF trajectory {0} does not conform to AMBER " + check = [conventions.decode('utf-8').split(','), + conventions.decode('utf-8').split()] + + if not (convention in check[0] or convention in check[1]): + errmsg = ("NCDF {0} {1} does not conform to AMBER " "specifications, " "http://ambermd.org/netcdf/nctraj.xhtml " - "('AMBER' must be one of the token in attribute " - "Conventions)".format(self.filename)) + "({2} must be one of the token in attribute " + "Conventions)".format(file_format, self.filename, convention)) logger.fatal(errmsg) raise TypeError(errmsg) except AttributeError: - errmsg = "NCDF trajectory {0} is missing Conventions".format( - self.filename) + errmsg = "NCDF {0} {1} is missing Conventions".format( + file_format, self.filename) logger.fatal(errmsg) raise ValueError(errmsg) from None @@ -469,23 +384,23 @@ def __init__(self, filename, n_atoms=None, mmap=None, **kwargs): try: ConventionVersion = self.trjfile.ConventionVersion.decode('utf-8') if not ConventionVersion == self.version: - wmsg = ("NCDF trajectory format is {0!s} but the reader " - "implements format {1!s}".format( - ConventionVersion, self.version)) + wmsg = ("NCDF {0} format is {1!s} but the reader " + "implements format {2!s}".format( + file_format, ConventionVersion, self.version)) warnings.warn(wmsg) logger.warning(wmsg) except AttributeError: - errmsg = "NCDF trajectory {0} is missing ConventionVersion".format( - self.filename) + errmsg = "NCDF {0} {1} is missing ConventionVersion".format( + file_format, self.filename) raise ValueError(errmsg) from None # The AMBER NetCDF standard enforces 64 bit offsets if not self.trjfile.version_byte == 2: - errmsg = ("NCDF trajectory {0} does not conform to AMBER " + errmsg = ("NCDF {0} {1} does not conform to AMBER " "specifications, as detailed in " "https://ambermd.org/netcdf/nctraj.xhtml " "(NetCDF file does not use 64 bit offsets " - "[version_byte = 2])".format(self.filename)) + "[version_byte = 2])".format(file_format, self.filename)) logger.fatal(errmsg) raise TypeError(errmsg) @@ -495,16 +410,16 @@ def __init__(self, filename, n_atoms=None, mmap=None, **kwargs): errmsg = "Incorrect spatial value for NCDF trajectory file" raise TypeError(errmsg) except KeyError: - errmsg = "NCDF trajectory does not contain spatial dimension" + errmsg = "NCDF {0} does not contain spatial dimension".format(file_format) raise ValueError(errmsg) from None # AMBER NetCDF specs require program and programVersion. Warn users # if those attributes do not exist if not (hasattr(self.trjfile, 'program') and hasattr(self.trjfile, 'programVersion')): - wmsg = ("NCDF trajectory {0} may not fully adhere to AMBER " + wmsg = ("The NCDF {0} {1} may not fully adhere to AMBER " "standards as either the `program` or `programVersion` " - "attributes are missing".format(self.filename)) + "attributes are missing".format(file_format, self.filename)) warnings.warn(wmsg) logger.warning(wmsg) @@ -516,22 +431,8 @@ def __init__(self, filename, n_atoms=None, mmap=None, **kwargs): "is used!".format(n_atoms, self.n_atoms)) raise ValueError(errmsg) except KeyError: - errmsg = ("NCDF trajectory {0} does not contain atom " - "information".format(self.filename)) - raise ValueError(errmsg) from None - - try: - self.n_frames = self.trjfile.dimensions['frame'] - # example trajectory when read with scipy.io.netcdf has - # dimensions['frame'] == None (indicating a record dimension that - # can grow) whereas if read with netCDF4 I get - # len(dimensions['frame']) == 10: in any case, we need to get - # the number of frames from somewhere such as the time variable: - if self.n_frames is None: - self.n_frames = self.trjfile.variables['coordinates'].shape[0] - except KeyError: - errmsg = (f"NCDF trajectory {self.filename} does not contain " - f"frame information") + errmsg = ("NCDF {0} {1} does not contain atom " + "information".format(file_format, self.filename)) raise ValueError(errmsg) from None try: @@ -549,15 +450,12 @@ def __init__(self, filename, n_atoms=None, mmap=None, **kwargs): self.has_time = True except KeyError: self.has_time = False - wmsg = ("NCDF trajectory does not contain `time` information;" - " `time` will be set as an increasing index") + wmsg = ("NCDF {0} {1} does not contain `time` information;" + " `time` will be set as an increasing index".format( + file_format, self.filename)) warnings.warn(wmsg) logger.warning(wmsg) - - self._verify_units(self.trjfile.variables['coordinates'].units, - 'angstrom') - # Check for scale_factor attributes for all data variables and # store this to multiply through later (Issue #2323) self.scale_factors = {'time': None, @@ -579,6 +477,18 @@ def __init__(self, filename, n_atoms=None, mmap=None, **kwargs): "not implemented".format(variable)) raise NotImplementedError(errmsg) + # Default to length units of Angstrom + try: + self._verify_units(self.trjfile.variables['coordinates'].units, + 'angstrom') + except KeyError: + # Technically coordinate information is not required, however + # the lack of it in a restart file is highly unlikely + errmsg = ("NetCDF restart file {0} is missing coordinate " + "information ".format(self.filename)) + logger.fatal(errmsg) + raise ValueError(errmsg) + self.has_velocities = 'velocities' in self.trjfile.variables if self.has_velocities: self._verify_units(self.trjfile.variables['velocities'].units, @@ -605,53 +515,23 @@ def __init__(self, filename, n_atoms=None, mmap=None, **kwargs): reader=self, # for dt **self._ts_kwargs) - # load first data frame - self._read_frame(0) - - @staticmethod - def _verify_units(eval_unit, expected_units): - if eval_unit.decode('utf-8') != expected_units: - errmsg = ("NETCDFReader currently assumes that the trajectory " - "was written in units of {0} instead of {1}".format( - eval_unit.decode('utf-8'), expected_units)) - raise NotImplementedError(errmsg) - - @staticmethod - def parse_n_atoms(filename, **kwargs): - with scipy.io.netcdf_file(filename, mmap=None) as f: - n_atoms = f.dimensions['atom'] - return n_atoms - - def _get_var_and_scale(self, variable, frame): - """Helper function to get variable at given frame from NETCDF file and - scale if necessary. - - Note - ---- - If scale_factor is 1.0 within numerical precision then we don't apply - the scaling. - """ - scale_factor = self.scale_factors[variable] - if scale_factor is None or isclose(scale_factor, 1): - return self.trjfile.variables[variable][frame] - else: - return self.trjfile.variables[variable][frame] * scale_factor - - def _read_frame(self, frame): + def _read_values(self, frame): ts = self.ts if self.trjfile is None: raise IOError("Trajectory is closed") - if np.dtype(type(frame)) != np.dtype(int): + if (not isinstance(frame, tuple)) and (np.dtype(type(frame)) != np.dtype(int)): # convention... for netcdf could also be a slice raise TypeError("frame must be a positive integer or zero") - if frame >= self.n_frames or frame < 0: + if (not isinstance(frame, tuple)) and (frame >= self.n_frames or frame < 0): raise IndexError("frame index must be 0 <= frame < {0}".format( self.n_frames)) # note: self.trjfile.variables['coordinates'].shape == (frames, n_atoms, 3) + ts._pos[:] = self._get_var_and_scale('coordinates', frame) if self.has_time: - ts.time = self._get_var_and_scale('time', frame) + time = self._get_var_and_scale('time', frame) + ts.time = time[0] if isinstance(time, (list, np.ndarray)) else time if self.has_velocities: ts._velocities[:] = self._get_var_and_scale('velocities', frame) if self.has_forces: @@ -663,8 +543,7 @@ def _read_frame(self, frame): ts.dimensions = unitcell if self.convert_units: self.convert_pos_from_native(ts._pos) # in-place ! - self.convert_time_from_native( - ts.time) # in-place ! (hope this works...) + self.convert_time_from_native(ts.time) # in-place ! if self.has_velocities: self.convert_velocities_from_native(ts._velocities, inplace=True) @@ -673,6 +552,156 @@ def _read_frame(self, frame): if self.periodic: # in-place ! (only lengths) self.convert_pos_from_native(ts.dimensions[:3]) + + + @staticmethod + def _verify_units(eval_unit, expected_units): + if eval_unit.decode('utf-8') != expected_units: + errmsg = ("NETCDFReader currently assumes that the trajectory " + "was written in units of {0} instead of {1}".format( + eval_unit.decode('utf-8'), expected_units)) + raise NotImplementedError(errmsg) + + + def _get_var_and_scale(self, variable, frame): + """Helper function to get variable at given frame from NETCDF file and + scale if necessary. + + Note + ---- + If scale_factor is 1.0 within numerical precision then we don't apply + the scaling. + """ + scale_factor = self.scale_factors[variable] + if scale_factor is None or isclose(scale_factor, 1): + return self.trjfile.variables[variable][frame] + else: + return self.trjfile.variables[variable][frame] * scale_factor + + + +class NCDFReader(base.ReaderBase, NCDFMixin): + """Reader for `AMBER NETCDF format`_ (version 1.0). + + AMBER binary trajectories are automatically recognised by the + file extension ".ncdf". + + The number of atoms (`n_atoms`) does not have to be provided as it can + be read from the trajectory. The trajectory reader can randomly access + frames and therefore supports direct indexing (with 0-based frame + indices) and full-feature trajectory iteration, including slicing. + + Velocities are autodetected and read into the + :attr:`Timestep._velocities` attribute. + + Forces are autodetected and read into the + :attr:`Timestep._forces` attribute. + + Periodic unit cell information is detected and used to populate the + :attr:`Timestep.dimensions` attribute. (If no unit cell is available in + the trajectory, then :attr:`Timestep.dimensions` will return + ``[0,0,0,0,0,0]``). + + Current limitations: + + * only trajectories with time in ps and lengths in Angstroem are processed + + The NCDF reader uses :mod:`scipy.io.netcdf` and therefore :mod:`scipy` must + be installed. It supports the *mmap* keyword argument (when reading): + ``mmap=True`` is memory efficient and directly maps the trajectory on disk + to memory (using the :class:`~mmap.mmap`); ``mmap=False`` may consume large + amounts of memory because it loads the whole trajectory into memory but it + might be faster. The default is ``mmap=None`` and then default behavior of + :class:`scipy.io.netcdf_file` prevails, i.e. ``True`` when + *filename* is a file name, ``False`` when *filename* is a file-like object. + + .. _AMBER NETCDF format: http://ambermd.org/netcdf/nctraj.xhtml + + See Also + -------- + :class:`NCDFWriter` + + + .. versionadded: 0.7.6 + .. versionchanged:: 0.10.0 + Added ability to read Forces + .. versionchanged:: 0.11.0 + Frame labels now 0-based instead of 1-based. + kwarg `delta` renamed to `dt`, for uniformity with other Readers. + .. versionchanged:: 0.17.0 + Uses :mod:`scipy.io.netcdf` and supports the *mmap* kwarg. + .. versionchanged:: 0.20.0 + Now reads scale_factors for all expected AMBER convention variables. + Timestep variables now adhere standard MDAnalysis units, with lengths + of angstrom, time of ps, velocity of angstrom/ps and force of + kJ/(mol*Angstrom). It is noted that with 0.19.2 and earlier versions, + velocities would have often been reported in values of angstrom/AKMA + time units instead (Issue #2323). + .. versionchanged:: 1.0.0 + Support for reading `degrees` units for `cell_angles` has now been + removed (Issue #2327) + .. versionchanged:: 2.0.0 + Now use a picklable :class:`scipy.io.netcdf_file`-- + :class:`NCDFPicklable`. + Reading of `dt` now defaults to 1.0 ps if `dt` cannot be extracted from + the first two frames of the trajectory. + :meth:`Writer` now also sets `convert_units`, `velocities`, `forces` and + `scale_factor` information for the :class:`NCDFWriter`. + + """ + + format = ['NCDF', 'NC'] + multiframe = True + version = "1.0" + units = {'time': 'ps', + 'length': 'Angstrom', + 'velocity': 'Angstrom/ps', + 'force': 'kcal/(mol*Angstrom)'} + + _Timestep = Timestep + + @store_init_arguments + def __init__(self, filename, n_atoms=None, mmap=None, **kwargs): + + self._mmap = mmap + + super(NCDFReader, self).__init__(filename, **kwargs) + + # ensure maskandscale is off so we don't end up double scaling + self.trjfile = NCDFPicklable(self.filename, + mmap=self._mmap, + maskandscale=False) + + self._check_conventions(n_atoms) + + try: + self.n_frames = self.trjfile.dimensions['frame'] + # example trajectory when read with scipy.io.netcdf has + # dimensions['frame'] == None (indicating a record dimension that + # can grow) whereas if read with netCDF4 I get + # len(dimensions['frame']) == 10: in any case, we need to get + # the number of frames from somewhere such as the time variable: + if self.n_frames is None: + self.n_frames = self.trjfile.variables['coordinates'].shape[0] + except KeyError: + errmsg = (f"NCDF trajectory {self.filename} does not contain " + f"frame information") + raise ValueError(errmsg) from None + + # load first data frame + self._read_frame(0) + + @staticmethod + def parse_n_atoms(filename, **kwargs): + with netcdf_file(filename, mmap=None) as f: + n_atoms = f.dimensions['atom'] + return n_atoms + + def _read_frame(self, frame): + ts = self.ts + + self._read_values(frame) + ts.frame = frame # frame labels are 0-based self._current_frame = frame return ts @@ -960,9 +989,9 @@ def _init_netcdf(self, periodic=True): ncfile = netCDF4.Dataset(self.filename, 'w', format='NETCDF3_64BIT') else: - ncfile = scipy.io.netcdf_file(self.filename, - mode='w', version=2, - maskandscale=False) + ncfile = netcdf_file(self.filename, + mode='w', version=2, + maskandscale=False) wmsg = ("Could not find netCDF4 module. Falling back to MUCH " "slower scipy.io.netcdf implementation for writing.") logger.warning(wmsg) @@ -1181,12 +1210,15 @@ def _write_next_timestep(self, ts): self.curr_frame += 1 def close(self): - if self.trjfile is not None: - self.trjfile.close() - self.trjfile = None + try: + if self.trjfile is not None: + self.trjfile.close() + self.trjfile = None + except AttributeError: + pass -class NCDFPicklable(scipy.io.netcdf_file): +class NCDFPicklable(netcdf_file): """NetCDF file object (read-only) that can be pickled. This class provides a file-like object (as returned by diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py index f81c4915514..007e260f351 100644 --- a/package/MDAnalysis/coordinates/__init__.py +++ b/package/MDAnalysis/coordinates/__init__.py @@ -216,12 +216,17 @@ class can choose an appropriate reader automatically. | | | | Module :mod:`MDAnalysis.coordinates.TRJ` | +---------------+-----------+-------+------------------------------------------------------+ | AMBER | inpcrd, | r | formatted (ASCII) coordinate/restart file | - | | restrt | | Module :mod:`MDAnalysis.coordinates.INPCRD` | + | | restrt, | | Module :mod:`MDAnalysis.coordinates.INPCRD` | + | | rst7 | | | +---------------+-----------+-------+------------------------------------------------------+ | AMBER | ncdf, nc | r/w | binary (NetCDF) trajectories are fully supported with| | | | | optional `netcdf4-python`_ module (coordinates and | | | | | velocities). Module :mod:`MDAnalysis.coordinates.TRJ`| +---------------+-----------+-------+------------------------------------------------------+ + | AMBER | ncrst, | r | binary (NetCDF) restart file | + | | ncrestrt, | | Module :mod:`MDAnalysis.coordinates.INPCRD` | + | | ncrst7 | | | + +---------------+-----------+-------+------------------------------------------------------+ | Brookhaven | pdb/ent | r/w | a relaxed PDB format (as used in MD simulations) | | [#a]_ | | | is read by default; Multiple frames (MODEL) | | | | | are supported but require the *multiframe* keyword. | diff --git a/testsuite/MDAnalysisTests/coordinates/test_ncrst.py b/testsuite/MDAnalysisTests/coordinates/test_ncrst.py new file mode 100644 index 00000000000..b36ec960237 --- /dev/null +++ b/testsuite/MDAnalysisTests/coordinates/test_ncrst.py @@ -0,0 +1,332 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +from __future__ import absolute_import +import MDAnalysis as mda +import numpy as np +from scipy.io import netcdf_file +from MDAnalysis.coordinates.INPCRD import NCRSTReader +import pytest +from numpy.testing import assert_equal, assert_almost_equal + +from MDAnalysisTests.datafiles import PRMNCRSTbox, NCRSTbox + + +class _NCRestartTest(object): + + prec = 5 + + @pytest.fixture(scope="class") + def universe(self): + return mda.Universe(self.topology, self.filename, mmap=self.mmap) + + def test_wrong_natoms(self): + with pytest.raises(ValueError): + NCRSTReader(self.filename, n_atoms=2, mmap=self.mmap) + + def test_mmap_value(self): + reader = NCRSTReader(self.filename, mmap=self.mmap) + if not self.mmap: + assert_equal(reader._mmap, None) + else: + assert_equal(reader._mmap, self.mmap) + + def test_remarks(self, universe): + title = universe.trajectory.remarks.decode("utf-8") + assert_equal(self.title, title) + + def test_n_atoms(self, universe): + assert_equal(self.n_atoms, len(universe.atoms)) + + def test_numres(self, universe): + assert_equal(self.n_res, len(universe.residues)) + + def test_time(self, universe): + assert_equal(self.time, universe.trajectory.time) + + def test_frame(self, universe): + assert_equal(universe.trajectory.frame, 0) + + # Test slices as per base + def test_full_slice(self, universe): + trj_iter = universe.trajectory[:] + frames = [ts.frame for ts in trj_iter] + assert_equal(frames, np.arange(universe.trajectory.n_frames)) + + def test_last_slice(self, universe): + trj_iter = universe.trajectory[-1:] + frames = [ts.frame for ts in trj_iter] + assert_equal(frames, np.arange(universe.trajectory.n_frames)) + + def test_num_frames(self, universe): + assert_equal(universe.trajectory.n_frames, 1) + + def test_dt(self, universe): + with pytest.warns(UserWarning) as record: + assert_equal(universe.trajectory.dt, 1.0) + + assert len(record) == 1 + wmsg = "Reader has no dt information, set to 1.0 ps" + assert str(record[0].message.args[0]) == wmsg + + def test_units(self, universe): + units = { + "time": "ps", + "length": "Angstrom", + "velocity": "Angstrom/ps", + "force": "kcal/(mol*Angstrom)", + } + assert_equal(universe.trajectory.units, units) + + def test_has_velocities(self, universe): + assert_equal(self.has_velocities, universe.trajectory.has_velocities) + + def test_has_forces(self, universe): + assert_equal(self.has_forces, universe.trajectory.has_forces) + + +class TestReadACEBox(_NCRestartTest): + """ACE in water with Coordinates, Forces and Velocities""" + + # Placeholder values + topology = PRMNCRSTbox + filename = NCRSTbox + title = "Cpptraj Generated Restart" + n_atoms = 1398 + n_res = 465 + time = 1.0 + has_velocities = True + has_forces = True + mmap = None + + def test_positions_resid1(self, universe): + ag = universe.select_atoms("resid 1") + expected = np.array( + [ + [15.24987316, 12.57817841, 15.19173145], + [14.92551136, 13.58887959, 14.94400883], + [15.28570271, 14.3409605, 15.64596176], + [13.8408432, 13.63474751, 15.04143524], + [15.29404449, 14.23300934, 13.60826206], + [14.43975544, 14.56124306, 12.85594559], + ], + dtype=np.float32, + ) + assert_almost_equal(expected, ag.positions, self.prec) + + def test_positions_resid42(self, universe): + ag = universe.select_atoms("resid 42") + + expected = np.array( + [ + [21.78003883, 21.15958595, 19.864048], + [20.85104752, 21.08408165, 20.08200645], + [22.18930817, 20.41044617, 20.29708672], + ], + dtype=np.float32, + ) + assert_almost_equal(expected, ag.positions, self.prec) + + +class _NCRSTGenerator(object): + """A basic modular ncrst writer based on :class:`NCDFWriter`""" + + def create_ncrst(self, params): + # Create under context manager + with netcdf_file( + params["filename"], mode="w", version=params["version_byte"] + ) as ncrst: + # Top level attributes + if params["Conventions"]: + setattr(ncrst, "Conventions", params["Conventions"]) + if params["ConventionVersion"]: + setattr( + ncrst, "ConventionVersion", params["ConventionVersion"] + ) + if params["program"]: + setattr(ncrst, "program", params["program"]) + if params["programVersion"]: + setattr(ncrst, "programVersion", params["programVersion"]) + + # Dimensions + if params["n_atoms"]: + ncrst.createDimension("atom", params["n_atoms"]) + if params["spatial"]: + ncrst.createDimension("spatial", params["spatial"]) + if params["time"]: + ncrst.createDimension("time", 1) + + # Variables + if params["time"]: + time = ncrst.createVariable("time", "d", ("time",)) + setattr(time, "units", params["time"]) + time[:] = 1.0 + # Spatial or atom dependent variables + if (params["spatial"]) and (params["n_atoms"]): + if params["coords"]: + coords = ncrst.createVariable( + "coordinates", "f8", ("atom", "spatial") + ) + setattr(coords, "units", params["coords"]) + coords[:] = np.asarray( + range(params["spatial"]), dtype=np.float32 + ) + spatial = ncrst.createVariable("spatial", "c", ("spatial",)) + spatial[:] = np.asarray(list("xyz")[: params["spatial"]]) + velocs = ncrst.createVariable( + "velocities", "f8", ("atom", "spatial") + ) + setattr(velocs, "units", "angstrom/picosecond") + velocs[:] = np.asarray( + range(params["spatial"]), dtype=np.float32 + ) + forces = ncrst.createVariable( + "forces", "f8", ("atom", "spatial") + ) + setattr(forces, "units", "kilocalorie/mole/angstrom") + forces[:] = np.asarray( + range(params["spatial"]), dtype=np.float32 + ) + + # self.scale_factor overrides which variable gets a scale_factor + if params["scale_factor"]: + setattr( + ncrst.variables[params["scale_factor"]], + "scale_factor", + 2.0, + ) + + def gen_params(self, key=None, value=None): + """Generate writer parameters, key and value can be used to overwrite + a given dictionary entry + """ + + params = { + "filename": "test.ncrst", + "version_byte": 2, + "Conventions": "AMBERRESTART", + "ConventionVersion": "1.0", + "program": "mda test_writer", + "programVersion": "V42", + "n_atoms": 1, + "spatial": 3, + "coords": "angstrom", + "time": "picosecond", + "scale_factor": None, + } + + if key: + params[key] = value + + return params + + +class TestNCRSTReaderExceptionsWarnings(_NCRSTGenerator): + + @pytest.mark.parametrize( + "key,value", + (("version_byte", 1), ("Conventions", "Foo"), ("spatial", 2)), + ) + def test_read_type_errors(self, tmpdir, key, value): + params = self.gen_params(key=key, value=value) + with tmpdir.as_cwd(): + self.create_ncrst(params) + with pytest.raises(TypeError): + NCRSTReader(params["filename"]) + + @pytest.mark.parametrize( + "key,value", (("Conventions", None), ("ConventionVersion", None)) + ) + def test_attribute_errors(self, tmpdir, key, value): + params = self.gen_params(key=key, value=value) + with tmpdir.as_cwd(): + self.create_ncrst(params) + with pytest.raises(ValueError): + NCRSTReader(params["filename"]) + + @pytest.mark.parametrize( + "key,value", (("spatial", None), ("n_atoms", None), ("coords", None)) + ) + def test_key_errors(self, tmpdir, key, value): + params = self.gen_params(key=key, value=value) + with tmpdir.as_cwd(): + self.create_ncrst(params) + with pytest.raises(ValueError): + NCRSTReader(params["filename"]) + + @pytest.mark.parametrize( + "key,value", (("time", "femtosecond"), ("coords", "nanometer")) + ) + def test_not_implemented_errors(self, tmpdir, key, value): + params = self.gen_params(key=key, value=value) + with tmpdir.as_cwd(): + self.create_ncrst(params) + with pytest.raises(NotImplementedError): + NCRSTReader(params["filename"]) + + def test_conventionversion_warn(self, tmpdir): + params = self.gen_params(key="ConventionVersion", value="2.0") + with tmpdir.as_cwd(): + self.create_ncrst(params) + with pytest.warns(UserWarning) as record: + NCRSTReader(params["filename"]) + + assert len(record) == 1 + wmsg = "NCDF restart file format is 2.0 but the reader implements format 1.0" + assert str(record[0].message.args[0]) == wmsg + + @pytest.mark.parametrize( + "key,value", (("program", None), ("programVersion", None)) + ) + def test_program_warn(self, tmpdir, key, value): + params = self.gen_params(key=key, value=value) + with tmpdir.as_cwd(): + self.create_ncrst(params) + with pytest.warns(UserWarning) as record: + NCRSTReader(params["filename"]) + + assert len(record) == 1 + wmsg = ( + "The NCDF restart file {0} may not fully adhere to AMBER " + "standards as either the `program` or `programVersion` " + "attributes are missing".format(params["filename"]) + ) + assert str(record[0].message.args[0]) == wmsg + + def test_notime_warn(self, tmpdir): + params = self.gen_params(key="time", value=None) + with tmpdir.as_cwd(): + self.create_ncrst(params) + with pytest.warns(UserWarning) as record: + NCRSTReader(params["filename"]) + + # Lack of time triggers two warnings + assert len(record) == 1 + wmsg1 = ( + "NCDF restart file {0} does not contain `time` information;" + " `time` will be set as an increasing index".format( + params["filename"] + ) + ) + assert str(record[0].message.args[0]) == wmsg1 + # wmsg2 = ("Reader has no dt information, set to 1.0 ps") + # assert str(record[1].message.args[0]) == wmsg2 diff --git a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py index 1ebad11665c..5c099621ec8 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py +++ b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py @@ -23,7 +23,6 @@ import MDAnalysis as mda import numpy as np import sys - from scipy.io import netcdf_file import pytest @@ -41,6 +40,8 @@ DLP_CONFIG, CPPTRAJ_TRAJ_TOP, CPPTRAJ_TRAJ, + PRMNCRST, + TRJNCRST, ) from MDAnalysisTests.coordinates.test_trj import _TRJReaderTest from MDAnalysisTests.coordinates.reference import RefVGV, RefTZ2 @@ -385,10 +386,76 @@ def test_warn_user_no_time_information(self, u): "NCDF trajectory does not contain `time` information;" " `time` will be set as an increasing index" ) - with pytest.warns(UserWarning, match=wmsg): + with pytest.warns(UserWarning, match=wmsg[0]): u2 = mda.Universe(CPPTRAJ_TRAJ_TOP, CPPTRAJ_TRAJ) +class TestNCDFReader5(object): + """NCRST Restart File with positions and forces, exported by CPPTRAJ. + + Contributed by Jeremy M. G. Leung + """ + + prec = 6 + + @pytest.fixture(scope="class") + def u(self): + return mda.Universe(PRMNCRST, TRJNCRST) + + def test_positions(self, u): + """Check positions on first frame""" + u.trajectory[0] + ref_1 = np.array( + [ + [-1.1455358, -2.0177484, -0.55771565], + [-0.19042611, -2.2511053, -1.0282656], + [0.53238064, -1.5778863, -0.56737846], + ], + dtype=np.float64, + ) + assert_almost_equal(ref_1, u.atoms.positions[:3], self.prec) + + def test_velocities(self, u): + """Check forces on first frame""" + u.trajectory[0] + ref_1 = np.array( + [ + [11.86471367, 31.22108269, -4.03538418], + [7.36676359, -4.68035316, 1.78124952], + [12.86675262, 1.39324546, -14.97190762], + ], + dtype=np.float64, + ) + assert_almost_equal(ref_1, u.atoms.velocities[:3], self.prec) + + def test_forces(self, u): + """Check forces on first frame""" + u.trajectory[0] + ref_1 = np.array( + [ + [-2.32462358, -0.0899322, -5.9270463], + [-7.7518754, 8.95741653, -2.97663188], + [5.76228237, -4.73379087, -4.0858593], + ], + dtype=np.float64, + ) + assert_almost_equal(ref_1, u.atoms.forces[:3], self.prec) + + def test_time(self, u): + """Check time on first frame""" + ref = 5.0 + assert_almost_equal(ref, u.trajectory[0].time, self.prec) + + def test_dt(self, u): + """Default 1.0 fs""" + ref = 1.0 + assert_almost_equal(ref, u.trajectory.dt, self.prec) + assert_almost_equal(ref, u.trajectory.ts.dt, self.prec) + + def test_box(self, u): + assert u.trajectory[0].dimensions is None + + class _NCDFGenerator(object): """A class for generating abitrary ncdf files and exhaustively test edge cases which might not be found in the wild""" @@ -727,7 +794,7 @@ def test_program_warn(self, tmpdir, mutation): assert len(record) == 1 wmsg = ( - "NCDF trajectory test.nc may not fully adhere to AMBER " + "The NCDF trajectory test.nc may not fully adhere to AMBER " "standards as either the `program` or `programVersion` " "attributes are missing" ) @@ -785,21 +852,21 @@ def _test_write_trajectory(self, universe, outfile): # which should be "float32". # See http://docs.scipy.org/doc/numpy-1.10.0/reference/arrays.dtypes.html # and https://github.com/MDAnalysis/mdanalysis/pull/503 - dataset = netcdf_file(outfile, "r") - coords = dataset.variables["coordinates"] - time = dataset.variables["time"] - assert_equal( - coords[:].dtype.name, - np.dtype(np.float32).name, - err_msg="ncdf coord output not float32 " - "but {}".format(coords[:].dtype), - ) - assert_equal( - time[:].dtype.name, - np.dtype(np.float32).name, - err_msg="ncdf time output not float32 " - "but {}".format(time[:].dtype), - ) + with netcdf_file(outfile, "r") as dataset: + coords = dataset.variables["coordinates"] + time = dataset.variables["time"] + assert_equal( + coords[:].dtype.name, + np.dtype(np.float32).name, + err_msg="ncdf coord output not float32 " + "but {}".format(coords[:].dtype), + ) + assert_equal( + time[:].dtype.name, + np.dtype(np.float32).name, + err_msg="ncdf time output not float32 " + "but {}".format(time[:].dtype), + ) def test_write_trajectory_netCDF4(self, universe, outfile): pytest.importorskip("netCDF4") diff --git a/testsuite/MDAnalysisTests/data/Amber/ace_mbondi3.ncrst b/testsuite/MDAnalysisTests/data/Amber/ace_mbondi3.ncrst new file mode 100644 index 00000000000..9639b16fb28 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/Amber/ace_mbondi3.ncrst differ diff --git a/testsuite/MDAnalysisTests/data/Amber/ace_tip3p.ncrst b/testsuite/MDAnalysisTests/data/Amber/ace_tip3p.ncrst new file mode 100644 index 00000000000..48ccf477f5d Binary files /dev/null and b/testsuite/MDAnalysisTests/data/Amber/ace_tip3p.ncrst differ diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index 8c56de9c25d..c7de6ed5955 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -192,7 +192,10 @@ "CPPTRAJ_TRAJ_TOP", "CPPTRAJ_TRAJ", # Amber ncdf extracted from CPPTRAJ without time variable "PRMcs", # Amber (format, Issue 1331) - "PRMNCRST", # Amber ncrst with positions/forces/velocities + "PRMNCRST", # Amber topology for ncrst with positions/forces/velocities + "TRJNCRST", # Amber ncrst with positions/forces/velocties + "PRMNCRSTbox", + "NCRSTbox", "PRM_NCBOX", "TRJ_NCBOX", # Amber parm7 + nc w/ pos/forces/vels/box "PRMNEGATIVE", # Amber negative ATOMIC_NUMBER (Issue 2306) @@ -668,6 +671,9 @@ PRMcs = (_data_ref / "Amber/chitosan.prmtop").as_posix() PRMNCRST = (_data_ref / "Amber/ace_mbondi3.parm7").as_posix() +TRJNCRST = (_data_ref / "Amber/ace_mbondi3.ncrst").as_posix() +PRMNCRSTbox = (_data_ref / "Amber/ace_tip3p.parm7").as_posix() +NCRSTbox = (_data_ref / "Amber/ace_tip3p.ncrst").as_posix() PRM_NCBOX = (_data_ref / "Amber/ace_tip3p.parm7").as_posix() TRJ_NCBOX = (_data_ref / "Amber/ace_tip3p.nc").as_posix()