Coverage for /builds/ase/ase/ase/io/lammpsrun.py : 87.44%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import gzip
2import struct
3from collections import deque
4from os.path import splitext
6import numpy as np
8from ase.atoms import Atoms
9from ase.calculators.lammps import convert
10from ase.calculators.singlepoint import SinglePointCalculator
11from ase.parallel import paropen
12from ase.quaternions import Quaternions
15def read_lammps_dump(infileobj, **kwargs):
16 """Method which reads a LAMMPS dump file.
18 LAMMPS chooses output method depending on the given suffix:
19 - .bin : binary file
20 - .gz : output piped through gzip
21 - .mpiio: using mpiio (should be like cleartext,
22 with different ordering)
23 - else : normal clear-text format
25 :param infileobj: string to file, opened file or file-like stream
27 """
28 # !TODO: add support for lammps-regex naming schemes (output per
29 # processor and timestep wildcards)
31 opened = False
32 if isinstance(infileobj, str):
33 opened = True
34 suffix = splitext(infileobj)[-1]
35 if suffix == ".bin":
36 fileobj = paropen(infileobj, "rb")
37 elif suffix == ".gz":
38 # !TODO: save for parallel execution?
39 fileobj = gzip.open(infileobj, "rb")
40 else:
41 fileobj = paropen(infileobj)
42 else:
43 suffix = splitext(infileobj.name)[-1]
44 fileobj = infileobj
46 if suffix == ".bin":
47 out = read_lammps_dump_binary(fileobj, **kwargs)
48 if opened:
49 fileobj.close()
50 return out
52 out = read_lammps_dump_text(fileobj, **kwargs)
54 if opened:
55 fileobj.close()
57 return out
60def lammps_data_to_ase_atoms(
61 data,
62 colnames,
63 cell,
64 celldisp,
65 pbc=False,
66 atomsobj=Atoms,
67 order=True,
68 specorder=None,
69 prismobj=None,
70 units="metal",
71):
72 """Extract positions and other per-atom parameters and create Atoms
74 :param data: per atom data
75 :param colnames: index for data
76 :param cell: cell dimensions
77 :param celldisp: origin shift
78 :param pbc: periodic boundaries
79 :param atomsobj: function to create ase-Atoms object
80 :param order: sort atoms by id. Might be faster to turn off.
81 Disregarded in case `id` column is not given in file.
82 :param specorder: list of species to map lammps types to ase-species
83 (usually .dump files to not contain type to species mapping)
84 :param prismobj: Coordinate transformation between lammps and ase
85 :type prismobj: Prism
86 :param units: lammps units for unit transformation between lammps and ase
87 :returns: Atoms object
88 :rtype: Atoms
90 """
92 # read IDs if given and order if needed
93 if "id" in colnames:
94 ids = data[:, colnames.index("id")].astype(int)
95 if order:
96 sort_order = np.argsort(ids)
97 data = data[sort_order, :]
99 # determine the elements
100 if "element" in colnames:
101 # priority to elements written in file
102 elements = data[:, colnames.index("element")]
103 elif "type" in colnames:
104 # fall back to `types` otherwise
105 elements = data[:, colnames.index("type")].astype(int)
107 # reconstruct types from given specorder
108 if specorder:
109 elements = [specorder[t - 1] for t in elements]
110 else:
111 # todo: what if specorder give but no types?
112 # in principle the masses could work for atoms, but that needs
113 # lots of cases and new code I guess
114 raise ValueError("Cannot determine atom types form LAMMPS dump file")
116 def get_quantity(labels, quantity=None):
117 try:
118 cols = [colnames.index(label) for label in labels]
119 if quantity:
120 return convert(data[:, cols].astype(float), quantity,
121 units, "ASE")
123 return data[:, cols].astype(float)
124 except ValueError:
125 return None
127 # Positions
128 positions = None
129 scaled_positions = None
130 if "x" in colnames:
131 # doc: x, y, z = unscaled atom coordinates
132 positions = get_quantity(["x", "y", "z"], "distance")
133 elif "xs" in colnames:
134 # doc: xs,ys,zs = scaled atom coordinates
135 scaled_positions = get_quantity(["xs", "ys", "zs"])
136 elif "xu" in colnames:
137 # doc: xu,yu,zu = unwrapped atom coordinates
138 positions = get_quantity(["xu", "yu", "zu"], "distance")
139 elif "xsu" in colnames:
140 # xsu,ysu,zsu = scaled unwrapped atom coordinates
141 scaled_positions = get_quantity(["xsu", "ysu", "zsu"])
142 else:
143 raise ValueError("No atomic positions found in LAMMPS output")
145 velocities = get_quantity(["vx", "vy", "vz"], "velocity")
146 charges = get_quantity(["q"], "charge")
147 forces = get_quantity(["fx", "fy", "fz"], "force")
148 # !TODO: how need quaternions be converted?
149 quaternions = get_quantity(["c_q[1]", "c_q[2]", "c_q[3]", "c_q[4]"])
151 # convert cell
152 cell = convert(cell, "distance", units, "ASE")
153 celldisp = convert(celldisp, "distance", units, "ASE")
154 if prismobj:
155 celldisp = prismobj.vector_to_ase(celldisp)
156 cell = prismobj.update_cell(cell)
158 if quaternions:
159 out_atoms = Quaternions(
160 symbols=elements,
161 positions=positions,
162 cell=cell,
163 celldisp=celldisp,
164 pbc=pbc,
165 quaternions=quaternions,
166 )
167 elif positions is not None:
168 # reverse coordinations transform to lammps system
169 # (for all vectors = pos, vel, force)
170 if prismobj:
171 positions = prismobj.vector_to_ase(positions, wrap=True)
173 out_atoms = atomsobj(
174 symbols=elements,
175 positions=positions,
176 pbc=pbc,
177 celldisp=celldisp,
178 cell=cell
179 )
180 elif scaled_positions is not None:
181 out_atoms = atomsobj(
182 symbols=elements,
183 scaled_positions=scaled_positions,
184 pbc=pbc,
185 celldisp=celldisp,
186 cell=cell,
187 )
189 if velocities is not None:
190 if prismobj:
191 velocities = prismobj.vector_to_ase(velocities)
192 out_atoms.set_velocities(velocities)
193 if charges is not None:
194 out_atoms.set_initial_charges(charges)
195 if forces is not None:
196 if prismobj:
197 forces = prismobj.vector_to_ase(forces)
198 # !TODO: use another calculator if available (or move forces
199 # to atoms.property) (other problem: synchronizing
200 # parallel runs)
201 calculator = SinglePointCalculator(out_atoms, energy=0.0,
202 forces=forces)
203 out_atoms.calc = calculator
205 # process the extra columns of fixes, variables and computes
206 # that can be dumped, add as additional arrays to atoms object
207 for colname in colnames:
208 # determine if it is a compute or fix (but not the quaternian)
209 if (colname.startswith('f_') or colname.startswith('v_') or
210 (colname.startswith('c_') and not colname.startswith('c_q['))):
211 out_atoms.new_array(colname, get_quantity([colname]),
212 dtype='float')
214 return out_atoms
217def construct_cell(diagdisp, offdiag):
218 """Help function to create an ASE-cell with displacement vector from
219 the lammps coordination system parameters.
221 :param diagdisp: cell dimension convoluted with the displacement vector
222 :param offdiag: off-diagonal cell elements
223 :returns: cell and cell displacement vector
224 :rtype: tuple
225 """
226 xlo, xhi, ylo, yhi, zlo, zhi = diagdisp
227 xy, xz, yz = offdiag
229 # create ase-cell from lammps-box
230 xhilo = (xhi - xlo) - abs(xy) - abs(xz)
231 yhilo = (yhi - ylo) - abs(yz)
232 zhilo = zhi - zlo
233 celldispx = xlo - min(0, xy) - min(0, xz)
234 celldispy = ylo - min(0, yz)
235 celldispz = zlo
236 cell = np.array([[xhilo, 0, 0], [xy, yhilo, 0], [xz, yz, zhilo]])
237 celldisp = np.array([celldispx, celldispy, celldispz])
239 return cell, celldisp
242def get_max_index(index):
243 if np.isscalar(index):
244 return index
245 elif isinstance(index, slice):
246 return index.stop if (index.stop is not None) else float("inf")
249def read_lammps_dump_text(fileobj, index=-1, **kwargs):
250 """Process cleartext lammps dumpfiles
252 :param fileobj: filestream providing the trajectory data
253 :param index: integer or slice object (default: get the last timestep)
254 :returns: list of Atoms objects
255 :rtype: list
256 """
257 # Load all dumped timesteps into memory simultaneously
258 lines = deque(fileobj.readlines())
259 index_end = get_max_index(index)
261 n_atoms = 0
262 images = []
264 # avoid references before assignment in case of incorrect file structure
265 cell, celldisp, pbc = None, None, False
267 while len(lines) > n_atoms:
268 line = lines.popleft()
270 if "ITEM: TIMESTEP" in line:
271 n_atoms = 0
272 line = lines.popleft()
273 # !TODO: pyflakes complains about this line -> do something
274 # ntimestep = int(line.split()[0]) # NOQA
276 if "ITEM: NUMBER OF ATOMS" in line:
277 line = lines.popleft()
278 n_atoms = int(line.split()[0])
280 if "ITEM: BOX BOUNDS" in line:
281 # save labels behind "ITEM: BOX BOUNDS" in triclinic case
282 # (>=lammps-7Jul09)
283 tilt_items = line.split()[3:]
284 celldatarows = [lines.popleft() for _ in range(3)]
285 celldata = np.loadtxt(celldatarows)
286 diagdisp = celldata[:, :2].reshape(6, 1).flatten()
288 # determine cell tilt (triclinic case!)
289 if len(celldata[0]) > 2:
290 # for >=lammps-7Jul09 use labels behind "ITEM: BOX BOUNDS"
291 # to assign tilt (vector) elements ...
292 offdiag = celldata[:, 2]
293 # ... otherwise assume default order in 3rd column
294 # (if the latter was present)
295 if len(tilt_items) >= 3:
296 sort_index = [tilt_items.index(i)
297 for i in ["xy", "xz", "yz"]]
298 offdiag = offdiag[sort_index]
299 else:
300 offdiag = (0.0,) * 3
302 cell, celldisp = construct_cell(diagdisp, offdiag)
304 # Handle pbc conditions
305 if len(tilt_items) == 3:
306 pbc_items = tilt_items
307 elif len(tilt_items) > 3:
308 pbc_items = tilt_items[3:6]
309 else:
310 pbc_items = ["f", "f", "f"]
311 pbc = ["p" in d.lower() for d in pbc_items]
313 if "ITEM: ATOMS" in line:
314 colnames = line.split()[2:]
315 datarows = [lines.popleft() for _ in range(n_atoms)]
316 data = np.loadtxt(datarows, dtype=str)
317 out_atoms = lammps_data_to_ase_atoms(
318 data=data,
319 colnames=colnames,
320 cell=cell,
321 celldisp=celldisp,
322 atomsobj=Atoms,
323 pbc=pbc,
324 **kwargs
325 )
326 images.append(out_atoms)
328 if len(images) > index_end >= 0:
329 break
331 return images[index]
334def read_lammps_dump_binary(
335 fileobj, index=-1, colnames=None, intformat="SMALLBIG", **kwargs
336):
337 """Read binary dump-files (after binary2txt.cpp from lammps/tools)
339 :param fileobj: file-stream containing the binary lammps data
340 :param index: integer or slice object (default: get the last timestep)
341 :param colnames: data is columns and identified by a header
342 :param intformat: lammps support different integer size. Parameter set \
343 at compile-time and can unfortunately not derived from data file
344 :returns: list of Atoms-objects
345 :rtype: list
346 """
347 # depending on the chosen compilation flag lammps uses either normal
348 # integers or long long for its id or timestep numbering
349 # !TODO: tags are cast to double -> missing/double ids (add check?)
350 tagformat, bigformat = dict(
351 SMALLSMALL=("i", "i"), SMALLBIG=("i", "q"), BIGBIG=("q", "q")
352 )[intformat]
354 index_end = get_max_index(index)
356 # Standard columns layout from lammpsrun
357 if not colnames:
358 colnames = ["id", "type", "x", "y", "z",
359 "vx", "vy", "vz", "fx", "fy", "fz"]
361 images = []
363 # wrap struct.unpack to raise EOFError
364 def read_variables(string):
365 obj_len = struct.calcsize(string)
366 data_obj = fileobj.read(obj_len)
367 if obj_len != len(data_obj):
368 raise EOFError
369 return struct.unpack(string, data_obj)
371 while True:
372 try:
373 # Assume that the binary dump file is in the old (pre-29Oct2020)
374 # format
375 magic_string = None
377 # read header
378 ntimestep, = read_variables("=" + bigformat)
380 # In the new LAMMPS binary dump format (version 29Oct2020 and
381 # onward), a negative timestep is used to indicate that the next
382 # few bytes will contain certain metadata
383 if ntimestep < 0:
384 # First bigint was actually encoding the negative of the format
385 # name string length (we call this 'magic_string' to
386 magic_string_len = -ntimestep
388 # The next `magic_string_len` bytes will hold a string
389 # indicating the format of the dump file
390 magic_string = b''.join(read_variables(
391 "=" + str(magic_string_len) + "c"))
393 # Read endianness (integer). For now, we'll disregard the value
394 # and simply use the host machine's endianness (via '='
395 # character used with struct.calcsize).
396 #
397 # TODO: Use the endianness of the dump file in subsequent
398 # read_variables rather than just assuming it will match
399 # that of the host
400 endian, = read_variables("=i")
402 # Read revision number (integer)
403 revision, = read_variables("=i")
405 # Finally, read the actual timestep (bigint)
406 ntimestep, = read_variables("=" + bigformat)
408 n_atoms, triclinic = read_variables("=" + bigformat + "i")
409 boundary = read_variables("=6i")
410 diagdisp = read_variables("=6d")
411 if triclinic != 0:
412 offdiag = read_variables("=3d")
413 else:
414 offdiag = (0.0,) * 3
415 size_one, = read_variables("=i")
417 if len(colnames) != size_one:
418 raise ValueError("Provided columns do not match binary file")
420 if magic_string and revision > 1:
421 # New binary dump format includes units string,
422 # columns string, and time
423 units_str_len, = read_variables("=i")
425 if units_str_len > 0:
426 # Read lammps units style
427 _ = b''.join(
428 read_variables("=" + str(units_str_len) + "c"))
430 flag, = read_variables("=c")
431 if flag != b'\x00':
432 # Flag was non-empty string
433 time, = read_variables("=d")
435 # Length of column string
436 columns_str_len, = read_variables("=i")
438 # Read column string (e.g., "id type x y z vx vy vz fx fy fz")
439 _ = b''.join(read_variables("=" + str(columns_str_len) + "c"))
441 nchunk, = read_variables("=i")
443 # lammps cells/boxes can have different boundary conditions on each
444 # sides (makes mainly sense for different non-periodic conditions
445 # (e.g. [f]ixed and [s]hrink for a irradiation simulation))
446 # periodic case: b 0 = 'p'
447 # non-peridic cases 1: 'f', 2 : 's', 3: 'm'
448 pbc = np.sum(np.array(boundary).reshape((3, 2)), axis=1) == 0
450 cell, celldisp = construct_cell(diagdisp, offdiag)
452 data = []
453 for _ in range(nchunk):
454 # number-of-data-entries
455 n_data, = read_variables("=i")
456 # retrieve per atom data
457 data += read_variables("=" + str(n_data) + "d")
458 data = np.array(data).reshape((-1, size_one))
460 # map data-chunk to ase atoms
461 out_atoms = lammps_data_to_ase_atoms(
462 data=data,
463 colnames=colnames,
464 cell=cell,
465 celldisp=celldisp,
466 pbc=pbc,
467 **kwargs
468 )
470 images.append(out_atoms)
472 # stop if requested index has been found
473 if len(images) > index_end >= 0:
474 break
476 except EOFError:
477 break
479 return images[index]