Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import gzip 

2import struct 

3from collections import deque 

4from os.path import splitext 

5 

6import numpy as np 

7 

8from ase.atoms import Atoms 

9from ase.calculators.lammps import convert 

10from ase.calculators.singlepoint import SinglePointCalculator 

11from ase.parallel import paropen 

12from ase.quaternions import Quaternions 

13 

14 

15def read_lammps_dump(infileobj, **kwargs): 

16 """Method which reads a LAMMPS dump file. 

17 

18 LAMMPS chooses output method depending on the given suffix: 

19 - .bin : binary file 

20 - .gz : output piped through gzip 

21 - .mpiio: using mpiio (should be like cleartext, 

22 with different ordering) 

23 - else : normal clear-text format 

24 

25 :param infileobj: string to file, opened file or file-like stream 

26 

27 """ 

28 # !TODO: add support for lammps-regex naming schemes (output per 

29 # processor and timestep wildcards) 

30 

31 opened = False 

32 if isinstance(infileobj, str): 

33 opened = True 

34 suffix = splitext(infileobj)[-1] 

35 if suffix == ".bin": 

36 fileobj = paropen(infileobj, "rb") 

37 elif suffix == ".gz": 

38 # !TODO: save for parallel execution? 

39 fileobj = gzip.open(infileobj, "rb") 

40 else: 

41 fileobj = paropen(infileobj) 

42 else: 

43 suffix = splitext(infileobj.name)[-1] 

44 fileobj = infileobj 

45 

46 if suffix == ".bin": 

47 out = read_lammps_dump_binary(fileobj, **kwargs) 

48 if opened: 

49 fileobj.close() 

50 return out 

51 

52 out = read_lammps_dump_text(fileobj, **kwargs) 

53 

54 if opened: 

55 fileobj.close() 

56 

57 return out 

58 

59 

60def lammps_data_to_ase_atoms( 

61 data, 

62 colnames, 

63 cell, 

64 celldisp, 

65 pbc=False, 

66 atomsobj=Atoms, 

67 order=True, 

68 specorder=None, 

69 prismobj=None, 

70 units="metal", 

71): 

72 """Extract positions and other per-atom parameters and create Atoms 

73 

74 :param data: per atom data 

75 :param colnames: index for data 

76 :param cell: cell dimensions 

77 :param celldisp: origin shift 

78 :param pbc: periodic boundaries 

79 :param atomsobj: function to create ase-Atoms object 

80 :param order: sort atoms by id. Might be faster to turn off. 

81 Disregarded in case `id` column is not given in file. 

82 :param specorder: list of species to map lammps types to ase-species 

83 (usually .dump files to not contain type to species mapping) 

84 :param prismobj: Coordinate transformation between lammps and ase 

85 :type prismobj: Prism 

86 :param units: lammps units for unit transformation between lammps and ase 

87 :returns: Atoms object 

88 :rtype: Atoms 

89 

90 """ 

91 

92 # read IDs if given and order if needed 

93 if "id" in colnames: 

94 ids = data[:, colnames.index("id")].astype(int) 

95 if order: 

96 sort_order = np.argsort(ids) 

97 data = data[sort_order, :] 

98 

99 # determine the elements 

100 if "element" in colnames: 

101 # priority to elements written in file 

102 elements = data[:, colnames.index("element")] 

103 elif "type" in colnames: 

104 # fall back to `types` otherwise 

105 elements = data[:, colnames.index("type")].astype(int) 

106 

107 # reconstruct types from given specorder 

108 if specorder: 

109 elements = [specorder[t - 1] for t in elements] 

110 else: 

111 # todo: what if specorder give but no types? 

112 # in principle the masses could work for atoms, but that needs 

113 # lots of cases and new code I guess 

114 raise ValueError("Cannot determine atom types form LAMMPS dump file") 

115 

116 def get_quantity(labels, quantity=None): 

117 try: 

118 cols = [colnames.index(label) for label in labels] 

119 if quantity: 

120 return convert(data[:, cols].astype(float), quantity, 

121 units, "ASE") 

122 

123 return data[:, cols].astype(float) 

124 except ValueError: 

125 return None 

126 

127 # Positions 

128 positions = None 

129 scaled_positions = None 

130 if "x" in colnames: 

131 # doc: x, y, z = unscaled atom coordinates 

132 positions = get_quantity(["x", "y", "z"], "distance") 

133 elif "xs" in colnames: 

134 # doc: xs,ys,zs = scaled atom coordinates 

135 scaled_positions = get_quantity(["xs", "ys", "zs"]) 

136 elif "xu" in colnames: 

137 # doc: xu,yu,zu = unwrapped atom coordinates 

138 positions = get_quantity(["xu", "yu", "zu"], "distance") 

139 elif "xsu" in colnames: 

140 # xsu,ysu,zsu = scaled unwrapped atom coordinates 

141 scaled_positions = get_quantity(["xsu", "ysu", "zsu"]) 

142 else: 

143 raise ValueError("No atomic positions found in LAMMPS output") 

144 

145 velocities = get_quantity(["vx", "vy", "vz"], "velocity") 

146 charges = get_quantity(["q"], "charge") 

147 forces = get_quantity(["fx", "fy", "fz"], "force") 

148 # !TODO: how need quaternions be converted? 

149 quaternions = get_quantity(["c_q[1]", "c_q[2]", "c_q[3]", "c_q[4]"]) 

150 

151 # convert cell 

152 cell = convert(cell, "distance", units, "ASE") 

153 celldisp = convert(celldisp, "distance", units, "ASE") 

154 if prismobj: 

155 celldisp = prismobj.vector_to_ase(celldisp) 

156 cell = prismobj.update_cell(cell) 

157 

158 if quaternions: 

159 out_atoms = Quaternions( 

160 symbols=elements, 

161 positions=positions, 

162 cell=cell, 

163 celldisp=celldisp, 

164 pbc=pbc, 

165 quaternions=quaternions, 

166 ) 

167 elif positions is not None: 

168 # reverse coordinations transform to lammps system 

169 # (for all vectors = pos, vel, force) 

170 if prismobj: 

171 positions = prismobj.vector_to_ase(positions, wrap=True) 

172 

173 out_atoms = atomsobj( 

174 symbols=elements, 

175 positions=positions, 

176 pbc=pbc, 

177 celldisp=celldisp, 

178 cell=cell 

179 ) 

180 elif scaled_positions is not None: 

181 out_atoms = atomsobj( 

182 symbols=elements, 

183 scaled_positions=scaled_positions, 

184 pbc=pbc, 

185 celldisp=celldisp, 

186 cell=cell, 

187 ) 

188 

189 if velocities is not None: 

190 if prismobj: 

191 velocities = prismobj.vector_to_ase(velocities) 

192 out_atoms.set_velocities(velocities) 

193 if charges is not None: 

194 out_atoms.set_initial_charges(charges) 

195 if forces is not None: 

196 if prismobj: 

197 forces = prismobj.vector_to_ase(forces) 

198 # !TODO: use another calculator if available (or move forces 

199 # to atoms.property) (other problem: synchronizing 

200 # parallel runs) 

201 calculator = SinglePointCalculator(out_atoms, energy=0.0, 

202 forces=forces) 

203 out_atoms.calc = calculator 

204 

205 # process the extra columns of fixes, variables and computes 

206 # that can be dumped, add as additional arrays to atoms object 

207 for colname in colnames: 

208 # determine if it is a compute or fix (but not the quaternian) 

209 if (colname.startswith('f_') or colname.startswith('v_') or 

210 (colname.startswith('c_') and not colname.startswith('c_q['))): 

211 out_atoms.new_array(colname, get_quantity([colname]), 

212 dtype='float') 

213 

214 return out_atoms 

215 

216 

217def construct_cell(diagdisp, offdiag): 

218 """Help function to create an ASE-cell with displacement vector from 

219 the lammps coordination system parameters. 

220 

221 :param diagdisp: cell dimension convoluted with the displacement vector 

222 :param offdiag: off-diagonal cell elements 

223 :returns: cell and cell displacement vector 

224 :rtype: tuple 

225 """ 

226 xlo, xhi, ylo, yhi, zlo, zhi = diagdisp 

227 xy, xz, yz = offdiag 

228 

229 # create ase-cell from lammps-box 

230 xhilo = (xhi - xlo) - abs(xy) - abs(xz) 

231 yhilo = (yhi - ylo) - abs(yz) 

232 zhilo = zhi - zlo 

233 celldispx = xlo - min(0, xy) - min(0, xz) 

234 celldispy = ylo - min(0, yz) 

235 celldispz = zlo 

236 cell = np.array([[xhilo, 0, 0], [xy, yhilo, 0], [xz, yz, zhilo]]) 

237 celldisp = np.array([celldispx, celldispy, celldispz]) 

238 

239 return cell, celldisp 

240 

241 

242def get_max_index(index): 

243 if np.isscalar(index): 

244 return index 

245 elif isinstance(index, slice): 

246 return index.stop if (index.stop is not None) else float("inf") 

247 

248 

249def read_lammps_dump_text(fileobj, index=-1, **kwargs): 

250 """Process cleartext lammps dumpfiles 

251 

252 :param fileobj: filestream providing the trajectory data 

253 :param index: integer or slice object (default: get the last timestep) 

254 :returns: list of Atoms objects 

255 :rtype: list 

256 """ 

257 # Load all dumped timesteps into memory simultaneously 

258 lines = deque(fileobj.readlines()) 

259 index_end = get_max_index(index) 

260 

261 n_atoms = 0 

262 images = [] 

263 

264 # avoid references before assignment in case of incorrect file structure 

265 cell, celldisp, pbc = None, None, False 

266 

267 while len(lines) > n_atoms: 

268 line = lines.popleft() 

269 

270 if "ITEM: TIMESTEP" in line: 

271 n_atoms = 0 

272 line = lines.popleft() 

273 # !TODO: pyflakes complains about this line -> do something 

274 # ntimestep = int(line.split()[0]) # NOQA 

275 

276 if "ITEM: NUMBER OF ATOMS" in line: 

277 line = lines.popleft() 

278 n_atoms = int(line.split()[0]) 

279 

280 if "ITEM: BOX BOUNDS" in line: 

281 # save labels behind "ITEM: BOX BOUNDS" in triclinic case 

282 # (>=lammps-7Jul09) 

283 tilt_items = line.split()[3:] 

284 celldatarows = [lines.popleft() for _ in range(3)] 

285 celldata = np.loadtxt(celldatarows) 

286 diagdisp = celldata[:, :2].reshape(6, 1).flatten() 

287 

288 # determine cell tilt (triclinic case!) 

289 if len(celldata[0]) > 2: 

290 # for >=lammps-7Jul09 use labels behind "ITEM: BOX BOUNDS" 

291 # to assign tilt (vector) elements ... 

292 offdiag = celldata[:, 2] 

293 # ... otherwise assume default order in 3rd column 

294 # (if the latter was present) 

295 if len(tilt_items) >= 3: 

296 sort_index = [tilt_items.index(i) 

297 for i in ["xy", "xz", "yz"]] 

298 offdiag = offdiag[sort_index] 

299 else: 

300 offdiag = (0.0,) * 3 

301 

302 cell, celldisp = construct_cell(diagdisp, offdiag) 

303 

304 # Handle pbc conditions 

305 if len(tilt_items) == 3: 

306 pbc_items = tilt_items 

307 elif len(tilt_items) > 3: 

308 pbc_items = tilt_items[3:6] 

309 else: 

310 pbc_items = ["f", "f", "f"] 

311 pbc = ["p" in d.lower() for d in pbc_items] 

312 

313 if "ITEM: ATOMS" in line: 

314 colnames = line.split()[2:] 

315 datarows = [lines.popleft() for _ in range(n_atoms)] 

316 data = np.loadtxt(datarows, dtype=str) 

317 out_atoms = lammps_data_to_ase_atoms( 

318 data=data, 

319 colnames=colnames, 

320 cell=cell, 

321 celldisp=celldisp, 

322 atomsobj=Atoms, 

323 pbc=pbc, 

324 **kwargs 

325 ) 

326 images.append(out_atoms) 

327 

328 if len(images) > index_end >= 0: 

329 break 

330 

331 return images[index] 

332 

333 

334def read_lammps_dump_binary( 

335 fileobj, index=-1, colnames=None, intformat="SMALLBIG", **kwargs 

336): 

337 """Read binary dump-files (after binary2txt.cpp from lammps/tools) 

338 

339 :param fileobj: file-stream containing the binary lammps data 

340 :param index: integer or slice object (default: get the last timestep) 

341 :param colnames: data is columns and identified by a header 

342 :param intformat: lammps support different integer size. Parameter set \ 

343 at compile-time and can unfortunately not derived from data file 

344 :returns: list of Atoms-objects 

345 :rtype: list 

346 """ 

347 # depending on the chosen compilation flag lammps uses either normal 

348 # integers or long long for its id or timestep numbering 

349 # !TODO: tags are cast to double -> missing/double ids (add check?) 

350 tagformat, bigformat = dict( 

351 SMALLSMALL=("i", "i"), SMALLBIG=("i", "q"), BIGBIG=("q", "q") 

352 )[intformat] 

353 

354 index_end = get_max_index(index) 

355 

356 # Standard columns layout from lammpsrun 

357 if not colnames: 

358 colnames = ["id", "type", "x", "y", "z", 

359 "vx", "vy", "vz", "fx", "fy", "fz"] 

360 

361 images = [] 

362 

363 # wrap struct.unpack to raise EOFError 

364 def read_variables(string): 

365 obj_len = struct.calcsize(string) 

366 data_obj = fileobj.read(obj_len) 

367 if obj_len != len(data_obj): 

368 raise EOFError 

369 return struct.unpack(string, data_obj) 

370 

371 while True: 

372 try: 

373 # Assume that the binary dump file is in the old (pre-29Oct2020) 

374 # format 

375 magic_string = None 

376 

377 # read header 

378 ntimestep, = read_variables("=" + bigformat) 

379 

380 # In the new LAMMPS binary dump format (version 29Oct2020 and 

381 # onward), a negative timestep is used to indicate that the next 

382 # few bytes will contain certain metadata 

383 if ntimestep < 0: 

384 # First bigint was actually encoding the negative of the format 

385 # name string length (we call this 'magic_string' to 

386 magic_string_len = -ntimestep 

387 

388 # The next `magic_string_len` bytes will hold a string 

389 # indicating the format of the dump file 

390 magic_string = b''.join(read_variables( 

391 "=" + str(magic_string_len) + "c")) 

392 

393 # Read endianness (integer). For now, we'll disregard the value 

394 # and simply use the host machine's endianness (via '=' 

395 # character used with struct.calcsize). 

396 # 

397 # TODO: Use the endianness of the dump file in subsequent 

398 # read_variables rather than just assuming it will match 

399 # that of the host 

400 endian, = read_variables("=i") 

401 

402 # Read revision number (integer) 

403 revision, = read_variables("=i") 

404 

405 # Finally, read the actual timestep (bigint) 

406 ntimestep, = read_variables("=" + bigformat) 

407 

408 n_atoms, triclinic = read_variables("=" + bigformat + "i") 

409 boundary = read_variables("=6i") 

410 diagdisp = read_variables("=6d") 

411 if triclinic != 0: 

412 offdiag = read_variables("=3d") 

413 else: 

414 offdiag = (0.0,) * 3 

415 size_one, = read_variables("=i") 

416 

417 if len(colnames) != size_one: 

418 raise ValueError("Provided columns do not match binary file") 

419 

420 if magic_string and revision > 1: 

421 # New binary dump format includes units string, 

422 # columns string, and time 

423 units_str_len, = read_variables("=i") 

424 

425 if units_str_len > 0: 

426 # Read lammps units style 

427 _ = b''.join( 

428 read_variables("=" + str(units_str_len) + "c")) 

429 

430 flag, = read_variables("=c") 

431 if flag != b'\x00': 

432 # Flag was non-empty string 

433 time, = read_variables("=d") 

434 

435 # Length of column string 

436 columns_str_len, = read_variables("=i") 

437 

438 # Read column string (e.g., "id type x y z vx vy vz fx fy fz") 

439 _ = b''.join(read_variables("=" + str(columns_str_len) + "c")) 

440 

441 nchunk, = read_variables("=i") 

442 

443 # lammps cells/boxes can have different boundary conditions on each 

444 # sides (makes mainly sense for different non-periodic conditions 

445 # (e.g. [f]ixed and [s]hrink for a irradiation simulation)) 

446 # periodic case: b 0 = 'p' 

447 # non-peridic cases 1: 'f', 2 : 's', 3: 'm' 

448 pbc = np.sum(np.array(boundary).reshape((3, 2)), axis=1) == 0 

449 

450 cell, celldisp = construct_cell(diagdisp, offdiag) 

451 

452 data = [] 

453 for _ in range(nchunk): 

454 # number-of-data-entries 

455 n_data, = read_variables("=i") 

456 # retrieve per atom data 

457 data += read_variables("=" + str(n_data) + "d") 

458 data = np.array(data).reshape((-1, size_one)) 

459 

460 # map data-chunk to ase atoms 

461 out_atoms = lammps_data_to_ase_atoms( 

462 data=data, 

463 colnames=colnames, 

464 cell=cell, 

465 celldisp=celldisp, 

466 pbc=pbc, 

467 **kwargs 

468 ) 

469 

470 images.append(out_atoms) 

471 

472 # stop if requested index has been found 

473 if len(images) > index_end >= 0: 

474 break 

475 

476 except EOFError: 

477 break 

478 

479 return images[index]