Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Extended XYZ support 

3 

4Read/write files in "extended" XYZ format, storing additional 

5per-configuration information as key-value pairs on the XYZ 

6comment line, and additional per-atom properties as extra columns. 

7 

8Contributed by James Kermode <james.kermode@gmail.com> 

9""" 

10 

11 

12from itertools import islice 

13import re 

14import warnings 

15from io import StringIO, UnsupportedOperation 

16import json 

17 

18import numpy as np 

19import numbers 

20 

21from ase.atoms import Atoms 

22from ase.calculators.calculator import all_properties, BaseCalculator 

23from ase.calculators.singlepoint import SinglePointCalculator 

24from ase.spacegroup.spacegroup import Spacegroup 

25from ase.parallel import paropen 

26from ase.constraints import FixAtoms, FixCartesian 

27from ase.io.formats import index2range 

28from ase.utils import reader 

29 

30__all__ = ['read_xyz', 'write_xyz', 'iread_xyz'] 

31 

32PROPERTY_NAME_MAP = {'positions': 'pos', 

33 'numbers': 'Z', 

34 'charges': 'charge', 

35 'symbols': 'species'} 

36 

37REV_PROPERTY_NAME_MAP = dict(zip(PROPERTY_NAME_MAP.values(), 

38 PROPERTY_NAME_MAP.keys())) 

39 

40KEY_QUOTED_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)' 

41 + r'\s*=\s*["\{\}]([^"\{\}]+)["\{\}]\s*') 

42KEY_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_]*)\s*=' 

43 + r'\s*([^\s]+)\s*') 

44KEY_RE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)\s*') 

45 

46UNPROCESSED_KEYS = ['uid'] 

47 

48SPECIAL_3_3_KEYS = ['Lattice', 'virial', 'stress'] 

49 

50# partition ase.calculators.calculator.all_properties into two lists: 

51# 'per-atom' and 'per-config' 

52per_atom_properties = ['forces', 'stresses', 'charges', 'magmoms', 'energies'] 

53per_config_properties = ['energy', 'stress', 'dipole', 'magmom', 'free_energy'] 

54 

55 

56def key_val_str_to_dict(string, sep=None): 

57 """ 

58 Parse an xyz properties string in a key=value and return a dict with 

59 various values parsed to native types. 

60 

61 Accepts brackets or quotes to delimit values. Parses integers, floats 

62 booleans and arrays thereof. Arrays with 9 values whose name is listed 

63 in SPECIAL_3_3_KEYS are converted to 3x3 arrays with Fortran ordering. 

64 

65 If sep is None, string will split on whitespace, otherwise will split 

66 key value pairs with the given separator. 

67 

68 """ 

69 # store the closing delimiters to match opening ones 

70 delimiters = { 

71 "'": "'", 

72 '"': '"', 

73 '{': '}', 

74 '[': ']' 

75 } 

76 

77 # Make pairs and process afterwards 

78 kv_pairs = [ 

79 [[]]] # List of characters for each entry, add a new list for new value 

80 cur_delimiter = None # push and pop closing delimiters 

81 escaped = False # add escaped sequences verbatim 

82 

83 # parse character-by-character unless someone can do nested brackets 

84 # and escape sequences in a regex 

85 for char in string.strip(): 

86 if escaped: # bypass everything if escaped 

87 kv_pairs[-1][-1].append(char) 

88 escaped = False 

89 elif char == '\\': # escape the next thing 

90 escaped = True 

91 elif cur_delimiter: # inside brackets 

92 if char == cur_delimiter: # found matching delimiter 

93 cur_delimiter = None 

94 else: 

95 kv_pairs[-1][-1].append(char) # inside quotes, add verbatim 

96 elif char in delimiters: 

97 cur_delimiter = delimiters[char] # brackets or quotes 

98 elif (sep is None and char.isspace()) or char == sep: 

99 if kv_pairs == [[[]]]: # empty, beginning of string 

100 continue 

101 elif kv_pairs[-1][-1] == []: 

102 continue 

103 else: 

104 kv_pairs.append([[]]) 

105 elif char == '=': 

106 if kv_pairs[-1] == [[]]: 

107 del kv_pairs[-1] 

108 kv_pairs[-1].append([]) # value 

109 else: 

110 kv_pairs[-1][-1].append(char) 

111 

112 kv_dict = {} 

113 

114 for kv_pair in kv_pairs: 

115 if len(kv_pair) == 0: # empty line 

116 continue 

117 elif len(kv_pair) == 1: # default to True 

118 key, value = ''.join(kv_pair[0]), 'T' 

119 else: # Smush anything else with kv-splitter '=' between them 

120 key, value = ''.join(kv_pair[0]), '='.join( 

121 ''.join(x) for x in kv_pair[1:]) 

122 

123 if key.lower() not in UNPROCESSED_KEYS: 

124 # Try to convert to (arrays of) floats, ints 

125 split_value = re.findall(r'[^\s,]+', value) 

126 try: 

127 try: 

128 numvalue = np.array(split_value, dtype=int) 

129 except (ValueError, OverflowError): 

130 # don't catch errors here so it falls through to bool 

131 numvalue = np.array(split_value, dtype=float) 

132 if len(numvalue) == 1: 

133 numvalue = numvalue[0] # Only one number 

134 value = numvalue 

135 except (ValueError, OverflowError): 

136 pass # value is unchanged 

137 

138 # convert special 3x3 matrices 

139 if key in SPECIAL_3_3_KEYS: 

140 if not isinstance(value, np.ndarray) or value.shape != (9,): 

141 raise ValueError("Got info item {}, expecting special 3x3 " 

142 "matrix, but value is not in the form of " 

143 "a 9-long numerical vector".format(key)) 

144 value = np.array(value).reshape((3, 3), order='F') 

145 

146 # parse special strings as boolean or JSON 

147 if isinstance(value, str): 

148 # Parse boolean values: 

149 # T or [tT]rue or TRUE -> True 

150 # F or [fF]alse or FALSE -> False 

151 # For list: 'T T F' -> [True, True, False] 

152 # Cannot use `.lower()` to reduce `str_to_bool` mapping because 

153 # 't'/'f' not accepted 

154 str_to_bool = { 

155 'T': True, 'F': False, 'true': True, 'false': False, 

156 'True': True, 'False': False, 'TRUE': True, 'FALSE': False 

157 } 

158 try: 

159 boolvalue = [str_to_bool[vpart] for vpart in 

160 re.findall(r'[^\s,]+', value)] 

161 

162 if len(boolvalue) == 1: 

163 value = boolvalue[0] 

164 else: 

165 value = boolvalue 

166 except KeyError: 

167 # Try to parse JSON 

168 if value.startswith("_JSON "): 

169 d = json.loads(value.replace("_JSON ", "", 1)) 

170 value = np.array(d) 

171 if value.dtype.kind not in ['i', 'f', 'b']: 

172 value = d 

173 

174 kv_dict[key] = value 

175 

176 return kv_dict 

177 

178 

179def key_val_str_to_dict_regex(s): 

180 """ 

181 Parse strings in the form 'key1=value1 key2="quoted value"' 

182 """ 

183 d = {} 

184 s = s.strip() 

185 while True: 

186 # Match quoted string first, then fall through to plain key=value 

187 m = KEY_QUOTED_VALUE.match(s) 

188 if m is None: 

189 m = KEY_VALUE.match(s) 

190 if m is not None: 

191 s = KEY_VALUE.sub('', s, 1) 

192 else: 

193 # Just a key with no value 

194 m = KEY_RE.match(s) 

195 if m is not None: 

196 s = KEY_RE.sub('', s, 1) 

197 else: 

198 s = KEY_QUOTED_VALUE.sub('', s, 1) 

199 

200 if m is None: 

201 break # No more matches 

202 

203 key = m.group(1) 

204 try: 

205 value = m.group(2) 

206 except IndexError: 

207 # default value is 'T' (True) 

208 value = 'T' 

209 

210 if key.lower() not in UNPROCESSED_KEYS: 

211 # Try to convert to (arrays of) floats, ints 

212 try: 

213 numvalue = [] 

214 for x in value.split(): 

215 if x.find('.') == -1: 

216 numvalue.append(int(float(x))) 

217 else: 

218 numvalue.append(float(x)) 

219 if len(numvalue) == 1: 

220 numvalue = numvalue[0] # Only one number 

221 elif len(numvalue) == 9: 

222 # special case: 3x3 matrix, fortran ordering 

223 numvalue = np.array(numvalue).reshape((3, 3), order='F') 

224 else: 

225 numvalue = np.array(numvalue) # vector 

226 value = numvalue 

227 except (ValueError, OverflowError): 

228 pass 

229 

230 # Parse boolean values: 'T' -> True, 'F' -> False, 

231 # 'T T F' -> [True, True, False] 

232 if isinstance(value, str): 

233 str_to_bool = {'T': True, 'F': False} 

234 

235 if len(value.split()) > 1: 

236 if all([x in str_to_bool.keys() for x in value.split()]): 

237 value = [str_to_bool[x] for x in value.split()] 

238 elif value in str_to_bool: 

239 value = str_to_bool[value] 

240 

241 d[key] = value 

242 

243 return d 

244 

245 

246def escape(string): 

247 if (' ' in string or 

248 '"' in string or "'" in string or 

249 '{' in string or '}' in string or 

250 '[' in string or ']' in string): 

251 string = string.replace('"', '\\"') 

252 string = '"%s"' % string 

253 return string 

254 

255 

256def key_val_dict_to_str(dct, sep=' '): 

257 """ 

258 Convert atoms.info dictionary to extended XYZ string representation 

259 """ 

260 

261 def array_to_string(key, val): 

262 # some ndarrays are special (special 3x3 keys, and scalars/vectors of 

263 # numbers or bools), handle them here 

264 if key in SPECIAL_3_3_KEYS: 

265 # special 3x3 matrix, flatten in Fortran order 

266 val = val.reshape(val.size, order='F') 

267 if val.dtype.kind in ['i', 'f', 'b']: 

268 # numerical or bool scalars/vectors are special, for backwards 

269 # compat. 

270 if len(val.shape) == 0: 

271 # scalar 

272 val = str(known_types_to_str(val)) 

273 elif len(val.shape) == 1: 

274 # vector 

275 val = ' '.join(str(known_types_to_str(v)) for v in val) 

276 return val 

277 

278 def known_types_to_str(val): 

279 if isinstance(val, bool) or isinstance(val, np.bool_): 

280 return 'T' if val else 'F' 

281 elif isinstance(val, numbers.Real): 

282 return '{}'.format(val) 

283 elif isinstance(val, Spacegroup): 

284 return val.symbol 

285 else: 

286 return val 

287 

288 if len(dct) == 0: 

289 return '' 

290 

291 string = '' 

292 for key in dct: 

293 val = dct[key] 

294 

295 if isinstance(val, np.ndarray): 

296 val = array_to_string(key, val) 

297 else: 

298 # convert any known types to string 

299 val = known_types_to_str(val) 

300 

301 if val is not None and not isinstance(val, str): 

302 # what's left is an object, try using JSON 

303 if isinstance(val, np.ndarray): 

304 val = val.tolist() 

305 try: 

306 val = '_JSON ' + json.dumps(val) 

307 # if this fails, let give up 

308 except TypeError: 

309 warnings.warn('Skipping unhashable information ' 

310 '{0}'.format(key)) 

311 continue 

312 

313 key = escape(key) # escape and quote key 

314 eq = "=" 

315 # Should this really be setting empty value that's going to be 

316 # interpreted as bool True? 

317 if val is None: 

318 val = "" 

319 eq = "" 

320 val = escape(val) # escape and quote val 

321 

322 string += '%s%s%s%s' % (key, eq, val, sep) 

323 

324 return string.strip() 

325 

326 

327def parse_properties(prop_str): 

328 """ 

329 Parse extended XYZ properties format string 

330 

331 Format is "[NAME:TYPE:NCOLS]...]", e.g. "species:S:1:pos:R:3". 

332 NAME is the name of the property. 

333 TYPE is one of R, I, S, L for real, integer, string and logical. 

334 NCOLS is number of columns for that property. 

335 """ 

336 

337 properties = {} 

338 properties_list = [] 

339 dtypes = [] 

340 converters = [] 

341 

342 fields = prop_str.split(':') 

343 

344 def parse_bool(x): 

345 """ 

346 Parse bool to string 

347 """ 

348 return {'T': True, 'F': False, 

349 'True': True, 'False': False}.get(x) 

350 

351 fmt_map = {'R': ('d', float), 

352 'I': ('i', int), 

353 'S': (object, str), 

354 'L': ('bool', parse_bool)} 

355 

356 for name, ptype, cols in zip(fields[::3], 

357 fields[1::3], 

358 [int(x) for x in fields[2::3]]): 

359 if ptype not in ('R', 'I', 'S', 'L'): 

360 raise ValueError('Unknown property type: ' + ptype) 

361 ase_name = REV_PROPERTY_NAME_MAP.get(name, name) 

362 

363 dtype, converter = fmt_map[ptype] 

364 if cols == 1: 

365 dtypes.append((name, dtype)) 

366 converters.append(converter) 

367 else: 

368 for c in range(cols): 

369 dtypes.append((name + str(c), dtype)) 

370 converters.append(converter) 

371 

372 properties[name] = (ase_name, cols) 

373 properties_list.append(name) 

374 

375 dtype = np.dtype(dtypes) 

376 return properties, properties_list, dtype, converters 

377 

378 

379def _read_xyz_frame(lines, natoms, properties_parser=key_val_str_to_dict, 

380 nvec=0): 

381 # comment line 

382 line = next(lines).strip() 

383 if nvec > 0: 

384 info = {'comment': line} 

385 else: 

386 info = properties_parser(line) if line else {} 

387 

388 pbc = None 

389 if 'pbc' in info: 

390 pbc = info.pop('pbc') 

391 elif 'Lattice' in info: 

392 # default pbc for extxyz file containing Lattice 

393 # is True in all directions 

394 pbc = [True, True, True] 

395 elif nvec > 0: 

396 # cell information given as pseudo-Atoms 

397 pbc = [False, False, False] 

398 

399 cell = None 

400 if 'Lattice' in info: 

401 # NB: ASE cell is transpose of extended XYZ lattice 

402 cell = info['Lattice'].T 

403 del info['Lattice'] 

404 elif nvec > 0: 

405 # cell information given as pseudo-Atoms 

406 cell = np.zeros((3, 3)) 

407 

408 if 'Properties' not in info: 

409 # Default set of properties is atomic symbols and positions only 

410 info['Properties'] = 'species:S:1:pos:R:3' 

411 properties, names, dtype, convs = parse_properties(info['Properties']) 

412 del info['Properties'] 

413 

414 data = [] 

415 for ln in range(natoms): 

416 try: 

417 line = next(lines) 

418 except StopIteration: 

419 raise XYZError('ase.io.extxyz: Frame has {} atoms, expected {}' 

420 .format(len(data), natoms)) 

421 vals = line.split() 

422 row = tuple([conv(val) for conv, val in zip(convs, vals)]) 

423 data.append(row) 

424 

425 try: 

426 data = np.array(data, dtype) 

427 except TypeError: 

428 raise XYZError('Badly formatted data ' 

429 'or end of file reached before end of frame') 

430 

431 # Read VEC entries if present 

432 if nvec > 0: 

433 for ln in range(nvec): 

434 try: 

435 line = next(lines) 

436 except StopIteration: 

437 raise XYZError('ase.io.adfxyz: Frame has {} cell vectors, ' 

438 'expected {}'.format(len(cell), nvec)) 

439 entry = line.split() 

440 

441 if not entry[0].startswith('VEC'): 

442 raise XYZError('Expected cell vector, got {}'.format(entry[0])) 

443 

444 try: 

445 n = int(entry[0][3:]) 

446 except ValueError as e: 

447 raise XYZError('Expected VEC{}, got VEC{}' 

448 .format(ln + 1, entry[0][3:])) from e 

449 if n != ln + 1: 

450 raise XYZError('Expected VEC{}, got VEC{}' 

451 .format(ln + 1, n)) 

452 

453 cell[ln] = np.array([float(x) for x in entry[1:]]) 

454 pbc[ln] = True 

455 if nvec != pbc.count(True): 

456 raise XYZError('Problem with number of cell vectors') 

457 pbc = tuple(pbc) 

458 

459 arrays = {} 

460 for name in names: 

461 ase_name, cols = properties[name] 

462 if cols == 1: 

463 value = data[name] 

464 else: 

465 value = np.vstack([data[name + str(c)] 

466 for c in range(cols)]).T 

467 arrays[ase_name] = value 

468 

469 symbols = None 

470 if 'symbols' in arrays: 

471 symbols = [s.capitalize() for s in arrays['symbols']] 

472 del arrays['symbols'] 

473 

474 numbers = None 

475 duplicate_numbers = None 

476 if 'numbers' in arrays: 

477 if symbols is None: 

478 numbers = arrays['numbers'] 

479 else: 

480 duplicate_numbers = arrays['numbers'] 

481 del arrays['numbers'] 

482 

483 initial_charges = None 

484 if 'initial_charges' in arrays: 

485 initial_charges = arrays['initial_charges'] 

486 del arrays['initial_charges'] 

487 

488 positions = None 

489 if 'positions' in arrays: 

490 positions = arrays['positions'] 

491 del arrays['positions'] 

492 

493 atoms = Atoms(symbols=symbols, 

494 positions=positions, 

495 numbers=numbers, 

496 charges=initial_charges, 

497 cell=cell, 

498 pbc=pbc, 

499 info=info) 

500 

501 # Read and set constraints 

502 if 'move_mask' in arrays: 

503 move_mask = arrays['move_mask'].astype(bool) 

504 if properties['move_mask'][1] == 3: 

505 cons = [] 

506 for a in range(natoms): 

507 cons.append(FixCartesian(a, mask=~move_mask[a, :])) 

508 atoms.set_constraint(cons) 

509 elif properties['move_mask'][1] == 1: 

510 atoms.set_constraint(FixAtoms(mask=~move_mask)) 

511 else: 

512 raise XYZError('Not implemented constraint') 

513 del arrays['move_mask'] 

514 

515 for name, array in arrays.items(): 

516 atoms.new_array(name, array) 

517 

518 if duplicate_numbers is not None: 

519 atoms.set_atomic_numbers(duplicate_numbers) 

520 

521 # Load results of previous calculations into SinglePointCalculator 

522 results = {} 

523 for key in list(atoms.info.keys()): 

524 if key in per_config_properties: 

525 results[key] = atoms.info[key] 

526 # special case for stress- convert to Voigt 6-element form 

527 if key == 'stress' and results[key].shape == (3, 3): 

528 stress = results[key] 

529 stress = np.array([stress[0, 0], 

530 stress[1, 1], 

531 stress[2, 2], 

532 stress[1, 2], 

533 stress[0, 2], 

534 stress[0, 1]]) 

535 results[key] = stress 

536 for key in list(atoms.arrays.keys()): 

537 if (key in per_atom_properties and len(value.shape) >= 1 

538 and value.shape[0] == len(atoms)): 

539 results[key] = atoms.arrays[key] 

540 if results != {}: 

541 calculator = SinglePointCalculator(atoms, **results) 

542 atoms.calc = calculator 

543 return atoms 

544 

545 

546class XYZError(IOError): 

547 pass 

548 

549 

550class XYZChunk: 

551 def __init__(self, lines, natoms): 

552 self.lines = lines 

553 self.natoms = natoms 

554 

555 def build(self): 

556 """Convert unprocessed chunk into Atoms.""" 

557 return _read_xyz_frame(iter(self.lines), self.natoms) 

558 

559 

560def ixyzchunks(fd): 

561 """Yield unprocessed chunks (header, lines) for each xyz image.""" 

562 while True: 

563 line = next(fd).strip() # Raises StopIteration on empty file 

564 try: 

565 natoms = int(line) 

566 except ValueError: 

567 raise XYZError('Expected integer, found "{0}"'.format(line)) 

568 try: 

569 lines = [next(fd) for _ in range(1 + natoms)] 

570 except StopIteration: 

571 raise XYZError('Incomplete XYZ chunk') 

572 yield XYZChunk(lines, natoms) 

573 

574 

575class ImageIterator: 

576 """""" 

577 

578 def __init__(self, ichunks): 

579 self.ichunks = ichunks 

580 

581 def __call__(self, fd, indices=-1): 

582 if not hasattr(indices, 'start'): 

583 if indices < 0: 

584 indices = slice(indices - 1, indices) 

585 else: 

586 indices = slice(indices, indices + 1) 

587 

588 for chunk in self._getslice(fd, indices): 

589 yield chunk.build() 

590 

591 def _getslice(self, fd, indices): 

592 try: 

593 iterator = islice(self.ichunks(fd), indices.start, indices.stop, 

594 indices.step) 

595 except ValueError: 

596 # Negative indices. Go through the whole thing to get the length, 

597 # which allows us to evaluate the slice, and then read it again 

598 startpos = fd.tell() 

599 nchunks = 0 

600 for chunk in self.ichunks(fd): 

601 nchunks += 1 

602 fd.seek(startpos) 

603 indices_tuple = indices.indices(nchunks) 

604 iterator = islice(self.ichunks(fd), *indices_tuple) 

605 return iterator 

606 

607 

608iread_xyz = ImageIterator(ixyzchunks) 

609 

610 

611@reader 

612def read_xyz(fileobj, index=-1, properties_parser=key_val_str_to_dict): 

613 r""" 

614 Read from a file in Extended XYZ format 

615 

616 index is the frame to read, default is last frame (index=-1). 

617 properties_parser is the parse to use when converting the properties line 

618 to a dictionary, ``extxyz.key_val_str_to_dict`` is the default and can 

619 deal with most use cases, ``extxyz.key_val_str_to_dict_regex`` is slightly 

620 faster but has fewer features. 

621 

622 Extended XYZ format is an enhanced version of the `basic XYZ format 

623 <http://en.wikipedia.org/wiki/XYZ_file_format>`_ that allows extra 

624 columns to be present in the file for additonal per-atom properties as 

625 well as standardising the format of the comment line to include the 

626 cell lattice and other per-frame parameters. 

627 

628 It's easiest to describe the format with an example. Here is a 

629 standard XYZ file containing a bulk cubic 8 atom silicon cell :: 

630 

631 8 

632 Cubic bulk silicon cell 

633 Si 0.00000000 0.00000000 0.00000000 

634 Si 1.36000000 1.36000000 1.36000000 

635 Si 2.72000000 2.72000000 0.00000000 

636 Si 4.08000000 4.08000000 1.36000000 

637 Si 2.72000000 0.00000000 2.72000000 

638 Si 4.08000000 1.36000000 4.08000000 

639 Si 0.00000000 2.72000000 2.72000000 

640 Si 1.36000000 4.08000000 4.08000000 

641 

642 The first line is the number of atoms, followed by a comment and 

643 then one line per atom, giving the element symbol and cartesian 

644 x y, and z coordinates in Angstroms. 

645 

646 Here's the same configuration in extended XYZ format :: 

647 

648 8 

649 Lattice="5.44 0.0 0.0 0.0 5.44 0.0 0.0 0.0 5.44" Properties=species:S:1:pos:R:3 Time=0.0 

650 Si 0.00000000 0.00000000 0.00000000 

651 Si 1.36000000 1.36000000 1.36000000 

652 Si 2.72000000 2.72000000 0.00000000 

653 Si 4.08000000 4.08000000 1.36000000 

654 Si 2.72000000 0.00000000 2.72000000 

655 Si 4.08000000 1.36000000 4.08000000 

656 Si 0.00000000 2.72000000 2.72000000 

657 Si 1.36000000 4.08000000 4.08000000 

658 

659 In extended XYZ format, the comment line is replaced by a series of 

660 key/value pairs. The keys should be strings and values can be 

661 integers, reals, logicals (denoted by `T` and `F` for true and false) 

662 or strings. Quotes are required if a value contains any spaces (like 

663 `Lattice` above). There are two mandatory parameters that any 

664 extended XYZ: `Lattice` and `Properties`. Other parameters -- 

665 e.g. `Time` in the example above --- can be added to the parameter line 

666 as needed. 

667 

668 `Lattice` is a Cartesian 3x3 matrix representation of the cell 

669 vectors, with each vector stored as a column and the 9 values listed in 

670 Fortran column-major order, i.e. in the form :: 

671 

672 Lattice="R1x R1y R1z R2x R2y R2z R3x R3y R3z" 

673 

674 where `R1x R1y R1z` are the Cartesian x-, y- and z-components of the 

675 first lattice vector (:math:`\mathbf{a}`), `R2x R2y R2z` those of the second 

676 lattice vector (:math:`\mathbf{b}`) and `R3x R3y R3z` those of the 

677 third lattice vector (:math:`\mathbf{c}`). 

678 

679 The list of properties in the file is described by the `Properties` 

680 parameter, which should take the form of a series of colon separated 

681 triplets giving the name, format (`R` for real, `I` for integer) and 

682 number of columns of each property. For example:: 

683 

684 Properties="species:S:1:pos:R:3:vel:R:3:select:I:1" 

685 

686 indicates the first column represents atomic species, the next three 

687 columns represent atomic positions, the next three velcoities, and the 

688 last is an single integer called `select`. With this property 

689 definition, the line :: 

690 

691 Si 4.08000000 4.08000000 1.36000000 0.00000000 0.00000000 0.00000000 1 

692 

693 would describe a silicon atom at position (4.08,4.08,1.36) with zero 

694 velocity and the `select` property set to 1. 

695 

696 The property names `pos`, `Z`, `mass`, and `charge` map to ASE 

697 :attr:`ase.atoms.Atoms.arrays` entries named 

698 `positions`, `numbers`, `masses` and `charges` respectively. 

699 

700 Additional key-value pairs in the comment line are parsed into the 

701 :attr:`ase.Atoms.atoms.info` dictionary, with the following conventions 

702 

703 - Values can be quoted with `""`, `''`, `[]` or `{}` (the latter are 

704 included to ease command-line usage as the `{}` are not treated 

705 specially by the shell) 

706 - Quotes within keys or values can be escaped with `\"`. 

707 - Keys with special names `stress` or `virial` are treated as 3x3 matrices 

708 in Fortran order, as for `Lattice` above. 

709 - Otherwise, values with multiple elements are treated as 1D arrays, first 

710 assuming integer format and falling back to float if conversion is 

711 unsuccessful. 

712 - A missing value defaults to `True`, e.g. the comment line 

713 `"cutoff=3.4 have_energy"` leads to 

714 `{'cutoff': 3.4, 'have_energy': True}` in `atoms.info`. 

715 - Value strings starting with `"_JSON"` are interpreted as JSON content; 

716 similarly, when writing, anything which does not match the criteria above 

717 is serialised as JSON. 

718 

719 The extended XYZ format is also supported by the 

720 the `Ovito <http://www.ovito.org>`_ visualisation tool 

721 (from `v2.4 beta 

722 <http://www.ovito.org/index.php/component/content/article?id=25>`_ 

723 onwards). 

724 """ # noqa: E501 

725 

726 if not isinstance(index, int) and not isinstance(index, slice): 

727 raise TypeError('Index argument is neither slice nor integer!') 

728 

729 # If possible, build a partial index up to the last frame required 

730 last_frame = None 

731 if isinstance(index, int) and index >= 0: 

732 last_frame = index 

733 elif isinstance(index, slice): 

734 if index.stop is not None and index.stop >= 0: 

735 last_frame = index.stop 

736 

737 # scan through file to find where the frames start 

738 try: 

739 fileobj.seek(0) 

740 except UnsupportedOperation: 

741 fileobj = StringIO(fileobj.read()) 

742 fileobj.seek(0) 

743 frames = [] 

744 while True: 

745 frame_pos = fileobj.tell() 

746 line = fileobj.readline() 

747 if line.strip() == '': 

748 break 

749 try: 

750 natoms = int(line) 

751 except ValueError as err: 

752 raise XYZError('ase.io.extxyz: Expected xyz header but got: {}' 

753 .format(err)) 

754 fileobj.readline() # read comment line 

755 for i in range(natoms): 

756 fileobj.readline() 

757 # check for VEC 

758 nvec = 0 

759 while True: 

760 lastPos = fileobj.tell() 

761 line = fileobj.readline() 

762 if line.lstrip().startswith('VEC'): 

763 nvec += 1 

764 if nvec > 3: 

765 raise XYZError('ase.io.extxyz: More than 3 VECX entries') 

766 else: 

767 fileobj.seek(lastPos) 

768 break 

769 frames.append((frame_pos, natoms, nvec)) 

770 if last_frame is not None and len(frames) > last_frame: 

771 break 

772 

773 trbl = index2range(index, len(frames)) 

774 

775 for index in trbl: 

776 frame_pos, natoms, nvec = frames[index] 

777 fileobj.seek(frame_pos) 

778 # check for consistency with frame index table 

779 assert int(fileobj.readline()) == natoms 

780 yield _read_xyz_frame(fileobj, natoms, properties_parser, nvec) 

781 

782 

783def output_column_format(atoms, columns, arrays, 

784 write_info=True, results=None): 

785 """ 

786 Helper function to build extended XYZ comment line 

787 """ 

788 fmt_map = {'d': ('R', '%16.8f'), 

789 'f': ('R', '%16.8f'), 

790 'i': ('I', '%8d'), 

791 'O': ('S', '%s'), 

792 'S': ('S', '%s'), 

793 'U': ('S', '%-2s'), 

794 'b': ('L', ' %.1s')} 

795 

796 # NB: Lattice is stored as tranpose of ASE cell, 

797 # with Fortran array ordering 

798 lattice_str = ('Lattice="' 

799 + ' '.join([str(x) for x in np.reshape(atoms.cell.T, 

800 9, order='F')]) + 

801 '"') 

802 

803 property_names = [] 

804 property_types = [] 

805 property_ncols = [] 

806 dtypes = [] 

807 formats = [] 

808 

809 for column in columns: 

810 array = arrays[column] 

811 dtype = array.dtype 

812 

813 property_name = PROPERTY_NAME_MAP.get(column, column) 

814 property_type, fmt = fmt_map[dtype.kind] 

815 property_names.append(property_name) 

816 property_types.append(property_type) 

817 

818 if (len(array.shape) == 1 

819 or (len(array.shape) == 2 and array.shape[1] == 1)): 

820 ncol = 1 

821 dtypes.append((column, dtype)) 

822 else: 

823 ncol = array.shape[1] 

824 for c in range(ncol): 

825 dtypes.append((column + str(c), dtype)) 

826 

827 formats.extend([fmt] * ncol) 

828 property_ncols.append(ncol) 

829 

830 props_str = ':'.join([':'.join(x) for x in 

831 zip(property_names, 

832 property_types, 

833 [str(nc) for nc in property_ncols])]) 

834 

835 comment_str = '' 

836 if atoms.cell.any(): 

837 comment_str += lattice_str + ' ' 

838 comment_str += 'Properties={}'.format(props_str) 

839 

840 info = {} 

841 if write_info: 

842 info.update(atoms.info) 

843 if results is not None: 

844 info.update(results) 

845 info['pbc'] = atoms.get_pbc() # always save periodic boundary conditions 

846 comment_str += ' ' + key_val_dict_to_str(info) 

847 

848 dtype = np.dtype(dtypes) 

849 fmt = ' '.join(formats) + '\n' 

850 

851 return comment_str, property_ncols, dtype, fmt 

852 

853 

854def write_xyz(fileobj, images, comment='', columns=None, 

855 write_info=True, 

856 write_results=True, plain=False, vec_cell=False, 

857 append=False): 

858 """ 

859 Write output in extended XYZ format 

860 

861 Optionally, specify which columns (arrays) to include in output, 

862 whether to write the contents of the `atoms.info` dict to the 

863 XYZ comment line (default is True), the results of any 

864 calculator attached to this Atoms. The `plain` argument 

865 can be used to write a simple XYZ file with no additional information. 

866 `vec_cell` can be used to write the cell vectors as additional 

867 pseudo-atoms. If `append` is set to True, the file is for append (mode `a`), 

868 otherwise it is overwritten (mode `w`). 

869 

870 See documentation for :func:`read_xyz()` for further details of the extended 

871 XYZ file format. 

872 """ 

873 if isinstance(fileobj, str): 

874 mode = 'w' 

875 if append: 

876 mode = 'a' 

877 fileobj = paropen(fileobj, mode) 

878 

879 if hasattr(images, 'get_positions'): 

880 images = [images] 

881 

882 for atoms in images: 

883 natoms = len(atoms) 

884 

885 if columns is None: 

886 fr_cols = None 

887 else: 

888 fr_cols = columns[:] 

889 

890 if fr_cols is None: 

891 fr_cols = (['symbols', 'positions'] 

892 + [key for key in atoms.arrays.keys() if 

893 key not in ['symbols', 'positions', 'numbers', 

894 'species', 'pos']]) 

895 

896 if vec_cell: 

897 plain = True 

898 

899 if plain: 

900 fr_cols = ['symbols', 'positions'] 

901 write_info = False 

902 write_results = False 

903 

904 per_frame_results = {} 

905 per_atom_results = {} 

906 if write_results: 

907 calculator = atoms.calc 

908 if (calculator is not None 

909 and isinstance(calculator, BaseCalculator)): 

910 for key in all_properties: 

911 value = calculator.results.get(key, None) 

912 if value is None: 

913 # skip missing calculator results 

914 continue 

915 if (key in per_atom_properties and len(value.shape) >= 1 

916 and value.shape[0] == len(atoms)): 

917 # per-atom quantities (forces, energies, stresses) 

918 per_atom_results[key] = value 

919 elif key in per_config_properties: 

920 # per-frame quantities (energy, stress) 

921 # special case for stress, which should be converted 

922 # to 3x3 matrices before writing 

923 if key == 'stress': 

924 xx, yy, zz, yz, xz, xy = value 

925 value = np.array( 

926 [(xx, xy, xz), (xy, yy, yz), (xz, yz, zz)]) 

927 per_frame_results[key] = value 

928 

929 # Move symbols and positions to first two properties 

930 if 'symbols' in fr_cols: 

931 i = fr_cols.index('symbols') 

932 fr_cols[0], fr_cols[i] = fr_cols[i], fr_cols[0] 

933 

934 if 'positions' in fr_cols: 

935 i = fr_cols.index('positions') 

936 fr_cols[1], fr_cols[i] = fr_cols[i], fr_cols[1] 

937 

938 # Check first column "looks like" atomic symbols 

939 if fr_cols[0] in atoms.arrays: 

940 symbols = atoms.arrays[fr_cols[0]] 

941 else: 

942 symbols = atoms.get_chemical_symbols() 

943 

944 if natoms > 0 and not isinstance(symbols[0], str): 

945 raise ValueError('First column must be symbols-like') 

946 

947 # Check second column "looks like" atomic positions 

948 pos = atoms.arrays[fr_cols[1]] 

949 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f': 

950 raise ValueError('Second column must be position-like') 

951 

952 # if vec_cell add cell information as pseudo-atoms 

953 if vec_cell: 

954 pbc = list(atoms.get_pbc()) 

955 cell = atoms.get_cell() 

956 

957 if True in pbc: 

958 nPBC = 0 

959 for i, b in enumerate(pbc): 

960 if b: 

961 nPBC += 1 

962 symbols.append('VEC' + str(nPBC)) 

963 pos = np.vstack((pos, cell[i])) 

964 # add to natoms 

965 natoms += nPBC 

966 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f': 

967 raise ValueError( 

968 'Pseudo Atoms containing cell have bad coords') 

969 

970 # Move mask 

971 if 'move_mask' in fr_cols: 

972 cnstr = images[0]._get_constraints() 

973 if len(cnstr) > 0: 

974 c0 = cnstr[0] 

975 if isinstance(c0, FixAtoms): 

976 cnstr = np.ones((natoms,), dtype=bool) 

977 for idx in c0.index: 

978 cnstr[idx] = False 

979 elif isinstance(c0, FixCartesian): 

980 masks = np.ones((natoms, 3), dtype=bool) 

981 for i in range(len(cnstr)): 

982 idx = cnstr[i].index 

983 masks[idx] = cnstr[i].mask 

984 cnstr = masks 

985 else: 

986 fr_cols.remove('move_mask') 

987 

988 # Collect data to be written out 

989 arrays = {} 

990 for column in fr_cols: 

991 if column == 'positions': 

992 arrays[column] = pos 

993 elif column in atoms.arrays: 

994 arrays[column] = atoms.arrays[column] 

995 elif column == 'symbols': 

996 arrays[column] = np.array(symbols) 

997 elif column == 'move_mask': 

998 arrays[column] = cnstr 

999 else: 

1000 raise ValueError('Missing array "%s"' % column) 

1001 

1002 if write_results: 

1003 for key in per_atom_results: 

1004 if key not in fr_cols: 

1005 fr_cols += [key] 

1006 else: 

1007 warnings.warn('write_xyz() overwriting array "{0}" present ' 

1008 'in atoms.arrays with stored results ' 

1009 'from calculator'.format(key)) 

1010 arrays.update(per_atom_results) 

1011 

1012 comm, ncols, dtype, fmt = output_column_format(atoms, 

1013 fr_cols, 

1014 arrays, 

1015 write_info, 

1016 per_frame_results) 

1017 

1018 if plain or comment != '': 

1019 # override key/value pairs with user-speficied comment string 

1020 comm = comment.rstrip() 

1021 if '\n' in comm: 

1022 raise ValueError('Comment line should not have line breaks.') 

1023 

1024 # Pack fr_cols into record array 

1025 data = np.zeros(natoms, dtype) 

1026 for column, ncol in zip(fr_cols, ncols): 

1027 value = arrays[column] 

1028 if ncol == 1: 

1029 data[column] = np.squeeze(value) 

1030 else: 

1031 for c in range(ncol): 

1032 data[column + str(c)] = value[:, c] 

1033 

1034 nat = natoms 

1035 if vec_cell: 

1036 nat -= nPBC 

1037 # Write the output 

1038 fileobj.write('%d\n' % nat) 

1039 fileobj.write('%s\n' % comm) 

1040 for i in range(natoms): 

1041 fileobj.write(fmt % tuple(data[i])) 

1042 

1043 

1044# create aliases for read/write functions 

1045read_extxyz = read_xyz 

1046write_extxyz = write_xyz