Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Extended XYZ support 

3 

4Read/write files in "extended" XYZ format, storing additional 

5per-configuration information as key-value pairs on the XYZ 

6comment line, and additional per-atom properties as extra columns. 

7 

8Contributed by James Kermode <james.kermode@gmail.com> 

9""" 

10 

11 

12from itertools import islice 

13import re 

14import warnings 

15from io import StringIO, UnsupportedOperation 

16import json 

17 

18import numpy as np 

19import numbers 

20 

21from ase.atoms import Atoms 

22from ase.calculators.calculator import all_properties, Calculator 

23from ase.calculators.singlepoint import SinglePointCalculator 

24from ase.spacegroup.spacegroup import Spacegroup 

25from ase.parallel import paropen 

26from ase.constraints import FixAtoms, FixCartesian 

27from ase.io.formats import index2range 

28from ase.utils import reader 

29 

30__all__ = ['read_xyz', 'write_xyz', 'iread_xyz'] 

31 

32PROPERTY_NAME_MAP = {'positions': 'pos', 

33 'numbers': 'Z', 

34 'charges': 'charge', 

35 'symbols': 'species'} 

36 

37REV_PROPERTY_NAME_MAP = dict(zip(PROPERTY_NAME_MAP.values(), 

38 PROPERTY_NAME_MAP.keys())) 

39 

40KEY_QUOTED_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)' 

41 + r'\s*=\s*["\{\}]([^"\{\}]+)["\{\}]\s*') 

42KEY_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_]*)\s*=' 

43 + r'\s*([^\s]+)\s*') 

44KEY_RE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)\s*') 

45 

46UNPROCESSED_KEYS = ['uid'] 

47 

48SPECIAL_3_3_KEYS = ['Lattice', 'virial', 'stress'] 

49 

50# partition ase.calculators.calculator.all_properties into two lists: 

51# 'per-atom' and 'per-config' 

52per_atom_properties = ['forces', 'stresses', 'charges', 'magmoms', 'energies'] 

53per_config_properties = ['energy', 'stress', 'dipole', 'magmom', 'free_energy'] 

54 

55 

56def key_val_str_to_dict(string, sep=None): 

57 """ 

58 Parse an xyz properties string in a key=value and return a dict with 

59 various values parsed to native types. 

60 

61 Accepts brackets or quotes to delimit values. Parses integers, floats 

62 booleans and arrays thereof. Arrays with 9 values whose name is listed 

63 in SPECIAL_3_3_KEYS are converted to 3x3 arrays with Fortran ordering. 

64 

65 If sep is None, string will split on whitespace, otherwise will split 

66 key value pairs with the given separator. 

67 

68 """ 

69 # store the closing delimiters to match opening ones 

70 delimiters = { 

71 "'": "'", 

72 '"': '"', 

73 '{': '}', 

74 '[': ']' 

75 } 

76 

77 # Make pairs and process afterwards 

78 kv_pairs = [ 

79 [[]]] # List of characters for each entry, add a new list for new value 

80 cur_delimiter = None # push and pop closing delimiters 

81 escaped = False # add escaped sequences verbatim 

82 

83 # parse character-by-character unless someone can do nested brackets 

84 # and escape sequences in a regex 

85 for char in string.strip(): 

86 if escaped: # bypass everything if escaped 

87 kv_pairs[-1][-1].append(char) 

88 escaped = False 

89 elif char == '\\': # escape the next thing 

90 escaped = True 

91 elif cur_delimiter: # inside brackets 

92 if char == cur_delimiter: # found matching delimiter 

93 cur_delimiter = None 

94 else: 

95 kv_pairs[-1][-1].append(char) # inside quotes, add verbatim 

96 elif char in delimiters: 

97 cur_delimiter = delimiters[char] # brackets or quotes 

98 elif (sep is None and char.isspace()) or char == sep: 

99 if kv_pairs == [[[]]]: # empty, beginning of string 

100 continue 

101 elif kv_pairs[-1][-1] == []: 

102 continue 

103 else: 

104 kv_pairs.append([[]]) 

105 elif char == '=': 

106 if kv_pairs[-1] == [[]]: 

107 del kv_pairs[-1] 

108 kv_pairs[-1].append([]) # value 

109 else: 

110 kv_pairs[-1][-1].append(char) 

111 

112 kv_dict = {} 

113 

114 for kv_pair in kv_pairs: 

115 if len(kv_pair) == 0: # empty line 

116 continue 

117 elif len(kv_pair) == 1: # default to True 

118 key, value = ''.join(kv_pair[0]), 'T' 

119 else: # Smush anything else with kv-splitter '=' between them 

120 key, value = ''.join(kv_pair[0]), '='.join( 

121 ''.join(x) for x in kv_pair[1:]) 

122 

123 if key.lower() not in UNPROCESSED_KEYS: 

124 # Try to convert to (arrays of) floats, ints 

125 split_value = re.findall(r'[^\s,]+', value) 

126 try: 

127 try: 

128 numvalue = np.array(split_value, dtype=int) 

129 except (ValueError, OverflowError): 

130 # don't catch errors here so it falls through to bool 

131 numvalue = np.array(split_value, dtype=float) 

132 if len(numvalue) == 1: 

133 numvalue = numvalue[0] # Only one number 

134 value = numvalue 

135 except (ValueError, OverflowError): 

136 pass # value is unchanged 

137 

138 # convert special 3x3 matrices 

139 if key in SPECIAL_3_3_KEYS: 

140 if not isinstance(value, np.ndarray) or value.shape != (9,): 

141 raise ValueError("Got info item {}, expecting special 3x3 " 

142 "matrix, but value is not in the form of " 

143 "a 9-long numerical vector".format(key)) 

144 value = np.array(value).reshape((3, 3), order='F') 

145 

146 # parse special strings as boolean or JSON 

147 if isinstance(value, str): 

148 # Parse boolean values: 'T' -> True, 'F' -> False, 

149 # 'T T F' -> [True, True, False] 

150 str_to_bool = {'T': True, 'F': False} 

151 

152 try: 

153 boolvalue = [str_to_bool[vpart] for vpart in 

154 re.findall(r'[^\s,]+', value)] 

155 if len(boolvalue) == 1: 

156 value = boolvalue[0] 

157 else: 

158 value = boolvalue 

159 except KeyError: 

160 # parse JSON 

161 if value.startswith("_JSON "): 

162 d = json.loads(value.replace("_JSON ", "", 1)) 

163 value = np.array(d) 

164 if value.dtype.kind not in ['i', 'f', 'b']: 

165 value = d 

166 

167 kv_dict[key] = value 

168 

169 return kv_dict 

170 

171 

172def key_val_str_to_dict_regex(s): 

173 """ 

174 Parse strings in the form 'key1=value1 key2="quoted value"' 

175 """ 

176 d = {} 

177 s = s.strip() 

178 while True: 

179 # Match quoted string first, then fall through to plain key=value 

180 m = KEY_QUOTED_VALUE.match(s) 

181 if m is None: 

182 m = KEY_VALUE.match(s) 

183 if m is not None: 

184 s = KEY_VALUE.sub('', s, 1) 

185 else: 

186 # Just a key with no value 

187 m = KEY_RE.match(s) 

188 if m is not None: 

189 s = KEY_RE.sub('', s, 1) 

190 else: 

191 s = KEY_QUOTED_VALUE.sub('', s, 1) 

192 

193 if m is None: 

194 break # No more matches 

195 

196 key = m.group(1) 

197 try: 

198 value = m.group(2) 

199 except IndexError: 

200 # default value is 'T' (True) 

201 value = 'T' 

202 

203 if key.lower() not in UNPROCESSED_KEYS: 

204 # Try to convert to (arrays of) floats, ints 

205 try: 

206 numvalue = [] 

207 for x in value.split(): 

208 if x.find('.') == -1: 

209 numvalue.append(int(float(x))) 

210 else: 

211 numvalue.append(float(x)) 

212 if len(numvalue) == 1: 

213 numvalue = numvalue[0] # Only one number 

214 elif len(numvalue) == 9: 

215 # special case: 3x3 matrix, fortran ordering 

216 numvalue = np.array(numvalue).reshape((3, 3), order='F') 

217 else: 

218 numvalue = np.array(numvalue) # vector 

219 value = numvalue 

220 except (ValueError, OverflowError): 

221 pass 

222 

223 # Parse boolean values: 'T' -> True, 'F' -> False, 

224 # 'T T F' -> [True, True, False] 

225 if isinstance(value, str): 

226 str_to_bool = {'T': True, 'F': False} 

227 

228 if len(value.split()) > 1: 

229 if all([x in str_to_bool.keys() for x in value.split()]): 

230 value = [str_to_bool[x] for x in value.split()] 

231 elif value in str_to_bool: 

232 value = str_to_bool[value] 

233 

234 d[key] = value 

235 

236 return d 

237 

238 

239def escape(string): 

240 if (' ' in string or 

241 '"' in string or "'" in string or 

242 '{' in string or '}' in string or 

243 '[' in string or ']' in string): 

244 string = string.replace('"', '\\"') 

245 string = '"%s"' % string 

246 return string 

247 

248 

249def key_val_dict_to_str(dct, sep=' '): 

250 """ 

251 Convert atoms.info dictionary to extended XYZ string representation 

252 """ 

253 

254 def array_to_string(key, val): 

255 # some ndarrays are special (special 3x3 keys, and scalars/vectors of 

256 # numbers or bools), handle them here 

257 if key in SPECIAL_3_3_KEYS: 

258 # special 3x3 matrix, flatten in Fortran order 

259 val = val.reshape(val.size, order='F') 

260 if val.dtype.kind in ['i', 'f', 'b']: 

261 # numerical or bool scalars/vectors are special, for backwards 

262 # compat. 

263 if len(val.shape) == 0: 

264 # scalar 

265 val = str(known_types_to_str(val)) 

266 elif len(val.shape) == 1: 

267 # vector 

268 val = ' '.join(str(known_types_to_str(v)) for v in val) 

269 return val 

270 

271 def known_types_to_str(val): 

272 if isinstance(val, bool) or isinstance(val, np.bool_): 

273 return 'T' if val else 'F' 

274 elif isinstance(val, numbers.Real): 

275 return '{}'.format(val) 

276 elif isinstance(val, Spacegroup): 

277 return val.symbol 

278 else: 

279 return val 

280 

281 if len(dct) == 0: 

282 return '' 

283 

284 string = '' 

285 for key in dct: 

286 val = dct[key] 

287 

288 if isinstance(val, np.ndarray): 

289 val = array_to_string(key, val) 

290 else: 

291 # convert any known types to string 

292 val = known_types_to_str(val) 

293 

294 if val is not None and not isinstance(val, str): 

295 # what's left is an object, try using JSON 

296 if isinstance(val, np.ndarray): 

297 val = val.tolist() 

298 try: 

299 val = '_JSON ' + json.dumps(val) 

300 # if this fails, let give up 

301 except TypeError: 

302 warnings.warn('Skipping unhashable information ' 

303 '{0}'.format(key)) 

304 continue 

305 

306 key = escape(key) # escape and quote key 

307 eq = "=" 

308 # Should this really be setting empty value that's going to be 

309 # interpreted as bool True? 

310 if val is None: 

311 val = "" 

312 eq = "" 

313 val = escape(val) # escape and quote val 

314 

315 string += '%s%s%s%s' % (key, eq, val, sep) 

316 

317 return string.strip() 

318 

319 

320def parse_properties(prop_str): 

321 """ 

322 Parse extended XYZ properties format string 

323 

324 Format is "[NAME:TYPE:NCOLS]...]", e.g. "species:S:1:pos:R:3". 

325 NAME is the name of the property. 

326 TYPE is one of R, I, S, L for real, integer, string and logical. 

327 NCOLS is number of columns for that property. 

328 """ 

329 

330 properties = {} 

331 properties_list = [] 

332 dtypes = [] 

333 converters = [] 

334 

335 fields = prop_str.split(':') 

336 

337 def parse_bool(x): 

338 """ 

339 Parse bool to string 

340 """ 

341 return {'T': True, 'F': False, 

342 'True': True, 'False': False}.get(x) 

343 

344 fmt_map = {'R': ('d', float), 

345 'I': ('i', int), 

346 'S': (object, str), 

347 'L': ('bool', parse_bool)} 

348 

349 for name, ptype, cols in zip(fields[::3], 

350 fields[1::3], 

351 [int(x) for x in fields[2::3]]): 

352 if ptype not in ('R', 'I', 'S', 'L'): 

353 raise ValueError('Unknown property type: ' + ptype) 

354 ase_name = REV_PROPERTY_NAME_MAP.get(name, name) 

355 

356 dtype, converter = fmt_map[ptype] 

357 if cols == 1: 

358 dtypes.append((name, dtype)) 

359 converters.append(converter) 

360 else: 

361 for c in range(cols): 

362 dtypes.append((name + str(c), dtype)) 

363 converters.append(converter) 

364 

365 properties[name] = (ase_name, cols) 

366 properties_list.append(name) 

367 

368 dtype = np.dtype(dtypes) 

369 return properties, properties_list, dtype, converters 

370 

371 

372def _read_xyz_frame(lines, natoms, properties_parser=key_val_str_to_dict, 

373 nvec=0): 

374 # comment line 

375 line = next(lines).strip() 

376 if nvec > 0: 

377 info = {'comment': line} 

378 else: 

379 info = properties_parser(line) if line else {} 

380 

381 pbc = None 

382 if 'pbc' in info: 

383 pbc = info['pbc'] 

384 del info['pbc'] 

385 elif 'Lattice' in info: 

386 # default pbc for extxyz file containing Lattice 

387 # is True in all directions 

388 pbc = [True, True, True] 

389 elif nvec > 0: 

390 # cell information given as pseudo-Atoms 

391 pbc = [False, False, False] 

392 

393 cell = None 

394 if 'Lattice' in info: 

395 # NB: ASE cell is transpose of extended XYZ lattice 

396 cell = info['Lattice'].T 

397 del info['Lattice'] 

398 elif nvec > 0: 

399 # cell information given as pseudo-Atoms 

400 cell = np.zeros((3, 3)) 

401 

402 if 'Properties' not in info: 

403 # Default set of properties is atomic symbols and positions only 

404 info['Properties'] = 'species:S:1:pos:R:3' 

405 properties, names, dtype, convs = parse_properties(info['Properties']) 

406 del info['Properties'] 

407 

408 data = [] 

409 for ln in range(natoms): 

410 try: 

411 line = next(lines) 

412 except StopIteration: 

413 raise XYZError('ase.io.extxyz: Frame has {} atoms, expected {}' 

414 .format(len(data), natoms)) 

415 vals = line.split() 

416 row = tuple([conv(val) for conv, val in zip(convs, vals)]) 

417 data.append(row) 

418 

419 try: 

420 data = np.array(data, dtype) 

421 except TypeError: 

422 raise XYZError('Badly formatted data ' 

423 'or end of file reached before end of frame') 

424 

425 # Read VEC entries if present 

426 if nvec > 0: 

427 for ln in range(nvec): 

428 try: 

429 line = next(lines) 

430 except StopIteration: 

431 raise XYZError('ase.io.adfxyz: Frame has {} cell vectors, ' 

432 'expected {}'.format(len(cell), nvec)) 

433 entry = line.split() 

434 

435 if not entry[0].startswith('VEC'): 

436 raise XYZError('Expected cell vector, got {}'.format(entry[0])) 

437 

438 try: 

439 n = int(entry[0][3:]) 

440 except ValueError as e: 

441 raise XYZError('Expected VEC{}, got VEC{}' 

442 .format(ln + 1, entry[0][3:])) from e 

443 if n != ln + 1: 

444 raise XYZError('Expected VEC{}, got VEC{}' 

445 .format(ln + 1, n)) 

446 

447 cell[ln] = np.array([float(x) for x in entry[1:]]) 

448 pbc[ln] = True 

449 if nvec != pbc.count(True): 

450 raise XYZError('Problem with number of cell vectors') 

451 pbc = tuple(pbc) 

452 

453 arrays = {} 

454 for name in names: 

455 ase_name, cols = properties[name] 

456 if cols == 1: 

457 value = data[name] 

458 else: 

459 value = np.vstack([data[name + str(c)] 

460 for c in range(cols)]).T 

461 arrays[ase_name] = value 

462 

463 symbols = None 

464 if 'symbols' in arrays: 

465 symbols = [s.capitalize() for s in arrays['symbols']] 

466 del arrays['symbols'] 

467 

468 numbers = None 

469 duplicate_numbers = None 

470 if 'numbers' in arrays: 

471 if symbols is None: 

472 numbers = arrays['numbers'] 

473 else: 

474 duplicate_numbers = arrays['numbers'] 

475 del arrays['numbers'] 

476 

477 initial_charges = None 

478 if 'initial_charges' in arrays: 

479 initial_charges = arrays['initial_charges'] 

480 del arrays['initial_charges'] 

481 

482 positions = None 

483 if 'positions' in arrays: 

484 positions = arrays['positions'] 

485 del arrays['positions'] 

486 

487 atoms = Atoms(symbols=symbols, 

488 positions=positions, 

489 numbers=numbers, 

490 charges=initial_charges, 

491 cell=cell, 

492 pbc=pbc, 

493 info=info) 

494 

495 # Read and set constraints 

496 if 'move_mask' in arrays: 

497 move_mask = arrays['move_mask'].astype(bool) 

498 if properties['move_mask'][1] == 3: 

499 cons = [] 

500 for a in range(natoms): 

501 cons.append(FixCartesian(a, mask=~move_mask[a, :])) 

502 atoms.set_constraint(cons) 

503 elif properties['move_mask'][1] == 1: 

504 atoms.set_constraint(FixAtoms(mask=~move_mask)) 

505 else: 

506 raise XYZError('Not implemented constraint') 

507 del arrays['move_mask'] 

508 

509 for name, array in arrays.items(): 

510 atoms.new_array(name, array) 

511 

512 if duplicate_numbers is not None: 

513 atoms.set_atomic_numbers(duplicate_numbers) 

514 

515 # Load results of previous calculations into SinglePointCalculator 

516 results = {} 

517 for key in list(atoms.info.keys()): 

518 if key in per_config_properties: 

519 results[key] = atoms.info[key] 

520 # special case for stress- convert to Voigt 6-element form 

521 if key == 'stress' and results[key].shape == (3, 3): 

522 stress = results[key] 

523 stress = np.array([stress[0, 0], 

524 stress[1, 1], 

525 stress[2, 2], 

526 stress[1, 2], 

527 stress[0, 2], 

528 stress[0, 1]]) 

529 results[key] = stress 

530 for key in list(atoms.arrays.keys()): 

531 if (key in per_atom_properties and len(value.shape) >= 1 

532 and value.shape[0] == len(atoms)): 

533 results[key] = atoms.arrays[key] 

534 if results != {}: 

535 calculator = SinglePointCalculator(atoms, **results) 

536 atoms.calc = calculator 

537 return atoms 

538 

539 

540class XYZError(IOError): 

541 pass 

542 

543 

544class XYZChunk: 

545 def __init__(self, lines, natoms): 

546 self.lines = lines 

547 self.natoms = natoms 

548 

549 def build(self): 

550 """Convert unprocessed chunk into Atoms.""" 

551 return _read_xyz_frame(iter(self.lines), self.natoms) 

552 

553 

554def ixyzchunks(fd): 

555 """Yield unprocessed chunks (header, lines) for each xyz image.""" 

556 while True: 

557 line = next(fd).strip() # Raises StopIteration on empty file 

558 try: 

559 natoms = int(line) 

560 except ValueError: 

561 raise XYZError('Expected integer, found "{0}"'.format(line)) 

562 try: 

563 lines = [next(fd) for _ in range(1 + natoms)] 

564 except StopIteration: 

565 raise XYZError('Incomplete XYZ chunk') 

566 yield XYZChunk(lines, natoms) 

567 

568 

569class ImageIterator: 

570 """""" 

571 

572 def __init__(self, ichunks): 

573 self.ichunks = ichunks 

574 

575 def __call__(self, fd, indices=-1): 

576 if not hasattr(indices, 'start'): 

577 if indices < 0: 

578 indices = slice(indices - 1, indices) 

579 else: 

580 indices = slice(indices, indices + 1) 

581 

582 for chunk in self._getslice(fd, indices): 

583 yield chunk.build() 

584 

585 def _getslice(self, fd, indices): 

586 try: 

587 iterator = islice(self.ichunks(fd), indices.start, indices.stop, 

588 indices.step) 

589 except ValueError: 

590 # Negative indices. Go through the whole thing to get the length, 

591 # which allows us to evaluate the slice, and then read it again 

592 startpos = fd.tell() 

593 nchunks = 0 

594 for chunk in self.ichunks(fd): 

595 nchunks += 1 

596 fd.seek(startpos) 

597 indices_tuple = indices.indices(nchunks) 

598 iterator = islice(self.ichunks(fd), *indices_tuple) 

599 return iterator 

600 

601 

602iread_xyz = ImageIterator(ixyzchunks) 

603 

604 

605@reader 

606def read_xyz(fileobj, index=-1, properties_parser=key_val_str_to_dict): 

607 r""" 

608 Read from a file in Extended XYZ format 

609 

610 index is the frame to read, default is last frame (index=-1). 

611 properties_parser is the parse to use when converting the properties line 

612 to a dictionary, ``extxyz.key_val_str_to_dict`` is the default and can 

613 deal with most use cases, ``extxyz.key_val_str_to_dict_regex`` is slightly 

614 faster but has fewer features. 

615 

616 Extended XYZ format is an enhanced version of the `basic XYZ format 

617 <http://en.wikipedia.org/wiki/XYZ_file_format>`_ that allows extra 

618 columns to be present in the file for additonal per-atom properties as 

619 well as standardising the format of the comment line to include the 

620 cell lattice and other per-frame parameters. 

621 

622 It's easiest to describe the format with an example. Here is a 

623 standard XYZ file containing a bulk cubic 8 atom silicon cell :: 

624 

625 8 

626 Cubic bulk silicon cell 

627 Si 0.00000000 0.00000000 0.00000000 

628 Si 1.36000000 1.36000000 1.36000000 

629 Si 2.72000000 2.72000000 0.00000000 

630 Si 4.08000000 4.08000000 1.36000000 

631 Si 2.72000000 0.00000000 2.72000000 

632 Si 4.08000000 1.36000000 4.08000000 

633 Si 0.00000000 2.72000000 2.72000000 

634 Si 1.36000000 4.08000000 4.08000000 

635 

636 The first line is the number of atoms, followed by a comment and 

637 then one line per atom, giving the element symbol and cartesian 

638 x y, and z coordinates in Angstroms. 

639 

640 Here's the same configuration in extended XYZ format :: 

641 

642 8 

643 Lattice="5.44 0.0 0.0 0.0 5.44 0.0 0.0 0.0 5.44" Properties=species:S:1:pos:R:3 Time=0.0 

644 Si 0.00000000 0.00000000 0.00000000 

645 Si 1.36000000 1.36000000 1.36000000 

646 Si 2.72000000 2.72000000 0.00000000 

647 Si 4.08000000 4.08000000 1.36000000 

648 Si 2.72000000 0.00000000 2.72000000 

649 Si 4.08000000 1.36000000 4.08000000 

650 Si 0.00000000 2.72000000 2.72000000 

651 Si 1.36000000 4.08000000 4.08000000 

652 

653 In extended XYZ format, the comment line is replaced by a series of 

654 key/value pairs. The keys should be strings and values can be 

655 integers, reals, logicals (denoted by `T` and `F` for true and false) 

656 or strings. Quotes are required if a value contains any spaces (like 

657 `Lattice` above). There are two mandatory parameters that any 

658 extended XYZ: `Lattice` and `Properties`. Other parameters -- 

659 e.g. `Time` in the example above --- can be added to the parameter line 

660 as needed. 

661 

662 `Lattice` is a Cartesian 3x3 matrix representation of the cell 

663 vectors, with each vector stored as a column and the 9 values listed in 

664 Fortran column-major order, i.e. in the form :: 

665 

666 Lattice="R1x R1y R1z R2x R2y R2z R3x R3y R3z" 

667 

668 where `R1x R1y R1z` are the Cartesian x-, y- and z-components of the 

669 first lattice vector (:math:`\mathbf{a}`), `R2x R2y R2z` those of the second 

670 lattice vector (:math:`\mathbf{b}`) and `R3x R3y R3z` those of the 

671 third lattice vector (:math:`\mathbf{c}`). 

672 

673 The list of properties in the file is described by the `Properties` 

674 parameter, which should take the form of a series of colon separated 

675 triplets giving the name, format (`R` for real, `I` for integer) and 

676 number of columns of each property. For example:: 

677 

678 Properties="species:S:1:pos:R:3:vel:R:3:select:I:1" 

679 

680 indicates the first column represents atomic species, the next three 

681 columns represent atomic positions, the next three velcoities, and the 

682 last is an single integer called `select`. With this property 

683 definition, the line :: 

684 

685 Si 4.08000000 4.08000000 1.36000000 0.00000000 0.00000000 0.00000000 1 

686 

687 would describe a silicon atom at position (4.08,4.08,1.36) with zero 

688 velocity and the `select` property set to 1. 

689 

690 The property names `pos`, `Z`, `mass`, and `charge` map to ASE 

691 :attr:`ase.atoms.Atoms.arrays` entries named 

692 `positions`, `numbers`, `masses` and `charges` respectively. 

693 

694 Additional key-value pairs in the comment line are parsed into the 

695 :attr:`ase.Atoms.atoms.info` dictionary, with the following conventions 

696 

697 - Values can be quoted with `""`, `''`, `[]` or `{}` (the latter are 

698 included to ease command-line usage as the `{}` are not treated 

699 specially by the shell) 

700 - Quotes within keys or values can be escaped with `\"`. 

701 - Keys with special names `stress` or `virial` are treated as 3x3 matrices 

702 in Fortran order, as for `Lattice` above. 

703 - Otherwise, values with multiple elements are treated as 1D arrays, first 

704 assuming integer format and falling back to float if conversion is 

705 unsuccessful. 

706 - A missing value defaults to `True`, e.g. the comment line 

707 `"cutoff=3.4 have_energy"` leads to 

708 `{'cutoff': 3.4, 'have_energy': True}` in `atoms.info`. 

709 - Value strings starting with `"_JSON"` are interpreted as JSON content; 

710 similarly, when writing, anything which does not match the criteria above 

711 is serialised as JSON. 

712 

713 The extended XYZ format is also supported by the 

714 the `Ovito <http://www.ovito.org>`_ visualisation tool 

715 (from `v2.4 beta 

716 <http://www.ovito.org/index.php/component/content/article?id=25>`_ 

717 onwards). 

718 """ # noqa: E501 

719 

720 if not isinstance(index, int) and not isinstance(index, slice): 

721 raise TypeError('Index argument is neither slice nor integer!') 

722 

723 # If possible, build a partial index up to the last frame required 

724 last_frame = None 

725 if isinstance(index, int) and index >= 0: 

726 last_frame = index 

727 elif isinstance(index, slice): 

728 if index.stop is not None and index.stop >= 0: 

729 last_frame = index.stop 

730 

731 # scan through file to find where the frames start 

732 try: 

733 fileobj.seek(0) 

734 except UnsupportedOperation: 

735 fileobj = StringIO(fileobj.read()) 

736 fileobj.seek(0) 

737 frames = [] 

738 while True: 

739 frame_pos = fileobj.tell() 

740 line = fileobj.readline() 

741 if line.strip() == '': 

742 break 

743 try: 

744 natoms = int(line) 

745 except ValueError as err: 

746 raise XYZError('ase.io.extxyz: Expected xyz header but got: {}' 

747 .format(err)) 

748 fileobj.readline() # read comment line 

749 for i in range(natoms): 

750 fileobj.readline() 

751 # check for VEC 

752 nvec = 0 

753 while True: 

754 lastPos = fileobj.tell() 

755 line = fileobj.readline() 

756 if line.lstrip().startswith('VEC'): 

757 nvec += 1 

758 if nvec > 3: 

759 raise XYZError('ase.io.extxyz: More than 3 VECX entries') 

760 else: 

761 fileobj.seek(lastPos) 

762 break 

763 frames.append((frame_pos, natoms, nvec)) 

764 if last_frame is not None and len(frames) > last_frame: 

765 break 

766 

767 trbl = index2range(index, len(frames)) 

768 

769 for index in trbl: 

770 frame_pos, natoms, nvec = frames[index] 

771 fileobj.seek(frame_pos) 

772 # check for consistency with frame index table 

773 assert int(fileobj.readline()) == natoms 

774 yield _read_xyz_frame(fileobj, natoms, properties_parser, nvec) 

775 

776 

777def output_column_format(atoms, columns, arrays, 

778 write_info=True, results=None): 

779 """ 

780 Helper function to build extended XYZ comment line 

781 """ 

782 fmt_map = {'d': ('R', '%16.8f'), 

783 'f': ('R', '%16.8f'), 

784 'i': ('I', '%8d'), 

785 'O': ('S', '%s'), 

786 'S': ('S', '%s'), 

787 'U': ('S', '%-2s'), 

788 'b': ('L', ' %.1s')} 

789 

790 # NB: Lattice is stored as tranpose of ASE cell, 

791 # with Fortran array ordering 

792 lattice_str = ('Lattice="' 

793 + ' '.join([str(x) for x in np.reshape(atoms.cell.T, 

794 9, order='F')]) + 

795 '"') 

796 

797 property_names = [] 

798 property_types = [] 

799 property_ncols = [] 

800 dtypes = [] 

801 formats = [] 

802 

803 for column in columns: 

804 array = arrays[column] 

805 dtype = array.dtype 

806 

807 property_name = PROPERTY_NAME_MAP.get(column, column) 

808 property_type, fmt = fmt_map[dtype.kind] 

809 property_names.append(property_name) 

810 property_types.append(property_type) 

811 

812 if (len(array.shape) == 1 

813 or (len(array.shape) == 2 and array.shape[1] == 1)): 

814 ncol = 1 

815 dtypes.append((column, dtype)) 

816 else: 

817 ncol = array.shape[1] 

818 for c in range(ncol): 

819 dtypes.append((column + str(c), dtype)) 

820 

821 formats.extend([fmt] * ncol) 

822 property_ncols.append(ncol) 

823 

824 props_str = ':'.join([':'.join(x) for x in 

825 zip(property_names, 

826 property_types, 

827 [str(nc) for nc in property_ncols])]) 

828 

829 comment_str = '' 

830 if atoms.cell.any(): 

831 comment_str += lattice_str + ' ' 

832 comment_str += 'Properties={}'.format(props_str) 

833 

834 info = {} 

835 if write_info: 

836 info.update(atoms.info) 

837 if results is not None: 

838 info.update(results) 

839 info['pbc'] = atoms.get_pbc() # always save periodic boundary conditions 

840 comment_str += ' ' + key_val_dict_to_str(info) 

841 

842 dtype = np.dtype(dtypes) 

843 fmt = ' '.join(formats) + '\n' 

844 

845 return comment_str, property_ncols, dtype, fmt 

846 

847 

848def write_xyz(fileobj, images, comment='', columns=None, 

849 write_info=True, 

850 write_results=True, plain=False, vec_cell=False, 

851 append=False): 

852 """ 

853 Write output in extended XYZ format 

854 

855 Optionally, specify which columns (arrays) to include in output, 

856 whether to write the contents of the `atoms.info` dict to the 

857 XYZ comment line (default is True), the results of any 

858 calculator attached to this Atoms. The `plain` argument 

859 can be used to write a simple XYZ file with no additional information. 

860 `vec_cell` can be used to write the cell vectors as additional 

861 pseudo-atoms. If `append` is set to True, the file is for append (mode `a`), 

862 otherwise it is overwritten (mode `w`). 

863 

864 See documentation for :func:`read_xyz()` for further details of the extended 

865 XYZ file format. 

866 """ 

867 if isinstance(fileobj, str): 

868 mode = 'w' 

869 if append: 

870 mode = 'a' 

871 fileobj = paropen(fileobj, mode) 

872 

873 if hasattr(images, 'get_positions'): 

874 images = [images] 

875 

876 for atoms in images: 

877 natoms = len(atoms) 

878 

879 if columns is None: 

880 fr_cols = None 

881 else: 

882 fr_cols = columns[:] 

883 

884 if fr_cols is None: 

885 fr_cols = (['symbols', 'positions'] 

886 + [key for key in atoms.arrays.keys() if 

887 key not in ['symbols', 'positions', 'numbers', 

888 'species', 'pos']]) 

889 

890 if vec_cell: 

891 plain = True 

892 

893 if plain: 

894 fr_cols = ['symbols', 'positions'] 

895 write_info = False 

896 write_results = False 

897 

898 per_frame_results = {} 

899 per_atom_results = {} 

900 if write_results: 

901 calculator = atoms.calc 

902 if (calculator is not None 

903 and isinstance(calculator, Calculator)): 

904 for key in all_properties: 

905 value = calculator.results.get(key, None) 

906 if value is None: 

907 # skip missing calculator results 

908 continue 

909 if (key in per_atom_properties and len(value.shape) >= 1 

910 and value.shape[0] == len(atoms)): 

911 # per-atom quantities (forces, energies, stresses) 

912 per_atom_results[key] = value 

913 elif key in per_config_properties: 

914 # per-frame quantities (energy, stress) 

915 # special case for stress, which should be converted 

916 # to 3x3 matrices before writing 

917 if key == 'stress': 

918 xx, yy, zz, yz, xz, xy = value 

919 value = np.array( 

920 [(xx, xy, xz), (xy, yy, yz), (xz, yz, zz)]) 

921 per_frame_results[key] = value 

922 

923 # Move symbols and positions to first two properties 

924 if 'symbols' in fr_cols: 

925 i = fr_cols.index('symbols') 

926 fr_cols[0], fr_cols[i] = fr_cols[i], fr_cols[0] 

927 

928 if 'positions' in fr_cols: 

929 i = fr_cols.index('positions') 

930 fr_cols[1], fr_cols[i] = fr_cols[i], fr_cols[1] 

931 

932 # Check first column "looks like" atomic symbols 

933 if fr_cols[0] in atoms.arrays: 

934 symbols = atoms.arrays[fr_cols[0]] 

935 else: 

936 symbols = atoms.get_chemical_symbols() 

937 

938 if natoms > 0 and not isinstance(symbols[0], str): 

939 raise ValueError('First column must be symbols-like') 

940 

941 # Check second column "looks like" atomic positions 

942 pos = atoms.arrays[fr_cols[1]] 

943 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f': 

944 raise ValueError('Second column must be position-like') 

945 

946 # if vec_cell add cell information as pseudo-atoms 

947 if vec_cell: 

948 pbc = list(atoms.get_pbc()) 

949 cell = atoms.get_cell() 

950 

951 if True in pbc: 

952 nPBC = 0 

953 for i, b in enumerate(pbc): 

954 if b: 

955 nPBC += 1 

956 symbols.append('VEC' + str(nPBC)) 

957 pos = np.vstack((pos, cell[i])) 

958 # add to natoms 

959 natoms += nPBC 

960 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f': 

961 raise ValueError( 

962 'Pseudo Atoms containing cell have bad coords') 

963 

964 # Move mask 

965 if 'move_mask' in fr_cols: 

966 cnstr = images[0]._get_constraints() 

967 if len(cnstr) > 0: 

968 c0 = cnstr[0] 

969 if isinstance(c0, FixAtoms): 

970 cnstr = np.ones((natoms,), dtype=bool) 

971 for idx in c0.index: 

972 cnstr[idx] = False 

973 elif isinstance(c0, FixCartesian): 

974 masks = np.ones((natoms, 3), dtype=bool) 

975 for i in range(len(cnstr)): 

976 idx = cnstr[i].index 

977 masks[idx] = cnstr[i].mask 

978 cnstr = masks 

979 else: 

980 fr_cols.remove('move_mask') 

981 

982 # Collect data to be written out 

983 arrays = {} 

984 for column in fr_cols: 

985 if column == 'positions': 

986 arrays[column] = pos 

987 elif column in atoms.arrays: 

988 arrays[column] = atoms.arrays[column] 

989 elif column == 'symbols': 

990 arrays[column] = np.array(symbols) 

991 elif column == 'move_mask': 

992 arrays[column] = cnstr 

993 else: 

994 raise ValueError('Missing array "%s"' % column) 

995 

996 if write_results: 

997 for key in per_atom_results: 

998 if key not in fr_cols: 

999 fr_cols += [key] 

1000 else: 

1001 warnings.warn('write_xyz() overwriting array "{0}" present ' 

1002 'in atoms.arrays with stored results ' 

1003 'from calculator'.format(key)) 

1004 arrays.update(per_atom_results) 

1005 

1006 comm, ncols, dtype, fmt = output_column_format(atoms, 

1007 fr_cols, 

1008 arrays, 

1009 write_info, 

1010 per_frame_results) 

1011 

1012 if plain or comment != '': 

1013 # override key/value pairs with user-speficied comment string 

1014 comm = comment.rstrip() 

1015 if '\n' in comm: 

1016 raise ValueError('Comment line should not have line breaks.') 

1017 

1018 # Pack fr_cols into record array 

1019 data = np.zeros(natoms, dtype) 

1020 for column, ncol in zip(fr_cols, ncols): 

1021 value = arrays[column] 

1022 if ncol == 1: 

1023 data[column] = np.squeeze(value) 

1024 else: 

1025 for c in range(ncol): 

1026 data[column + str(c)] = value[:, c] 

1027 

1028 nat = natoms 

1029 if vec_cell: 

1030 nat -= nPBC 

1031 # Write the output 

1032 fileobj.write('%d\n' % nat) 

1033 fileobj.write('%s\n' % comm) 

1034 for i in range(natoms): 

1035 fileobj.write(fmt % tuple(data[i])) 

1036 

1037 

1038# create aliases for read/write functions 

1039read_extxyz = read_xyz 

1040write_extxyz = write_xyz