Coverage for /builds/ase/ase/ase/io/extxyz.py : 78.71%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Extended XYZ support
4Read/write files in "extended" XYZ format, storing additional
5per-configuration information as key-value pairs on the XYZ
6comment line, and additional per-atom properties as extra columns.
8Contributed by James Kermode <james.kermode@gmail.com>
9"""
12from itertools import islice
13import re
14import warnings
15from io import StringIO, UnsupportedOperation
16import json
18import numpy as np
19import numbers
21from ase.atoms import Atoms
22from ase.calculators.calculator import all_properties, BaseCalculator
23from ase.calculators.singlepoint import SinglePointCalculator
24from ase.spacegroup.spacegroup import Spacegroup
25from ase.parallel import paropen
26from ase.constraints import FixAtoms, FixCartesian
27from ase.io.formats import index2range
28from ase.utils import reader
30__all__ = ['read_xyz', 'write_xyz', 'iread_xyz']
32PROPERTY_NAME_MAP = {'positions': 'pos',
33 'numbers': 'Z',
34 'charges': 'charge',
35 'symbols': 'species'}
37REV_PROPERTY_NAME_MAP = dict(zip(PROPERTY_NAME_MAP.values(),
38 PROPERTY_NAME_MAP.keys()))
40KEY_QUOTED_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)'
41 + r'\s*=\s*["\{\}]([^"\{\}]+)["\{\}]\s*')
42KEY_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_]*)\s*='
43 + r'\s*([^\s]+)\s*')
44KEY_RE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)\s*')
46UNPROCESSED_KEYS = ['uid']
48SPECIAL_3_3_KEYS = ['Lattice', 'virial', 'stress']
50# partition ase.calculators.calculator.all_properties into two lists:
51# 'per-atom' and 'per-config'
52per_atom_properties = ['forces', 'stresses', 'charges', 'magmoms', 'energies']
53per_config_properties = ['energy', 'stress', 'dipole', 'magmom', 'free_energy']
56def key_val_str_to_dict(string, sep=None):
57 """
58 Parse an xyz properties string in a key=value and return a dict with
59 various values parsed to native types.
61 Accepts brackets or quotes to delimit values. Parses integers, floats
62 booleans and arrays thereof. Arrays with 9 values whose name is listed
63 in SPECIAL_3_3_KEYS are converted to 3x3 arrays with Fortran ordering.
65 If sep is None, string will split on whitespace, otherwise will split
66 key value pairs with the given separator.
68 """
69 # store the closing delimiters to match opening ones
70 delimiters = {
71 "'": "'",
72 '"': '"',
73 '{': '}',
74 '[': ']'
75 }
77 # Make pairs and process afterwards
78 kv_pairs = [
79 [[]]] # List of characters for each entry, add a new list for new value
80 cur_delimiter = None # push and pop closing delimiters
81 escaped = False # add escaped sequences verbatim
83 # parse character-by-character unless someone can do nested brackets
84 # and escape sequences in a regex
85 for char in string.strip():
86 if escaped: # bypass everything if escaped
87 kv_pairs[-1][-1].append(char)
88 escaped = False
89 elif char == '\\': # escape the next thing
90 escaped = True
91 elif cur_delimiter: # inside brackets
92 if char == cur_delimiter: # found matching delimiter
93 cur_delimiter = None
94 else:
95 kv_pairs[-1][-1].append(char) # inside quotes, add verbatim
96 elif char in delimiters:
97 cur_delimiter = delimiters[char] # brackets or quotes
98 elif (sep is None and char.isspace()) or char == sep:
99 if kv_pairs == [[[]]]: # empty, beginning of string
100 continue
101 elif kv_pairs[-1][-1] == []:
102 continue
103 else:
104 kv_pairs.append([[]])
105 elif char == '=':
106 if kv_pairs[-1] == [[]]:
107 del kv_pairs[-1]
108 kv_pairs[-1].append([]) # value
109 else:
110 kv_pairs[-1][-1].append(char)
112 kv_dict = {}
114 for kv_pair in kv_pairs:
115 if len(kv_pair) == 0: # empty line
116 continue
117 elif len(kv_pair) == 1: # default to True
118 key, value = ''.join(kv_pair[0]), 'T'
119 else: # Smush anything else with kv-splitter '=' between them
120 key, value = ''.join(kv_pair[0]), '='.join(
121 ''.join(x) for x in kv_pair[1:])
123 if key.lower() not in UNPROCESSED_KEYS:
124 # Try to convert to (arrays of) floats, ints
125 split_value = re.findall(r'[^\s,]+', value)
126 try:
127 try:
128 numvalue = np.array(split_value, dtype=int)
129 except (ValueError, OverflowError):
130 # don't catch errors here so it falls through to bool
131 numvalue = np.array(split_value, dtype=float)
132 if len(numvalue) == 1:
133 numvalue = numvalue[0] # Only one number
134 value = numvalue
135 except (ValueError, OverflowError):
136 pass # value is unchanged
138 # convert special 3x3 matrices
139 if key in SPECIAL_3_3_KEYS:
140 if not isinstance(value, np.ndarray) or value.shape != (9,):
141 raise ValueError("Got info item {}, expecting special 3x3 "
142 "matrix, but value is not in the form of "
143 "a 9-long numerical vector".format(key))
144 value = np.array(value).reshape((3, 3), order='F')
146 # parse special strings as boolean or JSON
147 if isinstance(value, str):
148 # Parse boolean values:
149 # T or [tT]rue or TRUE -> True
150 # F or [fF]alse or FALSE -> False
151 # For list: 'T T F' -> [True, True, False]
152 # Cannot use `.lower()` to reduce `str_to_bool` mapping because
153 # 't'/'f' not accepted
154 str_to_bool = {
155 'T': True, 'F': False, 'true': True, 'false': False,
156 'True': True, 'False': False, 'TRUE': True, 'FALSE': False
157 }
158 try:
159 boolvalue = [str_to_bool[vpart] for vpart in
160 re.findall(r'[^\s,]+', value)]
162 if len(boolvalue) == 1:
163 value = boolvalue[0]
164 else:
165 value = boolvalue
166 except KeyError:
167 # Try to parse JSON
168 if value.startswith("_JSON "):
169 d = json.loads(value.replace("_JSON ", "", 1))
170 value = np.array(d)
171 if value.dtype.kind not in ['i', 'f', 'b']:
172 value = d
174 kv_dict[key] = value
176 return kv_dict
179def key_val_str_to_dict_regex(s):
180 """
181 Parse strings in the form 'key1=value1 key2="quoted value"'
182 """
183 d = {}
184 s = s.strip()
185 while True:
186 # Match quoted string first, then fall through to plain key=value
187 m = KEY_QUOTED_VALUE.match(s)
188 if m is None:
189 m = KEY_VALUE.match(s)
190 if m is not None:
191 s = KEY_VALUE.sub('', s, 1)
192 else:
193 # Just a key with no value
194 m = KEY_RE.match(s)
195 if m is not None:
196 s = KEY_RE.sub('', s, 1)
197 else:
198 s = KEY_QUOTED_VALUE.sub('', s, 1)
200 if m is None:
201 break # No more matches
203 key = m.group(1)
204 try:
205 value = m.group(2)
206 except IndexError:
207 # default value is 'T' (True)
208 value = 'T'
210 if key.lower() not in UNPROCESSED_KEYS:
211 # Try to convert to (arrays of) floats, ints
212 try:
213 numvalue = []
214 for x in value.split():
215 if x.find('.') == -1:
216 numvalue.append(int(float(x)))
217 else:
218 numvalue.append(float(x))
219 if len(numvalue) == 1:
220 numvalue = numvalue[0] # Only one number
221 elif len(numvalue) == 9:
222 # special case: 3x3 matrix, fortran ordering
223 numvalue = np.array(numvalue).reshape((3, 3), order='F')
224 else:
225 numvalue = np.array(numvalue) # vector
226 value = numvalue
227 except (ValueError, OverflowError):
228 pass
230 # Parse boolean values: 'T' -> True, 'F' -> False,
231 # 'T T F' -> [True, True, False]
232 if isinstance(value, str):
233 str_to_bool = {'T': True, 'F': False}
235 if len(value.split()) > 1:
236 if all([x in str_to_bool.keys() for x in value.split()]):
237 value = [str_to_bool[x] for x in value.split()]
238 elif value in str_to_bool:
239 value = str_to_bool[value]
241 d[key] = value
243 return d
246def escape(string):
247 if (' ' in string or
248 '"' in string or "'" in string or
249 '{' in string or '}' in string or
250 '[' in string or ']' in string):
251 string = string.replace('"', '\\"')
252 string = '"%s"' % string
253 return string
256def key_val_dict_to_str(dct, sep=' '):
257 """
258 Convert atoms.info dictionary to extended XYZ string representation
259 """
261 def array_to_string(key, val):
262 # some ndarrays are special (special 3x3 keys, and scalars/vectors of
263 # numbers or bools), handle them here
264 if key in SPECIAL_3_3_KEYS:
265 # special 3x3 matrix, flatten in Fortran order
266 val = val.reshape(val.size, order='F')
267 if val.dtype.kind in ['i', 'f', 'b']:
268 # numerical or bool scalars/vectors are special, for backwards
269 # compat.
270 if len(val.shape) == 0:
271 # scalar
272 val = str(known_types_to_str(val))
273 elif len(val.shape) == 1:
274 # vector
275 val = ' '.join(str(known_types_to_str(v)) for v in val)
276 return val
278 def known_types_to_str(val):
279 if isinstance(val, bool) or isinstance(val, np.bool_):
280 return 'T' if val else 'F'
281 elif isinstance(val, numbers.Real):
282 return '{}'.format(val)
283 elif isinstance(val, Spacegroup):
284 return val.symbol
285 else:
286 return val
288 if len(dct) == 0:
289 return ''
291 string = ''
292 for key in dct:
293 val = dct[key]
295 if isinstance(val, np.ndarray):
296 val = array_to_string(key, val)
297 else:
298 # convert any known types to string
299 val = known_types_to_str(val)
301 if val is not None and not isinstance(val, str):
302 # what's left is an object, try using JSON
303 if isinstance(val, np.ndarray):
304 val = val.tolist()
305 try:
306 val = '_JSON ' + json.dumps(val)
307 # if this fails, let give up
308 except TypeError:
309 warnings.warn('Skipping unhashable information '
310 '{0}'.format(key))
311 continue
313 key = escape(key) # escape and quote key
314 eq = "="
315 # Should this really be setting empty value that's going to be
316 # interpreted as bool True?
317 if val is None:
318 val = ""
319 eq = ""
320 val = escape(val) # escape and quote val
322 string += '%s%s%s%s' % (key, eq, val, sep)
324 return string.strip()
327def parse_properties(prop_str):
328 """
329 Parse extended XYZ properties format string
331 Format is "[NAME:TYPE:NCOLS]...]", e.g. "species:S:1:pos:R:3".
332 NAME is the name of the property.
333 TYPE is one of R, I, S, L for real, integer, string and logical.
334 NCOLS is number of columns for that property.
335 """
337 properties = {}
338 properties_list = []
339 dtypes = []
340 converters = []
342 fields = prop_str.split(':')
344 def parse_bool(x):
345 """
346 Parse bool to string
347 """
348 return {'T': True, 'F': False,
349 'True': True, 'False': False}.get(x)
351 fmt_map = {'R': ('d', float),
352 'I': ('i', int),
353 'S': (object, str),
354 'L': ('bool', parse_bool)}
356 for name, ptype, cols in zip(fields[::3],
357 fields[1::3],
358 [int(x) for x in fields[2::3]]):
359 if ptype not in ('R', 'I', 'S', 'L'):
360 raise ValueError('Unknown property type: ' + ptype)
361 ase_name = REV_PROPERTY_NAME_MAP.get(name, name)
363 dtype, converter = fmt_map[ptype]
364 if cols == 1:
365 dtypes.append((name, dtype))
366 converters.append(converter)
367 else:
368 for c in range(cols):
369 dtypes.append((name + str(c), dtype))
370 converters.append(converter)
372 properties[name] = (ase_name, cols)
373 properties_list.append(name)
375 dtype = np.dtype(dtypes)
376 return properties, properties_list, dtype, converters
379def _read_xyz_frame(lines, natoms, properties_parser=key_val_str_to_dict,
380 nvec=0):
381 # comment line
382 line = next(lines).strip()
383 if nvec > 0:
384 info = {'comment': line}
385 else:
386 info = properties_parser(line) if line else {}
388 pbc = None
389 if 'pbc' in info:
390 pbc = info.pop('pbc')
391 elif 'Lattice' in info:
392 # default pbc for extxyz file containing Lattice
393 # is True in all directions
394 pbc = [True, True, True]
395 elif nvec > 0:
396 # cell information given as pseudo-Atoms
397 pbc = [False, False, False]
399 cell = None
400 if 'Lattice' in info:
401 # NB: ASE cell is transpose of extended XYZ lattice
402 cell = info['Lattice'].T
403 del info['Lattice']
404 elif nvec > 0:
405 # cell information given as pseudo-Atoms
406 cell = np.zeros((3, 3))
408 if 'Properties' not in info:
409 # Default set of properties is atomic symbols and positions only
410 info['Properties'] = 'species:S:1:pos:R:3'
411 properties, names, dtype, convs = parse_properties(info['Properties'])
412 del info['Properties']
414 data = []
415 for ln in range(natoms):
416 try:
417 line = next(lines)
418 except StopIteration:
419 raise XYZError('ase.io.extxyz: Frame has {} atoms, expected {}'
420 .format(len(data), natoms))
421 vals = line.split()
422 row = tuple([conv(val) for conv, val in zip(convs, vals)])
423 data.append(row)
425 try:
426 data = np.array(data, dtype)
427 except TypeError:
428 raise XYZError('Badly formatted data '
429 'or end of file reached before end of frame')
431 # Read VEC entries if present
432 if nvec > 0:
433 for ln in range(nvec):
434 try:
435 line = next(lines)
436 except StopIteration:
437 raise XYZError('ase.io.adfxyz: Frame has {} cell vectors, '
438 'expected {}'.format(len(cell), nvec))
439 entry = line.split()
441 if not entry[0].startswith('VEC'):
442 raise XYZError('Expected cell vector, got {}'.format(entry[0]))
444 try:
445 n = int(entry[0][3:])
446 except ValueError as e:
447 raise XYZError('Expected VEC{}, got VEC{}'
448 .format(ln + 1, entry[0][3:])) from e
449 if n != ln + 1:
450 raise XYZError('Expected VEC{}, got VEC{}'
451 .format(ln + 1, n))
453 cell[ln] = np.array([float(x) for x in entry[1:]])
454 pbc[ln] = True
455 if nvec != pbc.count(True):
456 raise XYZError('Problem with number of cell vectors')
457 pbc = tuple(pbc)
459 arrays = {}
460 for name in names:
461 ase_name, cols = properties[name]
462 if cols == 1:
463 value = data[name]
464 else:
465 value = np.vstack([data[name + str(c)]
466 for c in range(cols)]).T
467 arrays[ase_name] = value
469 symbols = None
470 if 'symbols' in arrays:
471 symbols = [s.capitalize() for s in arrays['symbols']]
472 del arrays['symbols']
474 numbers = None
475 duplicate_numbers = None
476 if 'numbers' in arrays:
477 if symbols is None:
478 numbers = arrays['numbers']
479 else:
480 duplicate_numbers = arrays['numbers']
481 del arrays['numbers']
483 initial_charges = None
484 if 'initial_charges' in arrays:
485 initial_charges = arrays['initial_charges']
486 del arrays['initial_charges']
488 positions = None
489 if 'positions' in arrays:
490 positions = arrays['positions']
491 del arrays['positions']
493 atoms = Atoms(symbols=symbols,
494 positions=positions,
495 numbers=numbers,
496 charges=initial_charges,
497 cell=cell,
498 pbc=pbc,
499 info=info)
501 # Read and set constraints
502 if 'move_mask' in arrays:
503 move_mask = arrays['move_mask'].astype(bool)
504 if properties['move_mask'][1] == 3:
505 cons = []
506 for a in range(natoms):
507 cons.append(FixCartesian(a, mask=~move_mask[a, :]))
508 atoms.set_constraint(cons)
509 elif properties['move_mask'][1] == 1:
510 atoms.set_constraint(FixAtoms(mask=~move_mask))
511 else:
512 raise XYZError('Not implemented constraint')
513 del arrays['move_mask']
515 for name, array in arrays.items():
516 atoms.new_array(name, array)
518 if duplicate_numbers is not None:
519 atoms.set_atomic_numbers(duplicate_numbers)
521 # Load results of previous calculations into SinglePointCalculator
522 results = {}
523 for key in list(atoms.info.keys()):
524 if key in per_config_properties:
525 results[key] = atoms.info[key]
526 # special case for stress- convert to Voigt 6-element form
527 if key == 'stress' and results[key].shape == (3, 3):
528 stress = results[key]
529 stress = np.array([stress[0, 0],
530 stress[1, 1],
531 stress[2, 2],
532 stress[1, 2],
533 stress[0, 2],
534 stress[0, 1]])
535 results[key] = stress
536 for key in list(atoms.arrays.keys()):
537 if (key in per_atom_properties and len(value.shape) >= 1
538 and value.shape[0] == len(atoms)):
539 results[key] = atoms.arrays[key]
540 if results != {}:
541 calculator = SinglePointCalculator(atoms, **results)
542 atoms.calc = calculator
543 return atoms
546class XYZError(IOError):
547 pass
550class XYZChunk:
551 def __init__(self, lines, natoms):
552 self.lines = lines
553 self.natoms = natoms
555 def build(self):
556 """Convert unprocessed chunk into Atoms."""
557 return _read_xyz_frame(iter(self.lines), self.natoms)
560def ixyzchunks(fd):
561 """Yield unprocessed chunks (header, lines) for each xyz image."""
562 while True:
563 line = next(fd).strip() # Raises StopIteration on empty file
564 try:
565 natoms = int(line)
566 except ValueError:
567 raise XYZError('Expected integer, found "{0}"'.format(line))
568 try:
569 lines = [next(fd) for _ in range(1 + natoms)]
570 except StopIteration:
571 raise XYZError('Incomplete XYZ chunk')
572 yield XYZChunk(lines, natoms)
575class ImageIterator:
576 """"""
578 def __init__(self, ichunks):
579 self.ichunks = ichunks
581 def __call__(self, fd, indices=-1):
582 if not hasattr(indices, 'start'):
583 if indices < 0:
584 indices = slice(indices - 1, indices)
585 else:
586 indices = slice(indices, indices + 1)
588 for chunk in self._getslice(fd, indices):
589 yield chunk.build()
591 def _getslice(self, fd, indices):
592 try:
593 iterator = islice(self.ichunks(fd), indices.start, indices.stop,
594 indices.step)
595 except ValueError:
596 # Negative indices. Go through the whole thing to get the length,
597 # which allows us to evaluate the slice, and then read it again
598 startpos = fd.tell()
599 nchunks = 0
600 for chunk in self.ichunks(fd):
601 nchunks += 1
602 fd.seek(startpos)
603 indices_tuple = indices.indices(nchunks)
604 iterator = islice(self.ichunks(fd), *indices_tuple)
605 return iterator
608iread_xyz = ImageIterator(ixyzchunks)
611@reader
612def read_xyz(fileobj, index=-1, properties_parser=key_val_str_to_dict):
613 r"""
614 Read from a file in Extended XYZ format
616 index is the frame to read, default is last frame (index=-1).
617 properties_parser is the parse to use when converting the properties line
618 to a dictionary, ``extxyz.key_val_str_to_dict`` is the default and can
619 deal with most use cases, ``extxyz.key_val_str_to_dict_regex`` is slightly
620 faster but has fewer features.
622 Extended XYZ format is an enhanced version of the `basic XYZ format
623 <http://en.wikipedia.org/wiki/XYZ_file_format>`_ that allows extra
624 columns to be present in the file for additonal per-atom properties as
625 well as standardising the format of the comment line to include the
626 cell lattice and other per-frame parameters.
628 It's easiest to describe the format with an example. Here is a
629 standard XYZ file containing a bulk cubic 8 atom silicon cell ::
631 8
632 Cubic bulk silicon cell
633 Si 0.00000000 0.00000000 0.00000000
634 Si 1.36000000 1.36000000 1.36000000
635 Si 2.72000000 2.72000000 0.00000000
636 Si 4.08000000 4.08000000 1.36000000
637 Si 2.72000000 0.00000000 2.72000000
638 Si 4.08000000 1.36000000 4.08000000
639 Si 0.00000000 2.72000000 2.72000000
640 Si 1.36000000 4.08000000 4.08000000
642 The first line is the number of atoms, followed by a comment and
643 then one line per atom, giving the element symbol and cartesian
644 x y, and z coordinates in Angstroms.
646 Here's the same configuration in extended XYZ format ::
648 8
649 Lattice="5.44 0.0 0.0 0.0 5.44 0.0 0.0 0.0 5.44" Properties=species:S:1:pos:R:3 Time=0.0
650 Si 0.00000000 0.00000000 0.00000000
651 Si 1.36000000 1.36000000 1.36000000
652 Si 2.72000000 2.72000000 0.00000000
653 Si 4.08000000 4.08000000 1.36000000
654 Si 2.72000000 0.00000000 2.72000000
655 Si 4.08000000 1.36000000 4.08000000
656 Si 0.00000000 2.72000000 2.72000000
657 Si 1.36000000 4.08000000 4.08000000
659 In extended XYZ format, the comment line is replaced by a series of
660 key/value pairs. The keys should be strings and values can be
661 integers, reals, logicals (denoted by `T` and `F` for true and false)
662 or strings. Quotes are required if a value contains any spaces (like
663 `Lattice` above). There are two mandatory parameters that any
664 extended XYZ: `Lattice` and `Properties`. Other parameters --
665 e.g. `Time` in the example above --- can be added to the parameter line
666 as needed.
668 `Lattice` is a Cartesian 3x3 matrix representation of the cell
669 vectors, with each vector stored as a column and the 9 values listed in
670 Fortran column-major order, i.e. in the form ::
672 Lattice="R1x R1y R1z R2x R2y R2z R3x R3y R3z"
674 where `R1x R1y R1z` are the Cartesian x-, y- and z-components of the
675 first lattice vector (:math:`\mathbf{a}`), `R2x R2y R2z` those of the second
676 lattice vector (:math:`\mathbf{b}`) and `R3x R3y R3z` those of the
677 third lattice vector (:math:`\mathbf{c}`).
679 The list of properties in the file is described by the `Properties`
680 parameter, which should take the form of a series of colon separated
681 triplets giving the name, format (`R` for real, `I` for integer) and
682 number of columns of each property. For example::
684 Properties="species:S:1:pos:R:3:vel:R:3:select:I:1"
686 indicates the first column represents atomic species, the next three
687 columns represent atomic positions, the next three velcoities, and the
688 last is an single integer called `select`. With this property
689 definition, the line ::
691 Si 4.08000000 4.08000000 1.36000000 0.00000000 0.00000000 0.00000000 1
693 would describe a silicon atom at position (4.08,4.08,1.36) with zero
694 velocity and the `select` property set to 1.
696 The property names `pos`, `Z`, `mass`, and `charge` map to ASE
697 :attr:`ase.atoms.Atoms.arrays` entries named
698 `positions`, `numbers`, `masses` and `charges` respectively.
700 Additional key-value pairs in the comment line are parsed into the
701 :attr:`ase.Atoms.atoms.info` dictionary, with the following conventions
703 - Values can be quoted with `""`, `''`, `[]` or `{}` (the latter are
704 included to ease command-line usage as the `{}` are not treated
705 specially by the shell)
706 - Quotes within keys or values can be escaped with `\"`.
707 - Keys with special names `stress` or `virial` are treated as 3x3 matrices
708 in Fortran order, as for `Lattice` above.
709 - Otherwise, values with multiple elements are treated as 1D arrays, first
710 assuming integer format and falling back to float if conversion is
711 unsuccessful.
712 - A missing value defaults to `True`, e.g. the comment line
713 `"cutoff=3.4 have_energy"` leads to
714 `{'cutoff': 3.4, 'have_energy': True}` in `atoms.info`.
715 - Value strings starting with `"_JSON"` are interpreted as JSON content;
716 similarly, when writing, anything which does not match the criteria above
717 is serialised as JSON.
719 The extended XYZ format is also supported by the
720 the `Ovito <http://www.ovito.org>`_ visualisation tool
721 (from `v2.4 beta
722 <http://www.ovito.org/index.php/component/content/article?id=25>`_
723 onwards).
724 """ # noqa: E501
726 if not isinstance(index, int) and not isinstance(index, slice):
727 raise TypeError('Index argument is neither slice nor integer!')
729 # If possible, build a partial index up to the last frame required
730 last_frame = None
731 if isinstance(index, int) and index >= 0:
732 last_frame = index
733 elif isinstance(index, slice):
734 if index.stop is not None and index.stop >= 0:
735 last_frame = index.stop
737 # scan through file to find where the frames start
738 try:
739 fileobj.seek(0)
740 except UnsupportedOperation:
741 fileobj = StringIO(fileobj.read())
742 fileobj.seek(0)
743 frames = []
744 while True:
745 frame_pos = fileobj.tell()
746 line = fileobj.readline()
747 if line.strip() == '':
748 break
749 try:
750 natoms = int(line)
751 except ValueError as err:
752 raise XYZError('ase.io.extxyz: Expected xyz header but got: {}'
753 .format(err))
754 fileobj.readline() # read comment line
755 for i in range(natoms):
756 fileobj.readline()
757 # check for VEC
758 nvec = 0
759 while True:
760 lastPos = fileobj.tell()
761 line = fileobj.readline()
762 if line.lstrip().startswith('VEC'):
763 nvec += 1
764 if nvec > 3:
765 raise XYZError('ase.io.extxyz: More than 3 VECX entries')
766 else:
767 fileobj.seek(lastPos)
768 break
769 frames.append((frame_pos, natoms, nvec))
770 if last_frame is not None and len(frames) > last_frame:
771 break
773 trbl = index2range(index, len(frames))
775 for index in trbl:
776 frame_pos, natoms, nvec = frames[index]
777 fileobj.seek(frame_pos)
778 # check for consistency with frame index table
779 assert int(fileobj.readline()) == natoms
780 yield _read_xyz_frame(fileobj, natoms, properties_parser, nvec)
783def output_column_format(atoms, columns, arrays,
784 write_info=True, results=None):
785 """
786 Helper function to build extended XYZ comment line
787 """
788 fmt_map = {'d': ('R', '%16.8f'),
789 'f': ('R', '%16.8f'),
790 'i': ('I', '%8d'),
791 'O': ('S', '%s'),
792 'S': ('S', '%s'),
793 'U': ('S', '%-2s'),
794 'b': ('L', ' %.1s')}
796 # NB: Lattice is stored as tranpose of ASE cell,
797 # with Fortran array ordering
798 lattice_str = ('Lattice="'
799 + ' '.join([str(x) for x in np.reshape(atoms.cell.T,
800 9, order='F')]) +
801 '"')
803 property_names = []
804 property_types = []
805 property_ncols = []
806 dtypes = []
807 formats = []
809 for column in columns:
810 array = arrays[column]
811 dtype = array.dtype
813 property_name = PROPERTY_NAME_MAP.get(column, column)
814 property_type, fmt = fmt_map[dtype.kind]
815 property_names.append(property_name)
816 property_types.append(property_type)
818 if (len(array.shape) == 1
819 or (len(array.shape) == 2 and array.shape[1] == 1)):
820 ncol = 1
821 dtypes.append((column, dtype))
822 else:
823 ncol = array.shape[1]
824 for c in range(ncol):
825 dtypes.append((column + str(c), dtype))
827 formats.extend([fmt] * ncol)
828 property_ncols.append(ncol)
830 props_str = ':'.join([':'.join(x) for x in
831 zip(property_names,
832 property_types,
833 [str(nc) for nc in property_ncols])])
835 comment_str = ''
836 if atoms.cell.any():
837 comment_str += lattice_str + ' '
838 comment_str += 'Properties={}'.format(props_str)
840 info = {}
841 if write_info:
842 info.update(atoms.info)
843 if results is not None:
844 info.update(results)
845 info['pbc'] = atoms.get_pbc() # always save periodic boundary conditions
846 comment_str += ' ' + key_val_dict_to_str(info)
848 dtype = np.dtype(dtypes)
849 fmt = ' '.join(formats) + '\n'
851 return comment_str, property_ncols, dtype, fmt
854def write_xyz(fileobj, images, comment='', columns=None,
855 write_info=True,
856 write_results=True, plain=False, vec_cell=False,
857 append=False):
858 """
859 Write output in extended XYZ format
861 Optionally, specify which columns (arrays) to include in output,
862 whether to write the contents of the `atoms.info` dict to the
863 XYZ comment line (default is True), the results of any
864 calculator attached to this Atoms. The `plain` argument
865 can be used to write a simple XYZ file with no additional information.
866 `vec_cell` can be used to write the cell vectors as additional
867 pseudo-atoms. If `append` is set to True, the file is for append (mode `a`),
868 otherwise it is overwritten (mode `w`).
870 See documentation for :func:`read_xyz()` for further details of the extended
871 XYZ file format.
872 """
873 if isinstance(fileobj, str):
874 mode = 'w'
875 if append:
876 mode = 'a'
877 fileobj = paropen(fileobj, mode)
879 if hasattr(images, 'get_positions'):
880 images = [images]
882 for atoms in images:
883 natoms = len(atoms)
885 if columns is None:
886 fr_cols = None
887 else:
888 fr_cols = columns[:]
890 if fr_cols is None:
891 fr_cols = (['symbols', 'positions']
892 + [key for key in atoms.arrays.keys() if
893 key not in ['symbols', 'positions', 'numbers',
894 'species', 'pos']])
896 if vec_cell:
897 plain = True
899 if plain:
900 fr_cols = ['symbols', 'positions']
901 write_info = False
902 write_results = False
904 per_frame_results = {}
905 per_atom_results = {}
906 if write_results:
907 calculator = atoms.calc
908 if (calculator is not None
909 and isinstance(calculator, BaseCalculator)):
910 for key in all_properties:
911 value = calculator.results.get(key, None)
912 if value is None:
913 # skip missing calculator results
914 continue
915 if (key in per_atom_properties and len(value.shape) >= 1
916 and value.shape[0] == len(atoms)):
917 # per-atom quantities (forces, energies, stresses)
918 per_atom_results[key] = value
919 elif key in per_config_properties:
920 # per-frame quantities (energy, stress)
921 # special case for stress, which should be converted
922 # to 3x3 matrices before writing
923 if key == 'stress':
924 xx, yy, zz, yz, xz, xy = value
925 value = np.array(
926 [(xx, xy, xz), (xy, yy, yz), (xz, yz, zz)])
927 per_frame_results[key] = value
929 # Move symbols and positions to first two properties
930 if 'symbols' in fr_cols:
931 i = fr_cols.index('symbols')
932 fr_cols[0], fr_cols[i] = fr_cols[i], fr_cols[0]
934 if 'positions' in fr_cols:
935 i = fr_cols.index('positions')
936 fr_cols[1], fr_cols[i] = fr_cols[i], fr_cols[1]
938 # Check first column "looks like" atomic symbols
939 if fr_cols[0] in atoms.arrays:
940 symbols = atoms.arrays[fr_cols[0]]
941 else:
942 symbols = atoms.get_chemical_symbols()
944 if natoms > 0 and not isinstance(symbols[0], str):
945 raise ValueError('First column must be symbols-like')
947 # Check second column "looks like" atomic positions
948 pos = atoms.arrays[fr_cols[1]]
949 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f':
950 raise ValueError('Second column must be position-like')
952 # if vec_cell add cell information as pseudo-atoms
953 if vec_cell:
954 pbc = list(atoms.get_pbc())
955 cell = atoms.get_cell()
957 if True in pbc:
958 nPBC = 0
959 for i, b in enumerate(pbc):
960 if b:
961 nPBC += 1
962 symbols.append('VEC' + str(nPBC))
963 pos = np.vstack((pos, cell[i]))
964 # add to natoms
965 natoms += nPBC
966 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f':
967 raise ValueError(
968 'Pseudo Atoms containing cell have bad coords')
970 # Move mask
971 if 'move_mask' in fr_cols:
972 cnstr = images[0]._get_constraints()
973 if len(cnstr) > 0:
974 c0 = cnstr[0]
975 if isinstance(c0, FixAtoms):
976 cnstr = np.ones((natoms,), dtype=bool)
977 for idx in c0.index:
978 cnstr[idx] = False
979 elif isinstance(c0, FixCartesian):
980 masks = np.ones((natoms, 3), dtype=bool)
981 for i in range(len(cnstr)):
982 idx = cnstr[i].index
983 masks[idx] = cnstr[i].mask
984 cnstr = masks
985 else:
986 fr_cols.remove('move_mask')
988 # Collect data to be written out
989 arrays = {}
990 for column in fr_cols:
991 if column == 'positions':
992 arrays[column] = pos
993 elif column in atoms.arrays:
994 arrays[column] = atoms.arrays[column]
995 elif column == 'symbols':
996 arrays[column] = np.array(symbols)
997 elif column == 'move_mask':
998 arrays[column] = cnstr
999 else:
1000 raise ValueError('Missing array "%s"' % column)
1002 if write_results:
1003 for key in per_atom_results:
1004 if key not in fr_cols:
1005 fr_cols += [key]
1006 else:
1007 warnings.warn('write_xyz() overwriting array "{0}" present '
1008 'in atoms.arrays with stored results '
1009 'from calculator'.format(key))
1010 arrays.update(per_atom_results)
1012 comm, ncols, dtype, fmt = output_column_format(atoms,
1013 fr_cols,
1014 arrays,
1015 write_info,
1016 per_frame_results)
1018 if plain or comment != '':
1019 # override key/value pairs with user-speficied comment string
1020 comm = comment.rstrip()
1021 if '\n' in comm:
1022 raise ValueError('Comment line should not have line breaks.')
1024 # Pack fr_cols into record array
1025 data = np.zeros(natoms, dtype)
1026 for column, ncol in zip(fr_cols, ncols):
1027 value = arrays[column]
1028 if ncol == 1:
1029 data[column] = np.squeeze(value)
1030 else:
1031 for c in range(ncol):
1032 data[column + str(c)] = value[:, c]
1034 nat = natoms
1035 if vec_cell:
1036 nat -= nPBC
1037 # Write the output
1038 fileobj.write('%d\n' % nat)
1039 fileobj.write('%s\n' % comm)
1040 for i in range(natoms):
1041 fileobj.write(fmt % tuple(data[i]))
1044# create aliases for read/write functions
1045read_extxyz = read_xyz
1046write_extxyz = write_xyz