Coverage for /builds/ase/ase/ase/io/formats.py : 89.72%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""File formats.
3This module implements the read(), iread() and write() functions in ase.io.
4For each file format there is an IOFormat object.
6There is a dict, ioformats, which stores the objects.
8Example
9=======
11The xyz format is implemented in the ase/io/xyz.py file which has a
12read_xyz() generator and a write_xyz() function. This and other
13information can be obtained from ioformats['xyz'].
14"""
16import io
17import re
18import functools
19import inspect
20import os
21import sys
22import numbers
23import warnings
24from pathlib import Path, PurePath
25from typing import (
26 IO, List, Any, Iterable, Tuple, Union, Sequence, Dict, Optional)
28if sys.version_info >= (3, 8):
29 from importlib.metadata import entry_points
30else:
31 from importlib_metadata import entry_points
33from ase.atoms import Atoms
34from ase.utils.plugins import ExternalIOFormat
35from importlib import import_module
36from ase.parallel import parallel_function, parallel_generator
# Number of leading bytes that are inspected when a file type has to be
# guessed from the file contents (see match_magic() and filetype()).
PEEK_BYTES = 50_000
class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be determined."""
class IOFormat:
    """Description of one file format and access to its reader/writer.

    The reader and writer are looked up lazily as the functions
    ``read_<name>`` and ``write_<name>`` (dashes in the format name
    replaced by underscores) in the module called ``module_name``.

    The two-character ``code`` describes the format:

    * first character: ``'1'`` (single Atoms object) or ``'+'``
      (multiple Atoms objects);
    * second character: ``'F'`` (accepts a file descriptor), ``'S'``
      (needs a file name) or ``'B'`` (like ``'F'`` but binary mode).
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        # Glob-style byte patterns matched against the start of a file.
        self.magic: List[bytes] = []
        self.magic_regex: Optional[bytes] = None

    def open(self, fname, mode: str = 'r') -> IO:
        """Open ``fname`` for this format in mode 'r', 'w' or 'a'.

        Binary formats are opened in the corresponding binary mode.

        Raises ValueError for any other mode and NotImplementedError if
        the format does not support the requested operation.
        """
        # We might want append mode, too
        # We can allow more flags as needed (buffering etc.)
        if mode not in list('rwa'):
            raise ValueError("Only modes allowed are 'r', 'w', and 'a'")
        if mode == 'r' and not self.can_read:
            raise NotImplementedError('No reader implemented for {} format'
                                      .format(self.name))
        if mode == 'w' and not self.can_write:
            raise NotImplementedError('No writer implemented for {} format'
                                      .format(self.name))
        if mode == 'a' and not self.can_append:
            raise NotImplementedError('Appending not supported by {} format'
                                      .format(self.name))

        encoding = self.encoding
        if self.isbinary:
            mode += 'b'
            # Binary streams do not take an encoding; passing one would
            # make Path.open() raise ValueError.
            encoding = None

        path = Path(fname)
        return path.open(mode, encoding=encoding)

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in BytesIO/StringIO as required by the format."""
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory stream class matching the format (binary or text)."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence['Atoms']:
        """Read all images from in-memory ``data`` and return them as a list.

        ``kwargs`` are passed on to the format's read function.
        """
        with self._buf_as_filelike(data) as fd:
            # self.read() always hands back a generator, also for
            # single-image formats, so one code path serves both cases.
            # It must be consumed while the buffer is still open.
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> 'Atoms':
        """Read in-memory ``data`` and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the format's module provides a read function."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the format's module provides a write function."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function knows a variable named ``append``."""
        writefunc = self._writefunc()
        if writefunc is None:
            return False
        return 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = ['{}={}'.format(name, repr(value))
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced, usable in function names."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return the module's ``read_<format>`` function or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return the module's ``write_<format>`` function or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable that reads this format, or None (with a warning)."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the read function; the result is always a generator."""
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            # Plain functions are adapted so callers can always iterate.
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit the FutureWarning for accessing an unsupported action."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None.  '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable that writes this format, or None (with a warning)."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the write function; ValueError if there is none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """Supported modes as a string: '', 'r', 'w' or 'rw'."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name:        {self.name}',
                 f'Description: {self.description}',
                 f'Modes:       {self.modes}',
                 f'Encoding:    {self.encoding}',
                 f'Module:      {self.module_name}',
                 f'Code:        {self.code}',
                 f'Extensions:  {self.extensions}',
                 f'Globs:       {self.globs}',
                 f'Magic:       {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether reader/writer take a file object (code 'F' or 'B')."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the module implementing the format.

        Raises UnknownFileTypeError if the module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches any of the format's globs."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether ``data`` matches the magic regex or a magic pattern."""
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        # Each magic pattern only has to match a prefix of the data.
        return any(fnmatchcase(data, magic + b'*')  # type: ignore
                   for magic in self.magic)
# Registry of all known formats, keyed by format name.
# These will be filled at run-time (by define_io_format()).
ioformats: Dict[str, IOFormat] = dict()
# Maps a file extension to the IOFormat object registered for it.
extension2format = dict()

# Aliased for compatibility only.  Please do not use.
all_formats = ioformats
# Left for compatibility only.
format2modulename = dict()
def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and add it to the module-level registries.

    name, desc, code, encoding:
        Passed on to the IOFormat constructor.
    module:
        Name of the implementing module.  Defaults to ``name`` with
        dashes replaced by underscores.  Unless ``external`` is true,
        the name is taken relative to the ``ase.io`` package.
    ext, glob, magic:
        None, a single str/bytes or an iterable of them; stored on the
        format as lists.
    magic_regex:
        Optional regular expression stored on the format.
    external:
        If true, ``module`` is used as a full module name.

    Returns the new IOFormat.  Raises ValueError if one of the
    extensions is already registered; in that case no registry is
    modified.
    """
    if module is None:
        module = name.replace('-', '_')
    # format2modulename stores the name without the 'ase.io.' prefix.
    short_module_name = module

    if not external:
        module = 'ase.io.' + module

    def normalize_patterns(strings):
        if strings is None:
            strings = []
        elif isinstance(strings, (str, bytes)):
            strings = [strings]
        else:
            strings = list(strings)
        return strings

    fmt = IOFormat(name, desc, code, module_name=module,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # Check every extension before touching any registry, so that a
    # conflict cannot leave a half-registered format behind.
    claimed = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in claimed:
            raise ValueError(
                'extension "{}" already registered'.format(extension))
        claimed.add(extension)

    format2modulename[name] = short_module_name
    for extension in fmt.extensions:
        extension2format[extension] = fmt
    ioformats[name] = fmt
    return fmt
def get_ioformat(name: str) -> IOFormat:
    """Return ioformat object or raise appropriate error.

    UnknownFileTypeError is raised both for names that are not
    registered and for formats whose module cannot be imported.
    """
    if name in ioformats:
        # Touch the module property purely for its side effect: it
        # raises if the implementing module is not importable.
        ioformats[name].module
        return ioformats[name]
    raise UnknownFileTypeError(name)
def register_external_io_formats(group):
    """Register the IO formats exposed through entry point ``group``.

    A format that fails to register only triggers a warning, so the
    remaining entry points are still processed.
    """
    available = entry_points()
    if hasattr(available, 'select'):
        # Selectable entry point interface.
        candidates = available.select(group=group)  # type: ignore
    else:
        # Older dict-like interface.
        candidates = available.get(group, ())

    for candidate in candidates:
        try:
            define_external_io_format(candidate)
        except Exception as exc:
            message = ('Failed to register external '
                       f'IO format {candidate.name}: {exc}')
            warnings.warn(message)
def define_external_io_format(entry_point):
    """Load ``entry_point`` and register the format it describes.

    Raises ValueError if a format with the entry point's name already
    exists and TypeError if the loaded object is not an
    ExternalIOFormat.
    """
    loaded = entry_point.load()
    if entry_point.name in ioformats:
        raise ValueError(f'Format {entry_point.name} already defined')
    if isinstance(loaded, ExternalIOFormat):
        F(entry_point.name, **loaded._asdict(),  # type: ignore
          external=True)
        return
    raise TypeError('Wrong type for registering external IO formats '
                    f'in format {entry_point.name}, expected '
                    'ExternalIOFormat')
# All built-in IO formats are defined below.  Every format carries a
# two-character code, such as '1F', describing some of its properties:
#
#  1=single atoms object
#  +=multiple atoms objects
#  F=accepts a file-descriptor
#  S=needs a file-name str
#  B=like F, but opens in binary mode
#
# The formats are tried in definition order when file names and file
# contents are matched, so the order of the calls is significant.

F = define_io_format
F('abinit-gsr', 'ABINIT GSR file', '1S', module='abinit',
  glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F', module='abinit',
  magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F', module='abinit',
  magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S', module='aims',
  magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
F('castep-castep', 'CASTEP output file', '+F', module='castep',
  ext='castep')
F('castep-cell', 'CASTEP geom file', '1F', module='castep',
  ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F', module='castep',
  ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F', module='castep',
  ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F', module='castep',
  ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B', module='cp2k',
  ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F', module='cp2k',
  ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'],
  glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F', module='dacapo',
  magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F', module='dlp4',
  ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F', module='dlp4',
  glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S', module='dmol',
  ext='arc')
F('dmol-car', 'DMol3 structure file', '1S', module='dmol',
  ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S', module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F', ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F', module='espresso',
  ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F', module='espresso',
  ext=['pwo', 'out'], magic=b'*Program PWSCF')
F('exciting', 'exciting input', '1F', glob='input.xml')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F', module='gamess_us',
  magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F', module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F', module='gaussian',
  ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F', module='gaussian',
  ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S', module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S', module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S', module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F', ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F', module='lammpsrun',
  magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S', module='animation')
F('mustem', 'muSTEM xtl file', '1F', ext='xtl')
F('mysql', 'ASE MySQL database file', '+S', module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F', ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F', module='nwchem',
  ext='nwi')
F('nwchem-out', 'NWChem output file', '+F', module='nwchem',
  ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F', module='octopus',
  glob='inp')
F('proteindatabank', 'Protein Data Bank', '+F', ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have
# the same extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F', magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F', glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F', ext='poscar',
  glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F', module='vasp',
  glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F', module='vasp',
  glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F', module='vasp',
  glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')
def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression extension off a filename.

    The compression types ``.gz``, ``.bz2`` and ``.xz`` are recognised
    (the comparison is case sensitive).

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression extension together with
        that extension (no leading dot).  If the last extension is not
        a known compression type, the unchanged filename and None.
    """
    # Update if anything is added
    known_suffixes = ('gz', 'bz2', 'xz')

    # Use stdlib as it handles most edge cases
    root, dotted_suffix = os.path.splitext(filename)

    # splitext keeps the leading '.' on the extension, so drop it
    suffix = dotted_suffix[1:]
    if suffix in known_suffixes:
        return root, suffix
    return filename, None
def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression.

    The compression is guessed from the filename, and the file is
    opened for reading or writing as if it were a standard file.

    Implemented for ``gz``(gzip), ``bz2``(bzip2) and ``xz``(lzma).

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed formats sometimes default to binary, so force text mode.
    explicit_text_mode = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = explicit_text_mode.get(mode, mode)

    _, compression = get_compression(filename)

    # The compression modules are imported only when they are needed.
    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)  # type: ignore
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)
    # Either None or unknown string
    return open(filename, mode)
def wrap_read_function(read, filename, index=None, **kwargs):
    """Convert read-function to generator.

    Without ``index`` the single return value of ``read`` is yielded;
    with ``index`` every item of the returned iterable is yielded.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
    else:
        yield read(filename, **kwargs)
# Type of the file argument of the read/write functions: a file name
# (str or pure path object) or a file object.
NameOrFile = Union[str, PurePath, IO]
def write(
        filename: NameOrFile,
        images: Union[Atoms, Sequence[Atoms]],
        format: str = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.
        If it cannot be determined for a file descriptor (or for
        standard output), 'json' is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """
    if isinstance(filename, PurePath):
        filename = str(filename)

    if not isinstance(filename, str):
        # An open file object was passed in.
        fd = filename  # type: ignore
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore
    elif filename == '-':
        fd = sys.stdout
        filename = None  # type: ignore
    else:
        fd = None
        if format is None:
            format = filetype(filename, read=False)
            assert isinstance(format, str)

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)
@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` with the IOFormat ``io``.

    Exactly one of ``filename`` and ``fd`` is expected to be given.
    Raises ValueError if the request cannot be fulfilled by the format
    (several images for a single-image format, no writer, a file
    descriptor for a format that needs a file name, or appending
    without support for it).
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError(
                '{}-format can only store 1 Atoms object.'.format(format))
        images = images[0]

    if not io.can_write:
        raise ValueError("Can't write to {}-format".format(format))

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             'file-descriptor using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer needs a file name.
        if fd is not None:
            raise ValueError(
                "Can't write {}-format to file-descriptor".format(format))
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError("Cannot append to {}-format, write-function "
                             "does not support the append keyword."
                             .format(format))
        return io.write(filename, images, **kwargs)

    # The writer takes a file object; open one ourselves if necessary.
    open_new = fd is None
    try:
        if open_new:
            binary_flag = 'b' if io.isbinary else ''
            mode = ('a' if append else 'w') + binary_flag
            fd = open_with_compression(filename, mode)
            # XXX remember to re-enable compressed open
            # fd = io.open(filename, mode)
        return io.write(fd, images, **kwargs)
    finally:
        # Only close what was opened here, never the caller's file.
        if open_new and fd is not None:
            fd.close()
def read(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.
        The name '-' means standard input.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default) ``filename`` is split at the at sign ``@``.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given.

    A slice or str index gives a list of images; any other index gives
    a single Atoms object."""
    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin
    if isinstance(index, str):
        try:
            index = string2index(index)
        except ValueError:
            # Leave an unparsable index string as it is.
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1
    if not format:
        format = filetype(filename, read=isinstance(filename, str))

    ioformat = get_ioformat(format)
    if not isinstance(index, (slice, str)):
        # Single image requested: take the first one of the slice
        # starting at that index.
        images = _iread(filename, slice(index, None), format, ioformat,
                        parallel=parallel, **kwargs)
        return next(images)
    return list(_iread(filename, index, format, ioformat,
                       parallel=parallel, **kwargs))
def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterable[Atoms]:
    """Iterator for reading Atoms objects from file.

    Works as the `read` function, but yields one Atoms object at a time
    instead of all at once.  Without an index, all images are yielded."""
    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)

    if not isinstance(index, (slice, str)):
        # Integer index: select exactly that image.  For index -1 the
        # stop value would be 0, hence the "or None".
        stop = (index + 1) or None
        index = slice(index, stop)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)
@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield the images read from ``filename`` with the IOFormat ``io``.

    With ``full_output`` the dictionaries produced by the reader are
    yielded (plain results are wrapped as ``{'atoms': result}``),
    otherwise only their 'atoms' entries.
    """
    if not io.can_read:
        raise ValueError("Can't read from {}-format".format(format))

    if io.single:
        # Readers of single-image formats take no index argument.
        start = index.start
        assert start in (None, 0, -1)
        args = ()
    else:
        args = (index,)

    if not isinstance(filename, str):
        # Already a file object: it stays under the caller's control.
        assert io.acceptsfd
        fd, must_close_fd = filename, False
    elif io.acceptsfd:
        mode = 'rb' if io.isbinary else 'r'
        fd, must_close_fd = open_with_compression(filename, mode), True
    else:
        # The reader wants the file name itself.
        fd, must_close_fd = filename, False

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            yield record if full_output else record['atoms']
    finally:
        if must_close_fd:
            fd.close()
def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split an index given as ``name@index`` off a file name.

    Returns ``(filename, index)``.  The input is returned unchanged if
    ``filename`` is not a str, if ``do_not_split_by_at_sign`` is true
    or if the base name contains no ``@``.  Otherwise the file name is
    cut at the last ``@``; a slice passed as ``index`` is kept, any
    other ``index`` is replaced by the parsed text after the ``@``.
    If that text cannot be parsed, a warning is issued and the text
    itself is returned as index.
    """
    if not isinstance(filename, str):
        return filename, index

    if do_not_split_by_at_sign or '@' not in os.path.basename(filename):
        return filename, index

    path, _, index_text = filename.rpartition('@')

    if isinstance(index, slice):
        return path, index

    try:
        return path, string2index(index_text)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
    return path, index_text
def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches ``data``.

    Only the first PEEK_BYTES bytes of ``data`` are considered.
    Raises UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    matching = (fmt for fmt in ioformats.values() if fmt.match_magic(head))
    found = next(matching, None)
    if found is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return found
def string2index(string: str) -> Union[int, slice, str]:
    """Convert index string to either int or slice

    A string without ':' that is not an integer is returned unchanged.
    """
    if ':' in string:
        # Empty fields become None, just like in slice syntax.
        fields: List[Optional[int]] = [int(field) if field != '' else None
                                       for field in string.split(':')]
        fields.extend([None] * (3 - len(fields)))
        return slice(*fields)

    # may contain database accessor
    try:
        return int(string)
    except ValueError:
        return string
def filetype(
        filename: NameOrFile,
        read: bool = True,
        guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then the first PEEK_BYTES bytes of the
    file will be read and analysed.  Turn off this second part by using
    read=False.

    filename: str or file
        File name, or a file object.  For a file object with a string
        ``name`` attribute, that name is used for the filename checks.
        A file object handed in by the caller is never closed; it is
        rewound to the start after its contents have been inspected.
    read: bool
        Whether the file may be opened and its contents inspected.
    guess: bool
        If the contents are not recognised and the extension is not
        registered, return the extension itself as format name.

    Raises UnknownFileTypeError if the type cannot be determined.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name  # type: ignore

    ext = None
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        # strip any compression extensions that can be read
        root, _ = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
        else:
            fd = orig_filename  # type: ignore
    else:
        # No usable file name: a file object without a name, or one
        # whose name is not a string.  Read from the object itself.
        fd = orig_filename  # type: ignore
        if fd is sys.stdin:
            return 'json'

    data = fd.read(PEEK_BYTES)
    if fd is orig_filename:
        # The file object belongs to the caller: rewind it instead of
        # closing it, so that it can still be read afterwards.
        fd.seek(0)
    else:
        fd.close()

    if len(data) == 0:
        # filename may be a file object here, so format it instead of
        # concatenating strings.
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format
def index2range(index, length):
    """Convert slice or integer to range.

    The index is applied to ``range(length)``.
    If index is an integer, range will contain only that integer."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        # A slice was given: the selection already is a range.
        return selected
    return range(selected, selected + 1)