Coverage for /builds/ase/ase/ase/io/formats.py : 89.34%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""File formats.
3This module implements the read(), iread() and write() functions in ase.io.
4For each file format there is an IOFormat object.
6There is a dict, ioformats, which stores the objects.
8Example
9=======
11The xyz format is implemented in the ase/io/xyz.py file which has a
12read_xyz() generator and a write_xyz() function. This and other
13information can be obtained from ioformats['xyz'].
14"""
16import io
17import re
18import functools
19import inspect
20import os
21import sys
22import numbers
23import warnings
24from pathlib import Path, PurePath
25from typing import (
26 IO, List, Any, Iterable, Tuple, Union, Sequence, Dict, Optional)
28if sys.version_info >= (3, 8):
29 from importlib.metadata import entry_points
30else:
31 from importlib_metadata import entry_points
33from ase.atoms import Atoms
34from ase.utils.plugins import ExternalIOFormat
35from importlib import import_module
36from ase.parallel import parallel_function, parallel_generator
# Maximum number of leading bytes inspected when sniffing a file's format
# from its contents (see match_magic() and filetype()).
PEEK_BYTES = 50000
class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be determined."""
class IOFormat:
    """Description of a single file format and access to its reader/writer.

    The two-character ``code`` describes the format:

    * first character: ``'1'`` (one Atoms object per file) or ``'+'``
      (several Atoms objects per file);
    * second character: ``'F'`` (read/write functions accept a file
      object), ``'B'`` (like ``'F'`` but the file is opened in binary
      mode) or ``'S'`` (functions need a file name).

    The reader and writer are looked up lazily as ``read_<name>`` and
    ``write_<name>`` (dashes in the name replaced by underscores) in the
    module called ``module_name``.
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def open(self, fname, mode: str = 'r') -> IO:
        """Open ``fname`` for this format in mode 'r', 'w' or 'a'.

        Binary formats are opened in binary mode.  Raises ValueError for
        any other mode and NotImplementedError if the format does not
        support the requested operation.
        """
        # We might want append mode, too
        # We can allow more flags as needed (buffering etc.)
        if mode not in list('rwa'):
            raise ValueError("Only modes allowed are 'r', 'w', and 'a'")
        if mode == 'r' and not self.can_read:
            raise NotImplementedError('No reader implemented for {} format'
                                      .format(self.name))
        if mode == 'w' and not self.can_write:
            raise NotImplementedError('No writer implemented for {} format'
                                      .format(self.name))
        if mode == 'a' and not self.can_append:
            raise NotImplementedError('Appending not supported by {} format'
                                      .format(self.name))

        encoding = self.encoding
        if self.isbinary:
            mode += 'b'
            # A binary stream does not take an encoding; passing one
            # makes the open() call raise ValueError.
            encoding = None

        path = Path(fname)
        return path.open(mode, encoding=encoding)

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in a BytesIO/StringIO matching the format's mode."""
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> 'Sequence[Atoms]':
        """Parse ``data`` (file contents) and return a list of all images.

        Extra keyword arguments are passed on to the format's reader.
        """
        with self._buf_as_filelike(data) as fd:
            # self.read always hands back a generator (single-image
            # readers are wrapped), so consume it exactly once; for a
            # single-image format this gives a one-element list.
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> 'Atoms':
        """Parse ``data`` (file contents) and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        writefunc = self._writefunc()
        return self.can_write and 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = ['{}={}'.format(name, repr(value))
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        return self.name.replace('-', '_')

    def _readfunc(self):
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Reader callable, or None (with a FutureWarning) if unsupported."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the reader, always returning a generator of images."""
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            # Plain functions are adapted so callers can always iterate.
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None.  '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Writer callable, or None (with a FutureWarning) if unsupported."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """String with 'r' and/or 'w' for the supported operations."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name:        {self.name}',
                 f'Description: {self.description}',
                 f'Modes:       {self.modes}',
                 f'Encoding:    {self.encoding}',
                 f'Module:      {self.module_name}',
                 f'Code:        {self.code}',
                 f'Extensions:  {self.extensions}',
                 f'Globs:       {self.globs}',
                 f'Magic:       {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        return self.code[1] == 'B'

    @property
    def module(self):
        """The imported implementation module.

        Raises UnknownFileTypeError if the module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether the leading file contents ``data`` identify this format.

        Uses ``magic_regex`` if set, otherwise the ``magic`` glob prefixes.
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(fnmatchcase(data, magic + b'*')  # type: ignore
                   for magic in self.magic)
# Registry of all defined formats, keyed by format name; populated by
# define_io_format().
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a file extension (as passed via ``ext=``) to the IOFormat owning it.
extension2format = {}

all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
format2modulename = {}  # Left for compatibility only.
def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and register it in the module-level registries.

    name: str
        Format name; key in ``ioformats``.
    desc: str
        Human-readable description.
    code: str
        Two-character format code, e.g. '1F' (see IOFormat).
    module: str
        Name of the implementing module.  Defaults to ``name`` with
        dashes replaced by underscores.  Unless ``external`` is true the
        name is taken relative to ``ase.io``.
    ext, glob, magic: str/bytes or iterable thereof
        File extensions, filename glob patterns and magic byte patterns
        identifying the format.  A single value is treated as a
        one-element list.
    encoding: str
        Text encoding stored on the format.
    magic_regex: bytes
        Regular expression matched against the start of the file.
    external: bool
        If true, ``module`` is used as a full module path.

    Returns the new IOFormat.  Raises ValueError if one of the extensions
    is already registered; in that case no registry is modified.
    """
    if module is None:
        module = name.replace('-', '_')
    short_module_name = module

    if not external:
        module = 'ase.io.' + module

    def normalize_patterns(strings):
        if strings is None:
            strings = []
        elif isinstance(strings, (str, bytes)):
            strings = [strings]
        else:
            strings = list(strings)
        return strings

    fmt = IOFormat(name, desc, code, module_name=module,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # Validate every extension before mutating any registry, so that a
    # conflict cannot leave the format half-registered.
    claimed = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in claimed:
            raise ValueError(
                'extension "{}" already registered'.format(extension))
        claimed.add(extension)

    format2modulename[name] = short_module_name
    for extension in fmt.extensions:
        extension2format[extension] = fmt

    ioformats[name] = fmt
    return fmt
def get_ioformat(name: str) -> IOFormat:
    """Look up a registered format by name.

    Raises UnknownFileTypeError if the name is not registered or if the
    format's implementation module cannot be imported.
    """
    fmt = ioformats.get(name)
    if fmt is None:
        raise UnknownFileTypeError(name)
    # Touch the module property now so an import problem surfaces here.
    fmt.module
    return fmt
def register_external_io_formats(group):
    """Register every IO format advertised under entry point ``group``.

    A format that fails to register only produces a warning, so one
    broken plugin does not prevent the others from loading.
    """
    discovered = entry_points()
    if hasattr(discovered, 'select'):
        # Newer importlib.metadata API.
        candidates = discovered.select(group=group)  # type: ignore
    else:
        # Older API: a mapping from group name to entry points.
        candidates = discovered.get(group, ())

    for entry_point in candidates:
        try:
            define_external_io_format(entry_point)
        except Exception as exc:
            warnings.warn(
                'Failed to register external '
                f'IO format {entry_point.name}: {exc}'
            )
def define_external_io_format(entry_point):
    """Load one entry point and register the IO format it describes.

    Raises ValueError if a format with the entry point's name already
    exists and TypeError if the loaded object is not an ExternalIOFormat.
    """
    fmt = entry_point.load()
    format_name = entry_point.name

    if format_name in ioformats:
        raise ValueError(f'Format {format_name} already defined')

    if not isinstance(fmt, ExternalIOFormat):
        raise TypeError('Wrong type for registering external IO formats '
                        f'in format {format_name}, expected '
                        'ExternalIOFormat')

    # The fields of the loaded object become define_io_format() arguments.
    F(format_name, **fmt._asdict(), external=True)  # type: ignore
# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
# 1=single atoms object
# +=multiple atoms objects
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode
#
# Keyword arguments (see define_io_format()):
#
# module=name of the implementing module below ase.io (default: the
#        format name with '-' replaced by '_')
# ext=file extension(s) claimed by the format; each extension can be
#     claimed by one format only
# glob=filename pattern(s) identifying the format
# magic=byte pattern(s) matched against the beginning of the file
# magic_regex=regular expression alternative to magic
# encoding=text encoding of the file
#
# Formats are tried in definition order when matching globs and magic,
# so the first matching definition wins.

F = define_io_format
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cml', 'Chemical json file', '1F', ext='cml')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
F('exciting', 'exciting input', '1F', glob='input.xml')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')
def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    The suffixes ``.gz``, ``.bz2`` and ``.xz`` are recognised.  Any other
    filename is returned unchanged together with ``None``.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression suffix and the name of the
        compression format, or the untouched filename and ``None`` if
        no known compression suffix is present.
    """
    # Extend this when support for another compression format is added.
    known_suffixes = ('gz', 'bz2', 'xz')

    # The stdlib splitter takes care of dotfiles and similar edge cases;
    # the suffix it returns still carries its leading dot.
    stem, dotted_suffix = os.path.splitext(filename)
    suffix = dotted_suffix.strip('.')

    if suffix not in known_suffixes:
        return filename, None
    return stem, suffix
def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a possibly compressed file as if it were an ordinary file.

    The compression is guessed from the filename suffix; ``gz`` (gzip),
    ``bz2`` (bzip2) and ``xz`` (lzma) are handled.  Files without such a
    suffix are opened with the builtin ``open``.

    The bare modes 'r', 'w' and 'a' are turned into the text modes
    'rt', 'wt' and 'at'; every other mode (e.g. 'rb', 'wb') is used
    as given.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed openers sometimes default to binary, so spell out text mode.
    text_mode_for = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = text_mode_for.get(mode, mode)

    _, compression = get_compression(filename)

    # The compression modules are imported only when actually needed.
    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)  # type: ignore
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # Either None or unknown string
    return open(filename, mode)
def wrap_read_function(read, filename, index=None, **kwargs):
    """Adapt a plain read-function so that it behaves like a generator.

    Without an index, the single object returned by ``read`` is yielded.
    With an index, ``read`` is called with it and whatever it returns is
    iterated over.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)
# What the public functions below accept as their ``filename`` argument:
# a path string, a pathlib path, or an open file object.
NameOrFile = Union[str, PurePath, IO]
def write(
    filename: NameOrFile,
    images: Union[Atoms, Sequence[Atoms]],
    format: str = None,
    parallel: bool = True,
    append: bool = False,
    **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename;
        if it cannot be determined for a file descriptor or for
        standard output, json is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """

    if isinstance(filename, PurePath):
        filename = str(filename)

    if not isinstance(filename, str):
        # An open file object: guess the format from it if possible, but
        # a failed guess is not an error here.
        fd = filename  # type: ignore
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore
    elif filename == '-':
        fd = sys.stdout
        filename = None  # type: ignore
    else:
        fd = None
        if format is None:
            format = filetype(filename, read=False)
            assert isinstance(format, str)

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)
@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` using the IOFormat ``io``.

    Exactly one of ``filename`` and ``fd`` is expected to be set by the
    caller.  Returns whatever the format's write function returns.
    Raises ValueError if the request cannot be fulfilled by the format.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError(f'{format}-format can only store 1 Atoms object.')
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             'file-descriptor using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The write function wants a file name.
        if fd is not None:
            raise ValueError(f"Can't write {format}-format to file-descriptor")
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError(f'Cannot append to {format}-format, '
                             'write-function does not support the '
                             'append keyword.')
        return io.write(filename, images, **kwargs)

    # The write function wants a file object; open one if none was given.
    opened_here = fd is None
    try:
        if opened_here:
            mode = ('a' if append else 'w') + ('b' if io.isbinary else '')
            fd = open_with_compression(filename, mode)
            # XXX remember to re-enable compressed open
            # fd = io.open(filename, mode)
        return io.write(fd, images, **kwargs)
    finally:
        if opened_here and fd is not None:
            fd.close()
def read(
    filename: NameOrFile,
    index: Any = None,
    format: str = None,
    parallel: bool = True,
    do_not_split_by_at_sign: bool = False,
    **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default) ``filename`` is split at the at sign ``@``.

    Many formats allow an open file-like object to be passed instead
    of ``filename``. In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin
    if isinstance(index, str):
        try:
            index = string2index(index)
        except ValueError:
            # Leave the string as it is; it is passed on to the reader.
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1
    if not format:
        format = filetype(filename, read=isinstance(filename, str))

    ioformat = get_ioformat(format)

    if not isinstance(index, (slice, str)):
        # A single image: start at ``index`` and take the first one.
        images = _iread(filename, slice(index, None), format, ioformat,
                        parallel=parallel, **kwargs)
        return next(images)

    return list(_iread(filename, index, format, ioformat,
                       parallel=parallel, **kwargs))
def iread(
    filename: NameOrFile,
    index: Any = None,
    format: str = None,
    parallel: bool = True,
    do_not_split_by_at_sign: bool = False,
    **kwargs
) -> Iterable[Atoms]:
    """Iterator for reading Atoms objects from file.

    Takes the same arguments as the `read` function, but yields one
    Atoms object at a time instead of returning them all at once.
    Without an index, all images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)
    elif not isinstance(index, (slice, str)):
        # Integer -> slice selecting just that image; for -1 the stop
        # would be 0, which is replaced by None (open end).
        index = slice(index, (index + 1) or None)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)
@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield the images selected by ``index`` using the IOFormat ``io``.

    ``filename`` is a file name or an open file object.  With
    ``full_output`` the complete dictionaries are yielded (objects that
    are not dictionaries are wrapped as ``{'atoms': obj}``); otherwise
    only their 'atoms' entry is yielded.
    """

    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        # Single-image readers take no index; only requests that start
        # at the one image are meaningful.
        start = index.start
        assert start is None or start == 0 or start == -1
        args = ()
    else:
        args = (index,)

    given_name = isinstance(filename, str)
    if not given_name:
        assert io.acceptsfd

    # Open the file ourselves only for readers working on file objects.
    must_close_fd = given_name and io.acceptsfd
    if must_close_fd:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
    else:
        fd = filename

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *args, **kwargs):
            if not isinstance(item, dict):
                item = {'atoms': item}
            yield item if full_output else item['atoms']
    finally:
        if must_close_fd:
            fd.close()
def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split an ``@index`` suffix off a filename.

    Returns ``(filename, index)``.  The input is returned unchanged if
    ``filename`` is not a string, if splitting is disabled or if the
    last path component contains no ``@``.  Otherwise the name is split
    at the last ``@``; a slice given as ``index`` takes precedence over
    the suffix.  A suffix that cannot be parsed triggers a warning and
    is returned as a plain string.
    """
    if not isinstance(filename, str):
        return filename, index

    last_component = os.path.basename(filename)
    if do_not_split_by_at_sign or '@' not in last_component:
        return filename, index

    stem, suffix = filename.rsplit('@', 1)

    if isinstance(index, slice):
        return stem, index

    try:
        parsed = string2index(suffix)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
        parsed = suffix
    return stem, parsed
def match_magic(data: bytes) -> IOFormat:
    """Return the first defined format whose magic matches ``data``.

    Only the first PEEK_BYTES bytes are looked at.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    hit = next((candidate for candidate in ioformats.values()
                if candidate.match_magic(head)), None)
    if hit is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return hit
def string2index(string: str) -> Union[int, slice, str]:
    """Convert index string to either int or slice.

    A string without ':' is converted to an int if possible and is
    otherwise returned unchanged (it may contain a database accessor).
    A string with ':' is read as ``start:stop:step`` where empty fields
    mean None.

    Raises ValueError if a field is not an integer or if there are more
    than three fields.
    """
    if ':' not in string:
        # may contain database accessor
        try:
            return int(string)
        except ValueError:
            return string

    fields = string.split(':')
    if len(fields) > 3:
        # slice() itself would fail with a TypeError here; callers
        # expect ValueError for anything that cannot be parsed.
        raise ValueError(f'Too many fields in slice string: {string!r}')

    bounds: List[Optional[int]] = [int(field) if field else None
                                   for field in fields]
    bounds += (3 - len(bounds)) * [None]
    return slice(*bounds)
def filetype(
    filename: NameOrFile,
    read: bool = True,
    guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then the first PEEK_BYTES bytes of
    the file will be read and analysed.  Turn off this second part by using
    read=False.

    filename: str, path or file
        File name or open file object.  A file object passed in is left
        open and rewound to its beginning after peeking at it.
    read: bool
        Whether the file contents may be inspected.  Only consulted for
        file names (including the name of a file object).
    guess: bool
        If the contents do not identify the format and the extension is
        not registered, return the bare extension as the format name.

    Raises UnknownFileTypeError if the type cannot be determined.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    # Path objects have a ``name`` attribute holding only the last path
    # component, so turn them into plain strings before looking at it.
    if isinstance(filename, PurePath):
        filename = str(filename)

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name  # type: ignore

    ext = None
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        # strip any compression extensions that can be read
        root, compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            # A file object that has a name: peek into the object itself.
            fd = orig_filename  # type: ignore
    else:
        fd = filename  # type: ignore
        if fd is sys.stdin:
            return 'json'

    try:
        data = fd.read(PEEK_BYTES)
    finally:
        # Only close what was opened here ...
        if opened_here:
            fd.close()
    if not opened_here:
        # ... and hand the caller's file object back rewound.
        fd.seek(0)

    if len(data) == 0:
        # ``filename`` is not necessarily a string at this point.
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    guessed = None
    if ext in extension2format:
        guessed = extension2format[ext].name

    if guessed is None and guess:
        guessed = ext
    if guessed is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(guessed, str)
    return guessed
def index2range(index, length):
    """Turn an integer or slice into a range over ``range(length)``.

    An integer index gives a range containing just that (non-negative)
    position; a slice gives the corresponding sub-range."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)