Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""File formats. 

2 

3This module implements the read(), iread() and write() functions in ase.io. 

4For each file format there is an IOFormat object. 

5 

6There is a dict, ioformats, which stores the objects. 

7 

8Example 

9======= 

10 

11The xyz format is implemented in the ase/io/xyz.py file which has a 

12read_xyz() generator and a write_xyz() function. This and other 

13information can be obtained from ioformats['xyz']. 

14""" 

15 

16import io 

17import re 

18import functools 

19import inspect 

20import os 

21import sys 

22import numbers 

23import warnings 

24from pathlib import Path, PurePath 

25from typing import ( 

26 IO, List, Any, Iterable, Tuple, Union, Sequence, Dict, Optional) 

27 

28if sys.version_info >= (3, 8): 

29 from importlib.metadata import entry_points 

30else: 

31 from importlib_metadata import entry_points 

32 

33from ase.atoms import Atoms 

34from ase.utils.plugins import ExternalIOFormat 

35from importlib import import_module 

36from ase.parallel import parallel_function, parallel_generator 

37 

38 

# Upper bound on the number of leading bytes that are inspected when a
# file format is guessed from file contents (used by match_magic() and
# filetype() in this module).
PEEK_BYTES = 50000

40 

41 

class UnknownFileTypeError(Exception):
    """Raised when a file format cannot be identified or its module loaded."""

44 

45 

class IOFormat:
    """Metadata and lazy reader/writer lookup for one ase.io file format.

    The implementing module is imported on demand through :attr:`module`.
    It is expected to expose ``read_<name>`` and/or ``write_<name>``
    functions, where dashes in the format name are replaced by
    underscores.
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        """Store the static description of the format.

        ``code`` consists of two characters: '1' or '+' (single image or
        multiple images) followed by 'F', 'B' or 'S' (text file object,
        binary file object, or filename string).
        """
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        # Magic patterns are matched against bytes (see match_magic()).
        self.magic: List[bytes] = []
        self.magic_regex: Optional[bytes] = None

    def open(self, fname, mode: str = 'r') -> IO:
        """Open ``fname`` in a mode suitable for this format.

        ``mode`` must be 'r', 'w' or 'a'; 'b' is appended automatically
        for binary formats.  Raises ValueError for any other mode and
        NotImplementedError if the format lacks the requested capability.
        """
        # We might want append mode, too
        # We can allow more flags as needed (buffering etc.)
        if mode not in list('rwa'):
            raise ValueError("Only modes allowed are 'r', 'w', and 'a'")
        if mode == 'r' and not self.can_read:
            raise NotImplementedError('No reader implemented for {} format'
                                      .format(self.name))
        if mode == 'w' and not self.can_write:
            raise NotImplementedError('No writer implemented for {} format'
                                      .format(self.name))
        if mode == 'a' and not self.can_append:
            raise NotImplementedError('Appending not supported by {} format'
                                      .format(self.name))

        if self.isbinary:
            mode += 'b'

        path = Path(fname)
        return path.open(mode, encoding=self.encoding)

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in a BytesIO/StringIO matching the format's mode.

        ``data`` is encoded or decoded as needed, using the format's
        encoding or UTF-8 when none is set.
        """
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class for this format (BytesIO or StringIO)."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence[Atoms]:
        """Parse in-memory ``data`` and return all images as a list.

        Extra keyword arguments are forwarded to the format's reader.
        """
        with self._buf_as_filelike(data) as fd:
            # _read_wrapper() always hands back a generator: plain reader
            # functions are wrapped by wrap_read_function().  Hence the
            # result has to be iterated for single-image formats as well
            # (yielding exactly one item) instead of being type-checked
            # as an Atoms object, and one reader call is enough.
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> Atoms:
        """Parse in-memory ``data`` and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the implementing module provides a read function."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the implementing module provides a write function."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function appears to know an 'append' name.

        This inspects ``__code__.co_varnames``, which also lists the
        function's local variables, so it is a heuristic rather than a
        strict signature check.
        """
        writefunc = self._writefunc()
        return self.can_write and 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = ['{}={}'.format(name, repr(value))
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced, usable in function names."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return ``read_<name>`` from the module, or None if missing."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return ``write_<name>`` from the module, or None if missing."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Reader callable, or None (with a FutureWarning) if unsupported."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the reader so that the result is always a generator."""
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            # Plain functions are adapted to the generator protocol.
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit the FutureWarning about read/write returning None."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None.  '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Writer callable, or None (with a FutureWarning) if unsupported."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the module's write function; ValueError if there is none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """'r', 'w', 'rw' or '' depending on read/write support."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name:        {self.name}',
                 f'Description: {self.description}',
                 f'Modes:       {self.modes}',
                 f'Encoding:    {self.encoding}',
                 f'Module:      {self.module_name}',
                 f'Code:        {self.code}',
                 f'Extensions:  {self.extensions}',
                 f'Globs:       {self.globs}',
                 f'Magic:       {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether reader/writer take a file object (code is not 'S')."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the implementing module.

        Raises UnknownFileTypeError (chained to the ImportError) if the
        module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether ``data`` starts like a file of this format.

        Uses ``magic_regex`` if defined, otherwise each ``magic`` pattern
        (with a trailing wildcard appended) is matched against ``data``.
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(fnmatchcase(data, magic + b'*')  # type: ignore
                   for magic in self.magic)

250 

251 

# Registry of all defined formats, keyed by format name.
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a filename extension (stored without the leading dot) to the one
# format that registered it; see define_io_format().
extension2format: Dict[str, IOFormat] = {}


all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
# Maps format name to the module name as given (before any 'ase.io.' prefix).
format2modulename: Dict[str, str] = {}  # Left for compatibility only.

258 

259 

def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and add it to the module-level registries.

    Parameters
    ==========
    name: str
        Format name; also the key in ``ioformats``.
    desc: str
        Human-readable description.
    code: str
        Two-character code, see IOFormat.
    module: str
        Name of the implementing module.  Defaults to ``name`` with
        dashes replaced by underscores.  Unless ``external`` is true the
        name is taken relative to ``ase.io``.
    ext: str or list of str
        Filename extension(s) owned by this format.
    glob: str or list of str
        Filename pattern(s) identifying the format.
    magic: bytes or list of bytes
        Pattern(s) matched against the beginning of the file contents.
    encoding: str
        Text encoding passed on to the IOFormat.
    magic_regex: bytes
        Regular expression matched against the file contents.
    external: bool
        If true, ``module`` is used as a fully qualified module name.

    Returns the new IOFormat.  Raises ValueError if one of the extensions
    is already registered; in that case none of the registries is
    modified.
    """
    if module is None:
        module = name.replace('-', '_')
    # The compatibility mapping stores the name without 'ase.io.' prefix.
    short_module_name = module

    if not external:
        module = 'ase.io.' + module

    def normalize_patterns(strings):
        """Return a list for None, a single str/bytes or any iterable."""
        if strings is None:
            strings = []
        elif isinstance(strings, (str, bytes)):
            strings = [strings]
        else:
            strings = list(strings)
        return strings

    fmt = IOFormat(name, desc, code, module_name=module,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)
    fmt.magic_regex = magic_regex

    # Validate every extension before touching any registry.  Otherwise a
    # conflict on a later extension would leave earlier ones pointing to a
    # format that never makes it into ``ioformats``.
    checked = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in checked:
            raise ValueError(
                'extension "{}" already registered'.format(extension))
        checked.add(extension)

    format2modulename[name] = short_module_name
    for extension in fmt.extensions:
        extension2format[extension] = fmt
    ioformats[name] = fmt
    return fmt

295 

296 

def get_ioformat(name: str) -> IOFormat:
    """Look up the IOFormat called ``name``.

    Raises UnknownFileTypeError if no such format is registered.  The
    format's module is also accessed once so that import problems
    surface here rather than later.
    """
    fmt = ioformats.get(name)
    if fmt is None:
        raise UnknownFileTypeError(name)

    # Evaluated only for its side effect: the property imports the module
    # and raises if that fails.
    fmt.module
    return fmt

305 

306 

def register_external_io_formats(group):
    """Register every IO format advertised under entry-point ``group``.

    A format that fails to register does not abort the loop; a warning
    naming the entry point and the error is issued instead.
    """
    # Query the entry points once: each entry_points() call scans the
    # metadata of the installed distributions.
    all_entry_points = entry_points()
    if hasattr(all_entry_points, 'select'):
        # Selectable interface of newer importlib.metadata versions.
        fmt_entry_points = all_entry_points.select(
            group=group)  # type: ignore
    else:
        # Older interface: a mapping from group name to entry points.
        fmt_entry_points = all_entry_points.get(group, ())

    for entry_point in fmt_entry_points:
        try:
            define_external_io_format(entry_point)
        except Exception as exc:
            warnings.warn(
                'Failed to register external '
                f'IO format {entry_point.name}: {exc}'
            )

321 

322 

def define_external_io_format(entry_point):
    """Load ``entry_point`` and register the format it describes.

    The entry point must load to an ExternalIOFormat, otherwise a
    TypeError is raised.  A ValueError is raised if a format with the
    entry point's name already exists.
    """
    fmt = entry_point.load()
    name = entry_point.name

    if name in ioformats:
        raise ValueError(f'Format {name} already defined')

    if not isinstance(fmt, ExternalIOFormat):
        message = ('Wrong type for registering external IO formats '
                   f'in format {name}, expected '
                   'ExternalIOFormat')
        raise TypeError(message)

    # The fields of the ExternalIOFormat become keyword arguments.
    F(name, **fmt._asdict(), external=True)  # type: ignore

333 

334 

# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
#  1=single atoms object
#  +=multiple atoms objects
#  F=accepts a file-descriptor
#  S=needs a file-name str
#  B=like F, but opens in binary mode
#
# The keyword arguments are those of define_io_format():
#
#  module: implementing module below ase.io (default: derived from the name)
#  ext:    filename extension(s) owned by the format
#  glob:   filename pattern(s) identifying the format
#  magic:  bytes pattern(s) matched against the start of the file contents
#
# Every extension can be registered by one format only.

# Short alias that keeps the table below compact.
F = define_io_format
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cml', 'Chemical json file', '1F', ext='cml')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')

374F('dacapo-text', 'Dacapo text output', '1F', 

375 module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n') 

F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
F('exciting', 'exciting input', '1F', glob='input.xml')
# extxyz owns the 'xyz' extension; the plain 'xyz' format further down is
# therefore registered without an extension.
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
# NOTE(review): the magic below mimics an ASCII-art banner; runs of spaces
# inside the literal may have been collapsed by whatever produced this
# listing -- confirm against the canonical source before relying on it.
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension.  Currently define_io_format() rejects an extension that has
# already been registered.
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point.
# This runs at import time, after the built-in formats, so an external
# format whose name is already taken is rejected (with a warning).
register_external_io_formats('ase.ioformats')

503 

504 

def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    The suffixes ``.gz``, ``.bz2`` and ``.xz`` are recognised.  Any other
    filename is returned unchanged together with ``None``.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        Filename without the compression suffix, and the suffix without
        its leading dot; or the unmodified filename and None.
    """
    # Update if anything is added
    known_suffixes = ('gz', 'bz2', 'xz')

    # The stdlib splitter takes care of the edge cases (dot files etc.)
    stem, dotted_suffix = os.path.splitext(filename)
    suffix = dotted_suffix.strip('.')

    if suffix not in known_suffixes:
        return filename, None
    return stem, suffix

539 

540 

def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression.

    The compression is guessed from the filename suffix:
    ``gz`` (gzip), ``bz2`` (bzip2) and ``xz`` (lzma) are handled, any
    other file is opened with the builtin ``open``.

    The bare modes 'r', 'w' and 'a' are turned into the explicit text
    modes 'rt', 'wt' and 'at'; any other mode (e.g. 'rb', 'wb') is
    passed on unchanged.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # The compressed openers would treat a bare 'r'/'w'/'a' as binary,
    # so spell out text mode explicitly.
    explicit_text_modes = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = explicit_text_modes.get(mode, mode)

    _, compression = get_compression(filename)

    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)  # type: ignore
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # No (recognised) compression
    return open(filename, mode)

589 

590 

def wrap_read_function(read, filename, index=None, **kwargs):
    """Adapt a plain read function to the generator protocol.

    Without ``index`` the single return value of ``read`` is yielded;
    with ``index`` the items of the returned iterable are yielded.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)

598 

599 

# What read(), write() and filetype() accept as their file argument:
# a filename string, a pathlib path, or an open file object.
NameOrFile = Union[str, PurePath, IO]

601 

602 

def write(
        filename: NameOrFile,
        images: Union[Atoms, Sequence[Atoms]],
        format: str = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.
        If it cannot be determined for a file descriptor (or for
        standard output), 'json' is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one
        file.
        WARNING: If the file format does not support multiple entries
        without additional keywords/headers, files created using
        'append=True' might not be readable by any program!  They will
        nevertheless be written without error message.

    The use of additional keywords is format specific.  write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """

    if isinstance(filename, PurePath):
        filename = str(filename)

    if not isinstance(filename, str):
        # An open file object was passed.
        fd = filename  # type: ignore
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore
    elif filename == '-':
        fd = sys.stdout
        filename = None  # type: ignore
    else:
        fd = None
        if format is None:
            format = filetype(filename, read=False)
            assert isinstance(format, str)

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)

667 

668 

@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Dispatch the images to the writer of the given format.

    Either ``filename`` or ``fd`` is expected to be None (see write()).
    ``io`` is the IOFormat belonging to ``format``.  ``parallel`` is not
    used in this body; presumably it is consumed by the
    ``parallel_function`` decorator.

    Raises ValueError if the format cannot write, cannot hold the given
    number of images, cannot write to a file descriptor or cannot append.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError('{}-format can only store 1 Atoms object.'
                             .format(format))
        images = images[0]

    if not io.can_write:
        raise ValueError("Can't write to {}-format".format(format))

    # Special case for json-format: several images, or appending, is
    # handled through the filename.
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             "file-descriptor using json-format.")
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer needs a filename.
        if fd is not None:
            raise ValueError("Can't write {}-format to file-descriptor"
                             .format(format))
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError("Cannot append to {}-format, write-function "
                             "does not support the append keyword."
                             .format(format))
        return io.write(filename, images, **kwargs)

    if fd is not None:
        # The caller owns the file object; it is left open.
        return io.write(fd, images, **kwargs)

    mode = 'wb' if io.isbinary else 'w'
    if append:
        mode = mode.replace('w', 'a')
    # XXX remember to re-enable compressed open
    # (IOFormat.open() is not used here.)
    fd = open_with_compression(filename, mode)
    try:
        return io.write(fd, images, **kwargs)
    finally:
        fd.close()

717 

718 

def read(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default) ``filename`` is split at the at sign ``@``
        and the part after it is interpreted as the index.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected
    from the file contents, so the ``format`` argument should be
    explicitly given.

    A list is returned if the index is a slice or a string; otherwise a
    single Atoms object."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin

    if isinstance(index, str):
        try:
            index = string2index(index)
        except ValueError:
            # Not a number or slice; pass the string on as it is.
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1

    if not format:
        # File contents are inspected only if a filename was given.
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    if isinstance(index, (slice, str)):
        return list(_iread(filename, index, format, ioformat,
                           parallel=parallel, **kwargs))

    images = _iread(filename, slice(index, None), format, ioformat,
                    parallel=parallel, **kwargs)
    return next(images)

775 

776 

def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterable[Atoms]:
    """Iterator for reading Atoms objects from file.

    Takes the same arguments as the `read` function, but yields one
    Atoms object at a time instead of returning all at once.  Without
    an index all images are iterated over."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None)
    elif not isinstance(index, (slice, str)):
        # Single integer: select exactly that image.  For index == -1 the
        # stop value would be 0, hence the open end.
        index = slice(index, (index + 1) or None)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)

810 

811 

@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield the images selected by ``index`` using the format's reader.

    ``io`` is the IOFormat belonging to ``format``.  With
    ``full_output=True`` the dictionaries produced by the reader are
    yielded (objects that are not dictionaries are wrapped as
    ``{'atoms': obj}``); otherwise only the ``'atoms'`` entry is.
    ``parallel`` is not used in this body; presumably it is consumed by
    the ``parallel_generator`` decorator.
    """

    if not io.can_read:
        raise ValueError("Can't read from {}-format".format(format))

    if io.single:
        # Readers of single-image formats take no index argument.
        start = index.start
        assert start is None or start == 0 or start == -1
        reader_args = ()
    else:
        reader_args = (index,)

    if not isinstance(filename, str):
        # Already a file object, which the format has to accept.
        assert io.acceptsfd
        fd, must_close_fd = filename, False
    elif io.acceptsfd:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
        must_close_fd = True
    else:
        # The reader wants the filename itself.
        fd, must_close_fd = filename, False

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *reader_args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            yield record if full_output else record['atoms']
    finally:
        if must_close_fd:
            fd.close()

850 

851 

def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split a trailing ``@index`` off ``filename``.

    Returns ``(filename, index)``.  The input is returned unchanged if
    ``filename`` is not a string, if splitting is disabled or if the
    basename contains no at sign.  Otherwise the part after the last
    at sign is removed from the filename and parsed with string2index(),
    unless ``index`` is a slice, which takes precedence.  If parsing
    fails a warning is issued and the unparsed string is returned as
    the index.
    """
    if not isinstance(filename, str):
        return filename, index

    if do_not_split_by_at_sign or '@' not in os.path.basename(filename):
        return filename, index

    stem, _, index_text = filename.rpartition('@')

    if isinstance(index, slice):
        return stem, index

    try:
        parsed_index = string2index(index_text)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
        parsed_index = index_text
    return stem, parsed_index

873 

874 

def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches ``data``.

    Only the first PEEK_BYTES bytes of ``data`` are considered.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    matching = (fmt for fmt in ioformats.values() if fmt.match_magic(head))
    found = next(matching, None)
    if found is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return found

881 

882 

def string2index(string: str) -> Union[int, slice, str]:
    """Convert index string to either int or slice.

    A string without colon is converted to int if possible and returned
    unchanged otherwise (it may contain a database accessor).  A string
    with colons is parsed as ``start:stop[:step]`` where empty fields
    mean None.

    Raises ValueError if a field is not an integer or if there are more
    than three fields.
    """
    if ':' not in string:
        # may contain database accessor
        try:
            return int(string)
        except ValueError:
            return string

    fields = string.split(':')
    if len(fields) > 3:
        # slice() takes at most three arguments and would raise a
        # TypeError, but callers only handle ValueError for bad indices.
        raise ValueError(f'Too many colons in index string: {string!r}')

    bounds: List[Optional[int]] = [None if field == '' else int(field)
                                   for field in fields]
    return slice(*bounds)

899 

900 

def filetype(
        filename: NameOrFile,
        read: bool = True,
        guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then up to ``PEEK_BYTES`` bytes from
    the start of the file will be read and analysed.  Turn off this second
    part by using read=False.

    Can be used from the command-line also::

        $ ase info filename ...

    Parameters
    ==========
    filename: str, path or file
        Filename or open file object.  For file objects with a ``name``
        attribute, that name is used for the filename-based checks.  A
        file object that is peeked into is rewound to its beginning
        afterwards and left open.
    read: bool
        Whether the file contents may be inspected.
    guess: bool
        If True, a filename extension that no format has registered is
        returned as the format name when nothing else matched.

    Returns the name of the format.  Raises UnknownFileTypeError if the
    type cannot be determined or the file is empty.
    """

    # Path objects also carry a ``name`` attribute (the bare basename) and
    # must not be mistaken for file objects below.
    if isinstance(filename, PurePath):
        filename = str(filename)

    # The real sys.stdin has a ``name`` attribute as well, so it has to be
    # recognised before the name is extracted.
    if filename is sys.stdin:
        return 'json'

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name  # type: ignore

    ext = None
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        # strip any compression extensions that can be read
        root, compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if isinstance(orig_filename, str):
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            # File object that came with a name.
            fd = orig_filename  # type: ignore
    else:
        # File object without a usable (string) name.
        fd = orig_filename  # type: ignore

    try:
        data = fd.read(PEEK_BYTES)
    finally:
        # Only close what was opened here; a file object supplied by the
        # caller stays open.
        if opened_here:
            fd.close()
    if not opened_here:
        fd.seek(0)

    if len(data) == 0:
        # filename may be a file object here, so do not concatenate.
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format

994 

995 

def index2range(index, length):
    """Convert slice or integer to range.

    The index is applied to ``range(length)``.  If index is an integer,
    range will contain only that integer (normalised to be
    non-negative); an integer out of bounds raises IndexError."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)