Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""File formats. 

2 

3This module implements the read(), iread() and write() functions in ase.io. 

4For each file format there is an IOFormat object. 

5 

6There is a dict, ioformats, which stores the objects. 

7 

8Example 

9======= 

10 

11The xyz format is implemented in the ase/io/xyz.py file which has a 

12read_xyz() generator and a write_xyz() function. This and other 

13information can be obtained from ioformats['xyz']. 

14""" 

15 

16import io 

17import re 

18import functools 

19import inspect 

20import os 

21import sys 

22import numbers 

23import warnings 

24from pathlib import Path, PurePath 

25from typing import ( 

26 IO, List, Any, Iterable, Tuple, Union, Sequence, Dict, Optional) 

27 

28if sys.version_info >= (3, 8): 

29 from importlib.metadata import entry_points 

30else: 

31 from importlib_metadata import entry_points 

32 

33from ase.atoms import Atoms 

34from ase.utils.plugins import ExternalIOFormat 

35from importlib import import_module 

36from ase.parallel import parallel_function, parallel_generator 

37 

38 

# Maximum number of bytes inspected from the start of a file when guessing
# its format from the contents (used by filetype() and match_magic()).
PEEK_BYTES = 50000

40 

41 

class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be determined."""

44 

45 

class IOFormat:
    """Description of one file format handled by ase.io.

    An instance stores the name, description and two-character code of a
    format together with the name of the module implementing it and an
    optional text encoding.  The reader and writer are looked up lazily
    as ``read_<name>`` and ``write_<name>`` in that module, where dashes
    in the format name are replaced by underscores.

    The code has two characters:

    * first: ``'1'`` (single Atoms object) or ``'+'`` (multiple objects),
    * second: ``'F'`` (accepts a file object), ``'B'`` (like ``'F'`` but
      opened in binary mode) or ``'S'`` (needs a file name string).
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def open(self, fname, mode: str = 'r') -> IO:
        """Open ``fname`` in a way suitable for this format.

        ``mode`` must be exactly ``'r'``, ``'w'`` or ``'a'``; ``'b'`` is
        appended automatically for binary formats.

        Raises ValueError for any other mode and NotImplementedError if
        the format lacks the reader/writer/append support that the
        requested mode needs.
        """
        # We might want append mode, too
        # We can allow more flags as needed (buffering etc.)
        if mode not in list('rwa'):
            raise ValueError("Only modes allowed are 'r', 'w', and 'a'")
        if mode == 'r' and not self.can_read:
            raise NotImplementedError('No reader implemented for {} format'
                                      .format(self.name))
        if mode == 'w' and not self.can_write:
            raise NotImplementedError('No writer implemented for {} format'
                                      .format(self.name))
        if mode == 'a' and not self.can_append:
            raise NotImplementedError('Appending not supported by {} format'
                                      .format(self.name))

        if self.isbinary:
            mode += 'b'

        path = Path(fname)
        return path.open(mode, encoding=self.encoding)

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in an in-memory file object matching this format.

        The data is encoded/decoded as needed so that binary formats get
        a BytesIO and text formats a StringIO.
        """
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class for this format (BytesIO or StringIO)."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> 'Sequence[Atoms]':
        """Parse ``data`` with this format's reader and return all images.

        ``self.read(...)`` always hands back a generator (plain read
        functions are wrapped by ``_read_wrapper``), also for
        single-image formats, so the result is simply collected into a
        list.  The generator is exhausted inside the ``with`` block
        because reading happens lazily and needs the buffer to be open.
        """
        with self._buf_as_filelike(data) as fd:
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> 'Atoms':
        """Parse ``data`` and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the implementing module provides a read function."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the implementing module provides a write function."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function has a variable named ``append``."""
        writefunc = self._writefunc()
        return self.can_write and 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = ['{}={}'.format(name, repr(value))
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced by underscores."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return the module's ``read_<name>`` function, or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return the module's ``write_<name>`` function, or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Reader callable, or None (with a FutureWarning) if unsupported."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the read function so that the result is always a generator.

        Read functions that are not generator functions are routed
        through wrap_read_function().
        """
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit the FutureWarning about ``read``/``write`` returning None."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None.  '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Writer callable, or None (with a FutureWarning) if unsupported."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the write function; raise ValueError if there is none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """String with ``'r'`` and/or ``'w'`` for the supported operations."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name:        {self.name}',
                 f'Description: {self.description}',
                 f'Modes:       {self.modes}',
                 f'Encoding:    {self.encoding}',
                 f'Module:      {self.module_name}',
                 f'Code:        {self.code}',
                 f'Extensions:  {self.extensions}',
                 f'Globs:       {self.globs}',
                 f'Magic:       {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether the read/write functions take a file object."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the module implementing this format.

        Raises UnknownFileTypeError (chained to the ImportError) if the
        module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether ``data`` starts with a signature of this format.

        If ``magic_regex`` is set it is matched against the data
        (multi-line, dot matches newline); otherwise each ``magic``
        pattern, extended by a trailing ``*``, is glob-matched.
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(fnmatchcase(data, magic + b'*')  # type: ignore
                   for magic in self.magic)

250 

251 

# Registry of all defined formats, keyed by format name.  Entries are added
# by define_io_format(); lookups such as filetype() and match_magic()
# iterate over it in insertion order.
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a file extension (without the leading dot) to the format that
# registered it.
extension2format: Dict[str, IOFormat] = {}


all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
# Maps format name to the module name passed to (or derived by)
# define_io_format(), i.e. before any 'ase.io.' prefix is added.
format2modulename: Dict[str, str] = {}  # Left for compatibility only.

258 

259 

def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and register it in the module-level registries.

    name: format name; also the default module name (dashes replaced by
        underscores) when ``module`` is not given.
    desc, code, encoding: passed on to IOFormat.
    module: module implementing the format.  Unless ``external`` is true
        it is taken to be relative to ``ase.io``.
    ext, glob, magic: a single pattern or an iterable of patterns used
        for matching by extension, file name and file contents.
    magic_regex: optional regular expression for matching file contents.

    Returns the new IOFormat.  Raises ValueError if one of the extensions
    is already registered; in that case none of the registries is
    modified.
    """
    if module is None:
        module = name.replace('-', '_')
    short_module_name = module

    if not external:
        module = 'ase.io.' + module

    def normalize_patterns(strings):
        # Accept None, a single str/bytes pattern, or any iterable.
        if strings is None:
            strings = []
        elif isinstance(strings, (str, bytes)):
            strings = [strings]
        else:
            strings = list(strings)
        return strings

    fmt = IOFormat(name, desc, code, module_name=module,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # Validate every extension before touching any registry so that a
    # conflict cannot leave a half-registered format behind.
    claimed = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in claimed:
            raise ValueError(
                'extension "{}" already registered'.format(extension))
        claimed.add(extension)

    format2modulename[name] = short_module_name
    for extension in fmt.extensions:
        extension2format[extension] = fmt
    ioformats[name] = fmt
    return fmt

295 

296 

def get_ioformat(name: str) -> IOFormat:
    """Look up a registered format by name.

    Raises UnknownFileTypeError if no format of that name is registered
    or if the module implementing it cannot be imported.
    """
    candidate = ioformats.get(name)
    if candidate is None:
        raise UnknownFileTypeError(name)
    # Accessing the property imports the module; this may raise as well.
    candidate.module
    return ioformats[name]

305 

306 

def register_external_io_formats(group):
    """Register all IO formats advertised under entry point ``group``.

    Formats that fail to register are skipped with a warning instead of
    aborting the import of this module.
    """
    # Query the installed entry points only once; each call re-reads the
    # metadata of the installed distributions.
    all_entry_points = entry_points()
    if hasattr(all_entry_points, 'select'):
        # Newer importlib.metadata API.
        fmt_entry_points = all_entry_points.select(  # type: ignore
            group=group)
    else:
        # Older API: a plain mapping from group name to entry points.
        fmt_entry_points = all_entry_points.get(group, ())

    for entry_point in fmt_entry_points:
        try:
            define_external_io_format(entry_point)
        except Exception as exc:
            warnings.warn(
                'Failed to register external '
                f'IO format {entry_point.name}: {exc}'
            )

321 

322 

def define_external_io_format(entry_point):
    """Load ``entry_point`` and register the format it describes.

    Raises ValueError if a format with the entry point's name already
    exists and TypeError if the loaded object is not an ExternalIOFormat.
    """
    format_name = entry_point.name
    spec = entry_point.load()

    if format_name in ioformats:
        raise ValueError(f'Format {entry_point.name} already defined')

    if not isinstance(spec, ExternalIOFormat):
        raise TypeError('Wrong type for registering external IO formats '
                        f'in format {entry_point.name}, expected '
                        'ExternalIOFormat')

    F(format_name, **spec._asdict(), external=True)  # type: ignore

333 

334 

# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
# 1=single atoms object
# +=multiple atoms objects
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode
#
# Keyword arguments (see define_io_format()):
#
# module=module implementing the format (default: the format name with
#        dashes replaced by underscores), relative to ase.io
# ext=file extension(s) identifying the format; each extension can be
#     registered by one format only
# glob=file name pattern(s) matched against the base name of the file
# magic=byte pattern(s) glob-matched against the start of the file
# magic_regex=regular expression matched against the start of the file
#
# NOTE: The order of the definitions matters.  filetype() and
# match_magic() loop over the formats in the order they are defined here
# and return the first one that matches.

F = define_io_format
F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit', glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
F('exciting', 'exciting input', '1F', glob='input.xml')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')

505 

506 

def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression extension off a filename.

    Only ``.gz``, ``.bz2`` and ``.xz`` are recognised.  Any other
    filename is returned unchanged together with ``None``.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression extension, and that
        extension without its dot.  If no known compression extension
        is present, the full filename and ``None``.
    """
    # Update if anything is added
    known = ('gz', 'bz2', 'xz')

    # The stdlib splitter handles most edge cases; it keeps the '.'
    # on the suffix, so drop it before comparing.
    stem, dotted_suffix = os.path.splitext(filename)
    suffix = dotted_suffix.strip('.')

    if suffix not in known:
        return filename, None
    return stem, suffix

541 

542 

def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a possibly compressed file as if it were a standard file.

    The compression is guessed from the filename extension (see
    ``get_compression``).  Implemented for ``gz`` (gzip), ``bz2``
    (bzip2) and ``xz`` (lzma); any other file is opened with the
    builtin ``open``.

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.

    The bare modes 'r', 'w' and 'a' are turned into their explicit text
    variants 'rt', 'wt' and 'at'.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed formats sometimes default to binary, so force text mode.
    explicit_text_modes = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = explicit_text_modes.get(mode, mode)

    compression = get_compression(filename)[1]

    # The compression modules are imported only when actually needed.
    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)  # type: ignore
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # No (known) compression: plain file.
    return open(filename, mode)

591 

592 

def wrap_read_function(read, filename, index=None, **kwargs):
    """Turn a plain read function into a generator.

    Without an index, the single return value of ``read`` is yielded.
    With an index, ``read`` is expected to return an iterable whose
    items are yielded one by one.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)

600 

601 

# Type alias for arguments that may be a file name, a path object or an
# open file object.
NameOrFile = Union[str, PurePath, IO]

603 

604 

def write(
        filename: NameOrFile,
        images: Union[Atoms, Sequence[Atoms]],
        format: str = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.  If it
        still cannot be determined for a file object or for '-', json
        is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """

    if isinstance(filename, PurePath):
        filename = str(filename)

    if not isinstance(filename, str):
        # An open file object: try to guess the format from its name,
        # then forget the name and write to the object itself.
        fd = filename  # type: ignore
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore
    elif filename == '-':
        fd = sys.stdout
        filename = None  # type: ignore
    else:
        fd = None
        if format is None:
            format = filetype(filename, read=False)
            assert isinstance(format, str)

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)

669 

670 

@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` using the format object ``io``.

    Exactly one of ``filename`` and ``fd`` is expected to be set.
    Raises ValueError if the format cannot do what is asked for.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError('{}-format can only store 1 Atoms object.'
                             .format(format))
        images = images[0]

    if not io.can_write:
        raise ValueError("Can't write to {}-format".format(format))

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             "file-descriptor using json-format.")
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The write function wants a file name.
        if fd is not None:
            raise ValueError("Can't write {}-format to file-descriptor"
                             .format(format))
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError("Cannot append to {}-format, write-function "
                             "does not support the append keyword."
                             .format(format))
        return io.write(filename, images, **kwargs)

    if fd is not None:
        # The caller owns the file object; do not close it.
        return io.write(fd, images, **kwargs)

    mode = 'a' if append else 'w'
    if io.isbinary:
        mode += 'b'
    # XXX remember to re-enable compressed open
    # fd = io.open(filename, mode)
    fd = open_with_compression(filename, mode)
    try:
        return io.write(fd, images, **kwargs)
    finally:
        fd.close()

719 

720 

def read(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default) ``filename`` is split by the at sign ``@``.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin
    if isinstance(index, str):
        # Strings that cannot be parsed are passed on unchanged.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1
    if not format:
        format = filetype(filename, read=isinstance(filename, str))

    ioformat = get_ioformat(format)

    # A slice or string asks for a list of images; anything else for
    # the one image at that position.
    wants_list = isinstance(index, (slice, str))
    selection = index if wants_list else slice(index, None)
    images = _iread(filename, selection, format, ioformat,
                    parallel=parallel, **kwargs)
    if wants_list:
        return list(images)
    return next(images)

777 

778 

def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterable[Atoms]:
    """Iterator for reading Atoms objects from file.

    Takes the same arguments as the `read` function, but yields one
    Atoms object at a time instead of returning them all at once.
    Without an index, all images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)
    elif not isinstance(index, (slice, str)):
        # Single position: a slice of length one.  For index -1 the stop
        # would be 0, hence the "or None".
        index = slice(index, (index + 1) or None)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)

812 

813 

@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield images read from ``filename`` with the format object ``io``.

    ``filename`` may be a file name or an open file object.  Each item
    produced by the reader is either a dict with an ``'atoms'`` entry or
    the image itself; with ``full_output`` the dict is yielded,
    otherwise only the image.
    """
    if not io.can_read:
        raise ValueError("Can't read from {}-format".format(format))

    if io.single:
        # Readers of single-image formats take no index argument.
        assert index.start in (None, 0, -1)
        reader_args = ()
    else:
        reader_args = (index,)

    opened_here = isinstance(filename, str) and io.acceptsfd
    if opened_here:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
    else:
        # Either a file name for a reader that wants names, or a file
        # object, which the reader then has to accept.
        assert isinstance(filename, str) or io.acceptsfd
        fd = filename

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *reader_args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            yield record if full_output else record['atoms']
    finally:
        if opened_here:
            fd.close()

852 

853 

def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split a trailing ``@index`` off ``filename``.

    Returns the tuple ``(filename, index)``.  Nothing is split if
    ``filename`` is not a string, if ``do_not_split_by_at_sign`` is
    true or if the base name contains no ``@``.  An ``index`` given as
    a slice takes precedence over the one in the filename.  If the
    part after the ``@`` cannot be parsed, a warning is issued and it
    is returned as a string.
    """
    if not isinstance(filename, str):
        return filename, index

    if do_not_split_by_at_sign or '@' not in os.path.basename(filename):
        return filename, index

    stem, _, index_text = filename.rpartition('@')

    if isinstance(index, slice):
        return stem, index

    try:
        parsed = string2index(index_text)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
        parsed = index_text
    return stem, parsed

875 

876 

def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose signature matches ``data``.

    Only the first PEEK_BYTES bytes are considered.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    hit = next((candidate for candidate in ioformats.values()
                if candidate.match_magic(head)), None)
    if hit is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return hit

883 

884 

def string2index(string: str) -> Union[int, slice, str]:
    """Convert index string to either int or slice.

    A string without ``':'`` is converted to int if possible and
    returned unchanged otherwise (it may contain a database accessor).
    A string with one or two colons is converted to a slice, where empty
    fields mean None.

    Raises ValueError if a slice field is not an integer or if there are
    more than three fields.
    """
    if ':' not in string:
        # may contain database accessor
        try:
            return int(string)
        except ValueError:
            return string

    fields = string.split(':')
    if len(fields) > 3:
        # slice() itself would raise TypeError here; callers handle
        # unparsable index strings by catching ValueError.
        raise ValueError(f'Invalid slice string: {string!r}')

    bounds: List[Optional[int]] = [int(field) if field else None
                                   for field in fields]
    return slice(*bounds)

901 

902 

def filetype(
        filename: NameOrFile,
        read: bool = True,
        guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then up to ``PEEK_BYTES`` bytes from
    the start of the file will be read and analysed.  Turn off this second
    part by using read=False.

    filename: str or file
        File name, or an open file object.  A file object is never closed
        here; after peeking at its contents it is rewound to the start.
    read: bool
        Whether the contents of the file may be inspected.
    guess: bool
        If the contents are not recognized and the extension is not
        registered, return the bare extension as the format name.

    Raises UnknownFileTypeError if the type cannot be determined or if
    the file is empty.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name  # type: ignore

    ext = None
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        # strip any compression extensions that can be read
        root, compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            # A file object with a string name; it belongs to the caller.
            fd = orig_filename  # type: ignore
    else:
        # A file object without a usable name.  Use the object itself,
        # not its ``name`` attribute, which may be e.g. an integer.
        fd = orig_filename  # type: ignore
        if fd is sys.stdin:
            return 'json'

    if opened_here:
        # Only a file opened here is closed here.
        try:
            data = fd.read(PEEK_BYTES)
        finally:
            fd.close()
    else:
        data = fd.read(PEEK_BYTES)
        if fd is filename:
            fd.seek(0)
        else:
            # Caller-owned file object with a name: rewind if possible so
            # that the caller can still read it from the start.
            try:
                fd.seek(0)
            except (AttributeError, OSError, ValueError):
                pass

    if len(data) == 0:
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format

996 

997 

def index2range(index, length):
    """Convert slice or integer to range.

    The index is applied to ``range(length)``.  A slice gives the
    corresponding range; an integer gives a range containing only that
    (normalized) integer."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)

1005 return obj