Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Module to read and write atoms in cif file format. 

2 

3See http://www.iucr.org/resources/cif/spec/version1.1/cifsyntax for a 

4description of the file format. STAR extensions as save frames, 

5global blocks, nested loops and multi-data values are not supported. 

6The "latin-1" encoding is required by the IUCR specification. 

7""" 

8 

9import io 

10import re 

11import shlex 

12import warnings 

13from typing import Dict, List, Tuple, Optional, Union, Iterator, Any, Sequence 

14import collections.abc 

15 

16import numpy as np 

17 

18from ase import Atoms 

19from ase.cell import Cell 

20from ase.spacegroup import crystal 

21from ase.spacegroup.spacegroup import spacegroup_from_data, Spacegroup 

22from ase.io.cif_unicode import format_unicode, handle_subscripts 

23from ase.utils import iofunction 

24 

25 

26rhombohedral_spacegroups = {146, 148, 155, 160, 161, 166, 167} 

27 

28 

29old_spacegroup_names = {'Abm2': 'Aem2', 

30 'Aba2': 'Aea2', 

31 'Cmca': 'Cmce', 

32 'Cmma': 'Cmme', 

33 'Ccca': 'Ccc1'} 

34 

35# CIF maps names to either single values or to multiple values via loops. 

36CIFDataValue = Union[str, int, float] 

37CIFData = Union[CIFDataValue, List[CIFDataValue]] 

38 

39 

40def convert_value(value: str) -> CIFDataValue: 

41 """Convert CIF value string to corresponding python type.""" 

42 value = value.strip() 

43 if re.match('(".*")|(\'.*\')$', value): 

44 return handle_subscripts(value[1:-1]) 

45 elif re.match(r'[+-]?\d+$', value): 

46 return int(value) 

47 elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?$', value): 

48 return float(value) 

49 elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?\(\d+\)$', 

50 value): 

51 return float(value[:value.index('(')]) # strip off uncertainties 

52 elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?\(\d+$', 

53 value): 

54 warnings.warn('Badly formed number: "{0}"'.format(value)) 

55 return float(value[:value.index('(')]) # strip off uncertainties 

56 else: 

57 return handle_subscripts(value) 

58 

59 

60def parse_multiline_string(lines: List[str], line: str) -> str: 

61 """Parse semicolon-enclosed multiline string and return it.""" 

62 assert line[0] == ';' 

63 strings = [line[1:].lstrip()] 

64 while True: 

65 line = lines.pop().strip() 

66 if line[:1] == ';': 

67 break 

68 strings.append(line) 

69 return '\n'.join(strings).strip() 

70 

71 

72def parse_singletag(lines: List[str], line: str) -> Tuple[str, CIFDataValue]: 

73 """Parse a CIF tag (entries starting with underscore). Returns 

74 a key-value pair.""" 

75 kv = line.split(None, 1) 

76 if len(kv) == 1: 

77 key = line 

78 line = lines.pop().strip() 

79 while not line or line[0] == '#': 

80 line = lines.pop().strip() 

81 if line[0] == ';': 

82 value = parse_multiline_string(lines, line) 

83 else: 

84 value = line 

85 else: 

86 key, value = kv 

87 return key, convert_value(value) 

88 

89 

90def parse_cif_loop_headers(lines: List[str]) -> Iterator[str]: 

91 while lines: 

92 line = lines.pop() 

93 tokens = line.split() 

94 

95 if len(tokens) == 1 and tokens[0].startswith('_'): 

96 header = tokens[0].lower() 

97 yield header 

98 else: 

99 lines.append(line) # 'undo' pop 

100 return 

101 

102 

103def parse_cif_loop_data(lines: List[str], 

104 ncolumns: int) -> List[List[CIFDataValue]]: 

105 columns: List[List[CIFDataValue]] = [[] for _ in range(ncolumns)] 

106 

107 tokens = [] 

108 while lines: 

109 line = lines.pop().strip() 

110 lowerline = line.lower() 

111 if (not line or 

112 line.startswith('_') or 

113 lowerline.startswith('data_') or 

114 lowerline.startswith('loop_')): 

115 lines.append(line) 

116 break 

117 

118 if line.startswith('#'): 

119 continue 

120 

121 if line.startswith(';'): 

122 moretokens = [parse_multiline_string(lines, line)] 

123 else: 

124 if ncolumns == 1: 

125 moretokens = [line] 

126 else: 

127 moretokens = shlex.split(line, posix=False) 

128 

129 tokens += moretokens 

130 if len(tokens) < ncolumns: 

131 continue 

132 if len(tokens) == ncolumns: 

133 for i, token in enumerate(tokens): 

134 columns[i].append(convert_value(token)) 

135 else: 

136 warnings.warn('Wrong number {} of tokens, expected {}: {}' 

137 .format(len(tokens), ncolumns, tokens)) 

138 

139 # (Due to continue statements we cannot move this to start of loop) 

140 tokens = [] 

141 

142 if tokens: 

143 assert len(tokens) < ncolumns 

144 raise RuntimeError('CIF loop ended unexpectedly with incomplete row') 

145 

146 return columns 

147 

148 

149def parse_loop(lines: List[str]) -> Dict[str, List[CIFDataValue]]: 

150 """Parse a CIF loop. Returns a dict with column tag names as keys 

151 and a lists of the column content as values.""" 

152 

153 headers = list(parse_cif_loop_headers(lines)) 

154 # Dict would be better. But there can be repeated headers. 

155 

156 columns = parse_cif_loop_data(lines, len(headers)) 

157 

158 columns_dict = {} 

159 for i, header in enumerate(headers): 

160 if header in columns_dict: 

161 warnings.warn('Duplicated loop tags: {0}'.format(header)) 

162 else: 

163 columns_dict[header] = columns[i] 

164 return columns_dict 

165 

166 

167def parse_items(lines: List[str], line: str) -> Dict[str, CIFData]: 

168 """Parse a CIF data items and return a dict with all tags.""" 

169 tags: Dict[str, CIFData] = {} 

170 

171 while True: 

172 if not lines: 

173 break 

174 line = lines.pop().strip() 

175 if not line: 

176 continue 

177 lowerline = line.lower() 

178 if not line or line.startswith('#'): 

179 continue 

180 elif line.startswith('_'): 

181 key, value = parse_singletag(lines, line) 

182 tags[key.lower()] = value 

183 elif lowerline.startswith('loop_'): 

184 tags.update(parse_loop(lines)) 

185 elif lowerline.startswith('data_'): 

186 if line: 

187 lines.append(line) 

188 break 

189 elif line.startswith(';'): 

190 parse_multiline_string(lines, line) 

191 else: 

192 raise ValueError('Unexpected CIF file entry: "{0}"'.format(line)) 

193 return tags 

194 

195 

196class NoStructureData(RuntimeError): 

197 pass 

198 

199 

200class CIFBlock(collections.abc.Mapping): 

201 """A block (i.e., a single system) in a crystallographic information file. 

202 

203 Use this object to query CIF tags or import information as ASE objects.""" 

204 

205 cell_tags = ['_cell_length_a', '_cell_length_b', '_cell_length_c', 

206 '_cell_angle_alpha', '_cell_angle_beta', '_cell_angle_gamma'] 

207 

208 def __init__(self, name: str, tags: Dict[str, CIFData]): 

209 self.name = name 

210 self._tags = tags 

211 

212 def __repr__(self) -> str: 

213 tags = set(self._tags) 

214 return f'CIFBlock({self.name}, tags={tags})' 

215 

216 def __getitem__(self, key: str) -> CIFData: 

217 return self._tags[key] 

218 

219 def __iter__(self) -> Iterator[str]: 

220 return iter(self._tags) 

221 

222 def __len__(self) -> int: 

223 return len(self._tags) 

224 

225 def get(self, key, default=None): 

226 return self._tags.get(key, default) 

227 

228 def get_cellpar(self) -> Optional[List]: 

229 try: 

230 return [self[tag] for tag in self.cell_tags] 

231 except KeyError: 

232 return None 

233 

234 def get_cell(self) -> Cell: 

235 cellpar = self.get_cellpar() 

236 if cellpar is None: 

237 return Cell.new([0, 0, 0]) 

238 return Cell.new(cellpar) 

239 

240 def _raw_scaled_positions(self) -> Optional[np.ndarray]: 

241 coords = [self.get(name) for name in ['_atom_site_fract_x', 

242 '_atom_site_fract_y', 

243 '_atom_site_fract_z']] 

244 # XXX Shall we try to handle mixed coordinates? 

245 # (Some scaled vs others fractional) 

246 if None in coords: 

247 return None 

248 return np.array(coords).T 

249 

250 def _raw_positions(self) -> Optional[np.ndarray]: 

251 coords = [self.get('_atom_site_cartn_x'), 

252 self.get('_atom_site_cartn_y'), 

253 self.get('_atom_site_cartn_z')] 

254 if None in coords: 

255 return None 

256 return np.array(coords).T 

257 

258 def _get_site_coordinates(self): 

259 scaled = self._raw_scaled_positions() 

260 

261 if scaled is not None: 

262 return 'scaled', scaled 

263 

264 cartesian = self._raw_positions() 

265 

266 if cartesian is None: 

267 raise NoStructureData('No positions found in structure') 

268 

269 return 'cartesian', cartesian 

270 

271 def _get_symbols_with_deuterium(self): 

272 labels = self._get_any(['_atom_site_type_symbol', 

273 '_atom_site_label']) 

274 if labels is None: 

275 raise NoStructureData('No symbols') 

276 

277 symbols = [] 

278 for label in labels: 

279 if label == '.' or label == '?': 

280 raise NoStructureData('Symbols are undetermined') 

281 # Strip off additional labeling on chemical symbols 

282 match = re.search(r'([A-Z][a-z]?)', label) 

283 symbol = match.group(0) 

284 symbols.append(symbol) 

285 return symbols 

286 

287 def get_symbols(self) -> List[str]: 

288 symbols = self._get_symbols_with_deuterium() 

289 return [symbol if symbol != 'D' else 'H' for symbol in symbols] 

290 

291 def _where_deuterium(self): 

292 return np.array([symbol == 'D' for symbol 

293 in self._get_symbols_with_deuterium()], bool) 

294 

295 def _get_masses(self) -> Optional[np.ndarray]: 

296 mask = self._where_deuterium() 

297 if not any(mask): 

298 return None 

299 

300 symbols = self.get_symbols() 

301 masses = Atoms(symbols).get_masses() 

302 masses[mask] = 2.01355 

303 return masses 

304 

305 def _get_any(self, names): 

306 for name in names: 

307 if name in self: 

308 return self[name] 

309 return None 

310 

311 def _get_spacegroup_number(self): 

312 # Symmetry specification, see 

313 # http://www.iucr.org/resources/cif/dictionaries/cif_sym for a 

314 # complete list of official keys. In addition we also try to 

315 # support some commonly used depricated notations 

316 return self._get_any(['_space_group.it_number', 

317 '_space_group_it_number', 

318 '_symmetry_int_tables_number']) 

319 

320 def _get_spacegroup_name(self): 

321 hm_symbol = self._get_any(['_space_group_name_h-m_alt', 

322 '_symmetry_space_group_name_h-m', 

323 '_space_group.Patterson_name_h-m', 

324 '_space_group.patterson_name_h-m']) 

325 

326 hm_symbol = old_spacegroup_names.get(hm_symbol, hm_symbol) 

327 return hm_symbol 

328 

329 def _get_sitesym(self): 

330 sitesym = self._get_any(['_space_group_symop_operation_xyz', 

331 '_space_group_symop.operation_xyz', 

332 '_symmetry_equiv_pos_as_xyz']) 

333 if isinstance(sitesym, str): 

334 sitesym = [sitesym] 

335 return sitesym 

336 

337 def _get_fractional_occupancies(self): 

338 return self.get('_atom_site_occupancy') 

339 

340 def _get_setting(self) -> Optional[int]: 

341 setting_str = self.get('_symmetry_space_group_setting') 

342 if setting_str is None: 

343 return None 

344 

345 setting = int(setting_str) 

346 if setting not in [1, 2]: 

347 raise ValueError( 

348 f'Spacegroup setting must be 1 or 2, not {setting}') 

349 return setting 

350 

351 def get_spacegroup(self, subtrans_included) -> Spacegroup: 

352 # XXX The logic in this method needs serious cleaning up! 

353 # The setting needs to be passed as either 1 or two, not None (default) 

354 no = self._get_spacegroup_number() 

355 hm_symbol = self._get_spacegroup_name() 

356 sitesym = self._get_sitesym() 

357 

358 setting = 1 

359 spacegroup = 1 

360 if sitesym: 

361 # Special cases: sitesym can be None or an empty list. 

362 # The empty list could be replaced with just the identity 

363 # function, but it seems more correct to try to get the 

364 # spacegroup number and derive the symmetries for that. 

365 subtrans = [(0.0, 0.0, 0.0)] if subtrans_included else None 

366 spacegroup = spacegroup_from_data( 

367 no=no, symbol=hm_symbol, sitesym=sitesym, subtrans=subtrans, 

368 setting=setting) 

369 elif no is not None: 

370 spacegroup = no 

371 elif hm_symbol is not None: 

372 spacegroup = hm_symbol 

373 else: 

374 spacegroup = 1 

375 

376 setting_std = self._get_setting() 

377 

378 setting_name = None 

379 if '_symmetry_space_group_setting' in self: 

380 assert setting_std is not None 

381 setting = setting_std 

382 elif '_space_group_crystal_system' in self: 

383 setting_name = self['_space_group_crystal_system'] 

384 elif '_symmetry_cell_setting' in self: 

385 setting_name = self['_symmetry_cell_setting'] 

386 

387 if setting_name: 

388 no = Spacegroup(spacegroup).no 

389 if no in rhombohedral_spacegroups: 

390 if setting_name == 'hexagonal': 

391 setting = 1 

392 elif setting_name in ('trigonal', 'rhombohedral'): 

393 setting = 2 

394 else: 

395 warnings.warn( 

396 'unexpected crystal system %r for space group %r' % ( 

397 setting_name, spacegroup)) 

398 # FIXME - check for more crystal systems... 

399 else: 

400 warnings.warn( 

401 'crystal system %r is not interpreted for space group %r. ' 

402 'This may result in wrong setting!' % ( 

403 setting_name, spacegroup)) 

404 

405 spg = Spacegroup(spacegroup, setting) 

406 if no is not None: 

407 assert int(spg) == no, (int(spg), no) 

408 return spg 

409 

410 def get_unsymmetrized_structure(self) -> Atoms: 

411 """Return Atoms without symmetrizing coordinates. 

412 

413 This returns a (normally) unphysical Atoms object 

414 corresponding only to those coordinates included 

415 in the CIF file, useful for e.g. debugging. 

416 

417 This method may change behaviour in the future.""" 

418 symbols = self.get_symbols() 

419 coordtype, coords = self._get_site_coordinates() 

420 

421 atoms = Atoms(symbols=symbols, 

422 cell=self.get_cell(), 

423 masses=self._get_masses()) 

424 

425 if coordtype == 'scaled': 

426 atoms.set_scaled_positions(coords) 

427 else: 

428 assert coordtype == 'cartesian' 

429 atoms.positions[:] = coords 

430 

431 return atoms 

432 

433 def has_structure(self): 

434 """Whether this CIF block has an atomic configuration.""" 

435 try: 

436 self.get_symbols() 

437 self._get_site_coordinates() 

438 except NoStructureData: 

439 return False 

440 else: 

441 return True 

442 

443 def get_atoms(self, store_tags=False, primitive_cell=False, 

444 subtrans_included=True, fractional_occupancies=True) -> Atoms: 

445 """Returns an Atoms object from a cif tags dictionary. See read_cif() 

446 for a description of the arguments.""" 

447 if primitive_cell and subtrans_included: 

448 raise RuntimeError( 

449 'Primitive cell cannot be determined when sublattice ' 

450 'translations are included in the symmetry operations listed ' 

451 'in the CIF file, i.e. when `subtrans_included` is True.') 

452 

453 cell = self.get_cell() 

454 assert cell.rank in [0, 3] 

455 

456 kwargs: Dict[str, Any] = {} 

457 if store_tags: 

458 kwargs['info'] = self._tags.copy() 

459 

460 if fractional_occupancies: 

461 occupancies = self._get_fractional_occupancies() 

462 else: 

463 occupancies = None 

464 

465 if occupancies is not None: 

466 # no warnings in this case 

467 kwargs['onduplicates'] = 'keep' 

468 

469 # The unsymmetrized_structure is not the asymmetric unit 

470 # because the asymmetric unit should have (in general) a smaller cell, 

471 # whereas we have the full cell. 

472 unsymmetrized_structure = self.get_unsymmetrized_structure() 

473 

474 if cell.rank == 3: 

475 spacegroup = self.get_spacegroup(subtrans_included) 

476 atoms = crystal(unsymmetrized_structure, 

477 spacegroup=spacegroup, 

478 setting=spacegroup.setting, 

479 occupancies=occupancies, 

480 primitive_cell=primitive_cell, 

481 **kwargs) 

482 else: 

483 atoms = unsymmetrized_structure 

484 if kwargs.get('info') is not None: 

485 atoms.info.update(kwargs['info']) 

486 if occupancies is not None: 

487 # Compile an occupancies dictionary 

488 occ_dict = {} 

489 for i, sym in enumerate(atoms.symbols): 

490 occ_dict[str(i)] = {sym: occupancies[i]} 

491 atoms.info['occupancy'] = occ_dict 

492 

493 return atoms 

494 

495 

496def parse_block(lines: List[str], line: str) -> CIFBlock: 

497 assert line.lower().startswith('data_') 

498 blockname = line.split('_', 1)[1].rstrip() 

499 tags = parse_items(lines, line) 

500 return CIFBlock(blockname, tags) 

501 

502 

503def parse_cif(fileobj, reader='ase') -> Iterator[CIFBlock]: 

504 if reader == 'ase': 

505 return parse_cif_ase(fileobj) 

506 elif reader == 'pycodcif': 

507 return parse_cif_pycodcif(fileobj) 

508 else: 

509 raise ValueError(f'No such reader: {reader}') 

510 

511 

512def parse_cif_ase(fileobj) -> Iterator[CIFBlock]: 

513 """Parse a CIF file using ase CIF parser.""" 

514 

515 if isinstance(fileobj, str): 

516 with open(fileobj, 'rb') as fileobj: 

517 data = fileobj.read() 

518 else: 

519 data = fileobj.read() 

520 

521 if isinstance(data, bytes): 

522 data = data.decode('latin1') 

523 data = format_unicode(data) 

524 lines = [e for e in data.split('\n') if len(e) > 0] 

525 if len(lines) > 0 and lines[0].rstrip() == '#\\#CIF_2.0': 

526 warnings.warn('CIF v2.0 file format detected; `ase` CIF reader might ' 

527 'incorrectly interpret some syntax constructions, use ' 

528 '`pycodcif` reader instead') 

529 lines = [''] + lines[::-1] # all lines (reversed) 

530 

531 while lines: 

532 line = lines.pop().strip() 

533 if not line or line.startswith('#'): 

534 continue 

535 

536 yield parse_block(lines, line) 

537 

538 

539def parse_cif_pycodcif(fileobj) -> Iterator[CIFBlock]: 

540 """Parse a CIF file using pycodcif CIF parser.""" 

541 if not isinstance(fileobj, str): 

542 fileobj = fileobj.name 

543 

544 try: 

545 from pycodcif import parse 

546 except ImportError: 

547 raise ImportError( 

548 'parse_cif_pycodcif requires pycodcif ' + 

549 '(http://wiki.crystallography.net/cod-tools/pycodcif/)') 

550 

551 data, _, _ = parse(fileobj) 

552 

553 for datablock in data: 

554 tags = datablock['values'] 

555 for tag in tags.keys(): 

556 values = [convert_value(x) for x in tags[tag]] 

557 if len(values) == 1: 

558 tags[tag] = values[0] 

559 else: 

560 tags[tag] = values 

561 yield CIFBlock(datablock['name'], tags) 

562 

563 

564def read_cif(fileobj, index, store_tags=False, primitive_cell=False, 

565 subtrans_included=True, fractional_occupancies=True, 

566 reader='ase') -> Iterator[Atoms]: 

567 """Read Atoms object from CIF file. *index* specifies the data 

568 block number or name (if string) to return. 

569 

570 If *index* is None or a slice object, a list of atoms objects will 

571 be returned. In the case of *index* is *None* or *slice(None)*, 

572 only blocks with valid crystal data will be included. 

573 

574 If *store_tags* is true, the *info* attribute of the returned 

575 Atoms object will be populated with all tags in the corresponding 

576 cif data block. 

577 

578 If *primitive_cell* is true, the primitive cell will be built instead 

579 of the conventional cell. 

580 

581 If *subtrans_included* is true, sublattice translations are 

582 assumed to be included among the symmetry operations listed in the 

583 CIF file (seems to be the common behaviour of CIF files). 

584 Otherwise the sublattice translations are determined from setting 

585 1 of the extracted space group. A result of setting this flag to 

586 true, is that it will not be possible to determine the primitive 

587 cell. 

588 

589 If *fractional_occupancies* is true, the resulting atoms object will be 

590 tagged equipped with a dictionary `occupancy`. The keys of this dictionary 

591 will be integers converted to strings. The conversion to string is done 

592 in order to avoid troubles with JSON encoding/decoding of the dictionaries 

593 with non-string keys. 

594 Also, in case of mixed occupancies, the atom's chemical symbol will be 

595 that of the most dominant species. 

596 

597 String *reader* is used to select CIF reader. Value `ase` selects 

598 built-in CIF reader (default), while `pycodcif` selects CIF reader based 

599 on `pycodcif` package. 

600 """ 

601 # Find all CIF blocks with valid crystal data 

602 images = [] 

603 for block in parse_cif(fileobj, reader): 

604 if not block.has_structure(): 

605 continue 

606 

607 atoms = block.get_atoms( 

608 store_tags, primitive_cell, 

609 subtrans_included, 

610 fractional_occupancies=fractional_occupancies) 

611 images.append(atoms) 

612 

613 for atoms in images[index]: 

614 yield atoms 

615 

616 

617def format_cell(cell: Cell) -> str: 

618 assert cell.rank == 3 

619 lines = [] 

620 for name, value in zip(CIFBlock.cell_tags, cell.cellpar()): 

621 line = '{:20} {}\n'.format(name, value) 

622 lines.append(line) 

623 assert len(lines) == 6 

624 return ''.join(lines) 

625 

626 

627def format_generic_spacegroup_info() -> str: 

628 # We assume no symmetry whatsoever 

629 return '\n'.join([ 

630 '_space_group_name_H-M_alt "P 1"', 

631 '_space_group_IT_number 1', 

632 '', 

633 'loop_', 

634 ' _space_group_symop_operation_xyz', 

635 " 'x, y, z'", 

636 '', 

637 ]) 

638 

639 

640class CIFLoop: 

641 def __init__(self): 

642 self.names = [] 

643 self.formats = [] 

644 self.arrays = [] 

645 

646 def add(self, name, array, fmt): 

647 assert name.startswith('_') 

648 self.names.append(name) 

649 self.formats.append(fmt) 

650 self.arrays.append(array) 

651 if len(self.arrays[0]) != len(self.arrays[-1]): 

652 raise ValueError(f'Loop data "{name}" has {len(array)} ' 

653 'elements, expected {len(self.arrays[0])}') 

654 

655 def tostring(self): 

656 lines = [] 

657 append = lines.append 

658 append('loop_') 

659 for name in self.names: 

660 append(f' {name}') 

661 

662 template = ' ' + ' '.join(self.formats) 

663 

664 ncolumns = len(self.arrays) 

665 nrows = len(self.arrays[0]) if ncolumns > 0 else 0 

666 for row in range(nrows): 

667 arraydata = [array[row] for array in self.arrays] 

668 line = template.format(*arraydata) 

669 append(line) 

670 append('') 

671 return '\n'.join(lines) 

672 

673 

674@iofunction('wb') 

675def write_cif(fd, images, cif_format=None, 

676 wrap=True, labels=None, loop_keys=None) -> None: 

677 """Write *images* to CIF file. 

678 

679 wrap: bool 

680 Wrap atoms into unit cell. 

681 

682 labels: list 

683 Use this list (shaped list[i_frame][i_atom] = string) for the 

684 '_atom_site_label' section instead of automatically generating 

685 it from the element symbol. 

686 

687 loop_keys: dict 

688 Add the information from this dictionary to the `loop_` 

689 section. Keys are printed to the `loop_` section preceeded by 

690 ' _'. dict[key] should contain the data printed for each atom, 

691 so it needs to have the setup `dict[key][i_frame][i_atom] = 

692 string`. The strings are printed as they are, so take care of 

693 formating. Information can be re-read using the `store_tags` 

694 option of the cif reader. 

695 

696 """ 

697 

698 if cif_format is not None: 

699 warnings.warn('The cif_format argument is deprecated and may be ' 

700 'removed in the future. Use loop_keys to customize ' 

701 'data written in loop.', FutureWarning) 

702 

703 if loop_keys is None: 

704 loop_keys = {} 

705 

706 if hasattr(images, 'get_positions'): 

707 images = [images] 

708 

709 fd = io.TextIOWrapper(fd, encoding='latin-1') 

710 try: 

711 for i, atoms in enumerate(images): 

712 blockname = f'data_image{i}\n' 

713 image_loop_keys = {key: loop_keys[key][i] for key in loop_keys} 

714 

715 write_cif_image(blockname, atoms, fd, 

716 wrap=wrap, 

717 labels=None if labels is None else labels[i], 

718 loop_keys=image_loop_keys) 

719 

720 finally: 

721 # Using the TextIOWrapper somehow causes the file to close 

722 # when this function returns. 

723 # Detach in order to circumvent this highly illogical problem: 

724 fd.detach() 

725 

726 

727def autolabel(symbols: Sequence[str]) -> List[str]: 

728 no: Dict[str, int] = {} 

729 labels = [] 

730 for symbol in symbols: 

731 if symbol in no: 

732 no[symbol] += 1 

733 else: 

734 no[symbol] = 1 

735 labels.append('%s%d' % (symbol, no[symbol])) 

736 return labels 

737 

738 

739def chemical_formula_header(atoms): 

740 counts = atoms.symbols.formula.count() 

741 formula_sum = ' '.join(f'{sym}{count}' for sym, count 

742 in counts.items()) 

743 return (f'_chemical_formula_structural {atoms.symbols}\n' 

744 f'_chemical_formula_sum "{formula_sum}"\n') 

745 

746 

747class BadOccupancies(ValueError): 

748 pass 

749 

750 

751def expand_kinds(atoms, coords): 

752 # try to fetch occupancies // spacegroup_kinds - occupancy mapping 

753 symbols = list(atoms.symbols) 

754 coords = list(coords) 

755 occupancies = [1] * len(symbols) 

756 occ_info = atoms.info.get('occupancy') 

757 kinds = atoms.arrays.get('spacegroup_kinds') 

758 if occ_info is not None and kinds is not None: 

759 for i, kind in enumerate(kinds): 

760 occ_info_kind = occ_info[str(kind)] 

761 symbol = symbols[i] 

762 if symbol not in occ_info_kind: 

763 raise BadOccupancies('Occupancies present but no occupancy ' 

764 'info for "{symbol}"') 

765 occupancies[i] = occ_info_kind[symbol] 

766 # extend the positions array in case of mixed occupancy 

767 for sym, occ in occ_info[str(kind)].items(): 

768 if sym != symbols[i]: 

769 symbols.append(sym) 

770 coords.append(coords[i]) 

771 occupancies.append(occ) 

772 return symbols, coords, occupancies 

773 

774 

775def atoms_to_loop_data(atoms, wrap, labels, loop_keys): 

776 if atoms.cell.rank == 3: 

777 coord_type = 'fract' 

778 coords = atoms.get_scaled_positions(wrap).tolist() 

779 else: 

780 coord_type = 'Cartn' 

781 coords = atoms.get_positions(wrap).tolist() 

782 

783 try: 

784 symbols, coords, occupancies = expand_kinds(atoms, coords) 

785 except BadOccupancies as err: 

786 warnings.warn(str(err)) 

787 occupancies = [1] * len(atoms) 

788 symbols = list(atoms.symbols) 

789 

790 if labels is None: 

791 labels = autolabel(symbols) 

792 

793 coord_headers = [f'_atom_site_{coord_type}_{axisname}' 

794 for axisname in 'xyz'] 

795 

796 loopdata = {} 

797 loopdata['_atom_site_label'] = (labels, '{:<8s}') 

798 loopdata['_atom_site_occupancy'] = (occupancies, '{:6.4f}') 

799 

800 _coords = np.array(coords) 

801 for i, key in enumerate(coord_headers): 

802 loopdata[key] = (_coords[:, i], '{}') 

803 

804 loopdata['_atom_site_type_symbol'] = (symbols, '{:<2s}') 

805 loopdata['_atom_site_symmetry_multiplicity'] = ( 

806 [1.0] * len(symbols), '{}') 

807 

808 for key in loop_keys: 

809 # Should expand the loop_keys like we expand the occupancy stuff. 

810 # Otherwise user will never figure out how to do this. 

811 values = [loop_keys[key][i] for i in range(len(symbols))] 

812 loopdata['_' + key] = (values, '{}') 

813 

814 return loopdata, coord_headers 

815 

816 

817def write_cif_image(blockname, atoms, fd, *, wrap, 

818 labels, loop_keys): 

819 fd.write(blockname) 

820 fd.write(chemical_formula_header(atoms)) 

821 

822 rank = atoms.cell.rank 

823 if rank == 3: 

824 fd.write(format_cell(atoms.cell)) 

825 fd.write('\n') 

826 fd.write(format_generic_spacegroup_info()) 

827 fd.write('\n') 

828 elif rank != 0: 

829 raise ValueError('CIF format can only represent systems with ' 

830 f'0 or 3 lattice vectors. Got {rank}.') 

831 

832 loopdata, coord_headers = atoms_to_loop_data(atoms, wrap, labels, 

833 loop_keys) 

834 

835 headers = [ 

836 '_atom_site_type_symbol', 

837 '_atom_site_label', 

838 '_atom_site_symmetry_multiplicity', 

839 *coord_headers, 

840 '_atom_site_occupancy', 

841 ] 

842 

843 headers += ['_' + key for key in loop_keys] 

844 

845 loop = CIFLoop() 

846 for header in headers: 

847 array, fmt = loopdata[header] 

848 loop.add(header, array, fmt) 

849 

850 fd.write(loop.tostring())