Coverage for /builds/ase/ase/ase/io/cif.py : 87.08%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Module to read and write atoms in cif file format.
3See http://www.iucr.org/resources/cif/spec/version1.1/cifsyntax for a
4description of the file format. STAR extensions as save frames,
5global blocks, nested loops and multi-data values are not supported.
6The "latin-1" encoding is required by the IUCR specification.
7"""
9import io
10import re
11import shlex
12import warnings
13from typing import Dict, List, Tuple, Optional, Union, Iterator, Any, Sequence
14import collections.abc
16import numpy as np
18from ase import Atoms
19from ase.cell import Cell
20from ase.spacegroup import crystal
21from ase.spacegroup.spacegroup import spacegroup_from_data, Spacegroup
22from ase.io.cif_unicode import format_unicode, handle_subscripts
23from ase.utils import iofunction
26rhombohedral_spacegroups = {146, 148, 155, 160, 161, 166, 167}
29old_spacegroup_names = {'Abm2': 'Aem2',
30 'Aba2': 'Aea2',
31 'Cmca': 'Cmce',
32 'Cmma': 'Cmme',
33 'Ccca': 'Ccc1'}
35# CIF maps names to either single values or to multiple values via loops.
36CIFDataValue = Union[str, int, float]
37CIFData = Union[CIFDataValue, List[CIFDataValue]]
40def convert_value(value: str) -> CIFDataValue:
41 """Convert CIF value string to corresponding python type."""
42 value = value.strip()
43 if re.match('(".*")|(\'.*\')$', value):
44 return handle_subscripts(value[1:-1])
45 elif re.match(r'[+-]?\d+$', value):
46 return int(value)
47 elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?$', value):
48 return float(value)
49 elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?\(\d+\)$',
50 value):
51 return float(value[:value.index('(')]) # strip off uncertainties
52 elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?\(\d+$',
53 value):
54 warnings.warn('Badly formed number: "{0}"'.format(value))
55 return float(value[:value.index('(')]) # strip off uncertainties
56 else:
57 return handle_subscripts(value)
60def parse_multiline_string(lines: List[str], line: str) -> str:
61 """Parse semicolon-enclosed multiline string and return it."""
62 assert line[0] == ';'
63 strings = [line[1:].lstrip()]
64 while True:
65 line = lines.pop().strip()
66 if line[:1] == ';':
67 break
68 strings.append(line)
69 return '\n'.join(strings).strip()
72def parse_singletag(lines: List[str], line: str) -> Tuple[str, CIFDataValue]:
73 """Parse a CIF tag (entries starting with underscore). Returns
74 a key-value pair."""
75 kv = line.split(None, 1)
76 if len(kv) == 1:
77 key = line
78 line = lines.pop().strip()
79 while not line or line[0] == '#':
80 line = lines.pop().strip()
81 if line[0] == ';':
82 value = parse_multiline_string(lines, line)
83 else:
84 value = line
85 else:
86 key, value = kv
87 return key, convert_value(value)
90def parse_cif_loop_headers(lines: List[str]) -> Iterator[str]:
91 while lines:
92 line = lines.pop()
93 tokens = line.split()
95 if len(tokens) == 1 and tokens[0].startswith('_'):
96 header = tokens[0].lower()
97 yield header
98 else:
99 lines.append(line) # 'undo' pop
100 return
103def parse_cif_loop_data(lines: List[str],
104 ncolumns: int) -> List[List[CIFDataValue]]:
105 columns: List[List[CIFDataValue]] = [[] for _ in range(ncolumns)]
107 tokens = []
108 while lines:
109 line = lines.pop().strip()
110 lowerline = line.lower()
111 if (not line or
112 line.startswith('_') or
113 lowerline.startswith('data_') or
114 lowerline.startswith('loop_')):
115 lines.append(line)
116 break
118 if line.startswith('#'):
119 continue
121 if line.startswith(';'):
122 moretokens = [parse_multiline_string(lines, line)]
123 else:
124 if ncolumns == 1:
125 moretokens = [line]
126 else:
127 moretokens = shlex.split(line, posix=False)
129 tokens += moretokens
130 if len(tokens) < ncolumns:
131 continue
132 if len(tokens) == ncolumns:
133 for i, token in enumerate(tokens):
134 columns[i].append(convert_value(token))
135 else:
136 warnings.warn('Wrong number {} of tokens, expected {}: {}'
137 .format(len(tokens), ncolumns, tokens))
139 # (Due to continue statements we cannot move this to start of loop)
140 tokens = []
142 if tokens:
143 assert len(tokens) < ncolumns
144 raise RuntimeError('CIF loop ended unexpectedly with incomplete row')
146 return columns
149def parse_loop(lines: List[str]) -> Dict[str, List[CIFDataValue]]:
150 """Parse a CIF loop. Returns a dict with column tag names as keys
151 and a lists of the column content as values."""
153 headers = list(parse_cif_loop_headers(lines))
154 # Dict would be better. But there can be repeated headers.
156 columns = parse_cif_loop_data(lines, len(headers))
158 columns_dict = {}
159 for i, header in enumerate(headers):
160 if header in columns_dict:
161 warnings.warn('Duplicated loop tags: {0}'.format(header))
162 else:
163 columns_dict[header] = columns[i]
164 return columns_dict
167def parse_items(lines: List[str], line: str) -> Dict[str, CIFData]:
168 """Parse a CIF data items and return a dict with all tags."""
169 tags: Dict[str, CIFData] = {}
171 while True:
172 if not lines:
173 break
174 line = lines.pop().strip()
175 if not line:
176 continue
177 lowerline = line.lower()
178 if not line or line.startswith('#'):
179 continue
180 elif line.startswith('_'):
181 key, value = parse_singletag(lines, line)
182 tags[key.lower()] = value
183 elif lowerline.startswith('loop_'):
184 tags.update(parse_loop(lines))
185 elif lowerline.startswith('data_'):
186 if line:
187 lines.append(line)
188 break
189 elif line.startswith(';'):
190 parse_multiline_string(lines, line)
191 else:
192 raise ValueError('Unexpected CIF file entry: "{0}"'.format(line))
193 return tags
196class NoStructureData(RuntimeError):
197 pass
200class CIFBlock(collections.abc.Mapping):
201 """A block (i.e., a single system) in a crystallographic information file.
203 Use this object to query CIF tags or import information as ASE objects."""
205 cell_tags = ['_cell_length_a', '_cell_length_b', '_cell_length_c',
206 '_cell_angle_alpha', '_cell_angle_beta', '_cell_angle_gamma']
208 def __init__(self, name: str, tags: Dict[str, CIFData]):
209 self.name = name
210 self._tags = tags
212 def __repr__(self) -> str:
213 tags = set(self._tags)
214 return f'CIFBlock({self.name}, tags={tags})'
216 def __getitem__(self, key: str) -> CIFData:
217 return self._tags[key]
219 def __iter__(self) -> Iterator[str]:
220 return iter(self._tags)
222 def __len__(self) -> int:
223 return len(self._tags)
225 def get(self, key, default=None):
226 return self._tags.get(key, default)
228 def get_cellpar(self) -> Optional[List]:
229 try:
230 return [self[tag] for tag in self.cell_tags]
231 except KeyError:
232 return None
234 def get_cell(self) -> Cell:
235 cellpar = self.get_cellpar()
236 if cellpar is None:
237 return Cell.new([0, 0, 0])
238 return Cell.new(cellpar)
240 def _raw_scaled_positions(self) -> Optional[np.ndarray]:
241 coords = [self.get(name) for name in ['_atom_site_fract_x',
242 '_atom_site_fract_y',
243 '_atom_site_fract_z']]
244 # XXX Shall we try to handle mixed coordinates?
245 # (Some scaled vs others fractional)
246 if None in coords:
247 return None
248 return np.array(coords).T
250 def _raw_positions(self) -> Optional[np.ndarray]:
251 coords = [self.get('_atom_site_cartn_x'),
252 self.get('_atom_site_cartn_y'),
253 self.get('_atom_site_cartn_z')]
254 if None in coords:
255 return None
256 return np.array(coords).T
258 def _get_site_coordinates(self):
259 scaled = self._raw_scaled_positions()
261 if scaled is not None:
262 return 'scaled', scaled
264 cartesian = self._raw_positions()
266 if cartesian is None:
267 raise NoStructureData('No positions found in structure')
269 return 'cartesian', cartesian
271 def _get_symbols_with_deuterium(self):
272 labels = self._get_any(['_atom_site_type_symbol',
273 '_atom_site_label'])
274 if labels is None:
275 raise NoStructureData('No symbols')
277 symbols = []
278 for label in labels:
279 if label == '.' or label == '?':
280 raise NoStructureData('Symbols are undetermined')
281 # Strip off additional labeling on chemical symbols
282 match = re.search(r'([A-Z][a-z]?)', label)
283 symbol = match.group(0)
284 symbols.append(symbol)
285 return symbols
287 def get_symbols(self) -> List[str]:
288 symbols = self._get_symbols_with_deuterium()
289 return [symbol if symbol != 'D' else 'H' for symbol in symbols]
291 def _where_deuterium(self):
292 return np.array([symbol == 'D' for symbol
293 in self._get_symbols_with_deuterium()], bool)
295 def _get_masses(self) -> Optional[np.ndarray]:
296 mask = self._where_deuterium()
297 if not any(mask):
298 return None
300 symbols = self.get_symbols()
301 masses = Atoms(symbols).get_masses()
302 masses[mask] = 2.01355
303 return masses
305 def _get_any(self, names):
306 for name in names:
307 if name in self:
308 return self[name]
309 return None
311 def _get_spacegroup_number(self):
312 # Symmetry specification, see
313 # http://www.iucr.org/resources/cif/dictionaries/cif_sym for a
314 # complete list of official keys. In addition we also try to
315 # support some commonly used depricated notations
316 return self._get_any(['_space_group.it_number',
317 '_space_group_it_number',
318 '_symmetry_int_tables_number'])
320 def _get_spacegroup_name(self):
321 hm_symbol = self._get_any(['_space_group_name_h-m_alt',
322 '_symmetry_space_group_name_h-m',
323 '_space_group.Patterson_name_h-m',
324 '_space_group.patterson_name_h-m'])
326 hm_symbol = old_spacegroup_names.get(hm_symbol, hm_symbol)
327 return hm_symbol
329 def _get_sitesym(self):
330 sitesym = self._get_any(['_space_group_symop_operation_xyz',
331 '_space_group_symop.operation_xyz',
332 '_symmetry_equiv_pos_as_xyz'])
333 if isinstance(sitesym, str):
334 sitesym = [sitesym]
335 return sitesym
337 def _get_fractional_occupancies(self):
338 return self.get('_atom_site_occupancy')
340 def _get_setting(self) -> Optional[int]:
341 setting_str = self.get('_symmetry_space_group_setting')
342 if setting_str is None:
343 return None
345 setting = int(setting_str)
346 if setting not in [1, 2]:
347 raise ValueError(
348 f'Spacegroup setting must be 1 or 2, not {setting}')
349 return setting
351 def get_spacegroup(self, subtrans_included) -> Spacegroup:
352 # XXX The logic in this method needs serious cleaning up!
353 # The setting needs to be passed as either 1 or two, not None (default)
354 no = self._get_spacegroup_number()
355 hm_symbol = self._get_spacegroup_name()
356 sitesym = self._get_sitesym()
358 setting = 1
359 spacegroup = 1
360 if sitesym:
361 # Special cases: sitesym can be None or an empty list.
362 # The empty list could be replaced with just the identity
363 # function, but it seems more correct to try to get the
364 # spacegroup number and derive the symmetries for that.
365 subtrans = [(0.0, 0.0, 0.0)] if subtrans_included else None
366 spacegroup = spacegroup_from_data(
367 no=no, symbol=hm_symbol, sitesym=sitesym, subtrans=subtrans,
368 setting=setting)
369 elif no is not None:
370 spacegroup = no
371 elif hm_symbol is not None:
372 spacegroup = hm_symbol
373 else:
374 spacegroup = 1
376 setting_std = self._get_setting()
378 setting_name = None
379 if '_symmetry_space_group_setting' in self:
380 assert setting_std is not None
381 setting = setting_std
382 elif '_space_group_crystal_system' in self:
383 setting_name = self['_space_group_crystal_system']
384 elif '_symmetry_cell_setting' in self:
385 setting_name = self['_symmetry_cell_setting']
387 if setting_name:
388 no = Spacegroup(spacegroup).no
389 if no in rhombohedral_spacegroups:
390 if setting_name == 'hexagonal':
391 setting = 1
392 elif setting_name in ('trigonal', 'rhombohedral'):
393 setting = 2
394 else:
395 warnings.warn(
396 'unexpected crystal system %r for space group %r' % (
397 setting_name, spacegroup))
398 # FIXME - check for more crystal systems...
399 else:
400 warnings.warn(
401 'crystal system %r is not interpreted for space group %r. '
402 'This may result in wrong setting!' % (
403 setting_name, spacegroup))
405 spg = Spacegroup(spacegroup, setting)
406 if no is not None:
407 assert int(spg) == no, (int(spg), no)
408 return spg
410 def get_unsymmetrized_structure(self) -> Atoms:
411 """Return Atoms without symmetrizing coordinates.
413 This returns a (normally) unphysical Atoms object
414 corresponding only to those coordinates included
415 in the CIF file, useful for e.g. debugging.
417 This method may change behaviour in the future."""
418 symbols = self.get_symbols()
419 coordtype, coords = self._get_site_coordinates()
421 atoms = Atoms(symbols=symbols,
422 cell=self.get_cell(),
423 masses=self._get_masses())
425 if coordtype == 'scaled':
426 atoms.set_scaled_positions(coords)
427 else:
428 assert coordtype == 'cartesian'
429 atoms.positions[:] = coords
431 return atoms
433 def has_structure(self):
434 """Whether this CIF block has an atomic configuration."""
435 try:
436 self.get_symbols()
437 self._get_site_coordinates()
438 except NoStructureData:
439 return False
440 else:
441 return True
443 def get_atoms(self, store_tags=False, primitive_cell=False,
444 subtrans_included=True, fractional_occupancies=True) -> Atoms:
445 """Returns an Atoms object from a cif tags dictionary. See read_cif()
446 for a description of the arguments."""
447 if primitive_cell and subtrans_included:
448 raise RuntimeError(
449 'Primitive cell cannot be determined when sublattice '
450 'translations are included in the symmetry operations listed '
451 'in the CIF file, i.e. when `subtrans_included` is True.')
453 cell = self.get_cell()
454 assert cell.rank in [0, 3]
456 kwargs: Dict[str, Any] = {}
457 if store_tags:
458 kwargs['info'] = self._tags.copy()
460 if fractional_occupancies:
461 occupancies = self._get_fractional_occupancies()
462 else:
463 occupancies = None
465 if occupancies is not None:
466 # no warnings in this case
467 kwargs['onduplicates'] = 'keep'
469 # The unsymmetrized_structure is not the asymmetric unit
470 # because the asymmetric unit should have (in general) a smaller cell,
471 # whereas we have the full cell.
472 unsymmetrized_structure = self.get_unsymmetrized_structure()
474 if cell.rank == 3:
475 spacegroup = self.get_spacegroup(subtrans_included)
476 atoms = crystal(unsymmetrized_structure,
477 spacegroup=spacegroup,
478 setting=spacegroup.setting,
479 occupancies=occupancies,
480 primitive_cell=primitive_cell,
481 **kwargs)
482 else:
483 atoms = unsymmetrized_structure
484 if kwargs.get('info') is not None:
485 atoms.info.update(kwargs['info'])
486 if occupancies is not None:
487 # Compile an occupancies dictionary
488 occ_dict = {}
489 for i, sym in enumerate(atoms.symbols):
490 occ_dict[str(i)] = {sym: occupancies[i]}
491 atoms.info['occupancy'] = occ_dict
493 return atoms
496def parse_block(lines: List[str], line: str) -> CIFBlock:
497 assert line.lower().startswith('data_')
498 blockname = line.split('_', 1)[1].rstrip()
499 tags = parse_items(lines, line)
500 return CIFBlock(blockname, tags)
503def parse_cif(fileobj, reader='ase') -> Iterator[CIFBlock]:
504 if reader == 'ase':
505 return parse_cif_ase(fileobj)
506 elif reader == 'pycodcif':
507 return parse_cif_pycodcif(fileobj)
508 else:
509 raise ValueError(f'No such reader: {reader}')
512def parse_cif_ase(fileobj) -> Iterator[CIFBlock]:
513 """Parse a CIF file using ase CIF parser."""
515 if isinstance(fileobj, str):
516 with open(fileobj, 'rb') as fileobj:
517 data = fileobj.read()
518 else:
519 data = fileobj.read()
521 if isinstance(data, bytes):
522 data = data.decode('latin1')
523 data = format_unicode(data)
524 lines = [e for e in data.split('\n') if len(e) > 0]
525 if len(lines) > 0 and lines[0].rstrip() == '#\\#CIF_2.0':
526 warnings.warn('CIF v2.0 file format detected; `ase` CIF reader might '
527 'incorrectly interpret some syntax constructions, use '
528 '`pycodcif` reader instead')
529 lines = [''] + lines[::-1] # all lines (reversed)
531 while lines:
532 line = lines.pop().strip()
533 if not line or line.startswith('#'):
534 continue
536 yield parse_block(lines, line)
539def parse_cif_pycodcif(fileobj) -> Iterator[CIFBlock]:
540 """Parse a CIF file using pycodcif CIF parser."""
541 if not isinstance(fileobj, str):
542 fileobj = fileobj.name
544 try:
545 from pycodcif import parse
546 except ImportError:
547 raise ImportError(
548 'parse_cif_pycodcif requires pycodcif ' +
549 '(http://wiki.crystallography.net/cod-tools/pycodcif/)')
551 data, _, _ = parse(fileobj)
553 for datablock in data:
554 tags = datablock['values']
555 for tag in tags.keys():
556 values = [convert_value(x) for x in tags[tag]]
557 if len(values) == 1:
558 tags[tag] = values[0]
559 else:
560 tags[tag] = values
561 yield CIFBlock(datablock['name'], tags)
564def read_cif(fileobj, index, store_tags=False, primitive_cell=False,
565 subtrans_included=True, fractional_occupancies=True,
566 reader='ase') -> Iterator[Atoms]:
567 """Read Atoms object from CIF file. *index* specifies the data
568 block number or name (if string) to return.
570 If *index* is None or a slice object, a list of atoms objects will
571 be returned. In the case of *index* is *None* or *slice(None)*,
572 only blocks with valid crystal data will be included.
574 If *store_tags* is true, the *info* attribute of the returned
575 Atoms object will be populated with all tags in the corresponding
576 cif data block.
578 If *primitive_cell* is true, the primitive cell will be built instead
579 of the conventional cell.
581 If *subtrans_included* is true, sublattice translations are
582 assumed to be included among the symmetry operations listed in the
583 CIF file (seems to be the common behaviour of CIF files).
584 Otherwise the sublattice translations are determined from setting
585 1 of the extracted space group. A result of setting this flag to
586 true, is that it will not be possible to determine the primitive
587 cell.
589 If *fractional_occupancies* is true, the resulting atoms object will be
590 tagged equipped with a dictionary `occupancy`. The keys of this dictionary
591 will be integers converted to strings. The conversion to string is done
592 in order to avoid troubles with JSON encoding/decoding of the dictionaries
593 with non-string keys.
594 Also, in case of mixed occupancies, the atom's chemical symbol will be
595 that of the most dominant species.
597 String *reader* is used to select CIF reader. Value `ase` selects
598 built-in CIF reader (default), while `pycodcif` selects CIF reader based
599 on `pycodcif` package.
600 """
601 # Find all CIF blocks with valid crystal data
602 images = []
603 for block in parse_cif(fileobj, reader):
604 if not block.has_structure():
605 continue
607 atoms = block.get_atoms(
608 store_tags, primitive_cell,
609 subtrans_included,
610 fractional_occupancies=fractional_occupancies)
611 images.append(atoms)
613 for atoms in images[index]:
614 yield atoms
617def format_cell(cell: Cell) -> str:
618 assert cell.rank == 3
619 lines = []
620 for name, value in zip(CIFBlock.cell_tags, cell.cellpar()):
621 line = '{:20} {}\n'.format(name, value)
622 lines.append(line)
623 assert len(lines) == 6
624 return ''.join(lines)
627def format_generic_spacegroup_info() -> str:
628 # We assume no symmetry whatsoever
629 return '\n'.join([
630 '_space_group_name_H-M_alt "P 1"',
631 '_space_group_IT_number 1',
632 '',
633 'loop_',
634 ' _space_group_symop_operation_xyz',
635 " 'x, y, z'",
636 '',
637 ])
640class CIFLoop:
641 def __init__(self):
642 self.names = []
643 self.formats = []
644 self.arrays = []
646 def add(self, name, array, fmt):
647 assert name.startswith('_')
648 self.names.append(name)
649 self.formats.append(fmt)
650 self.arrays.append(array)
651 if len(self.arrays[0]) != len(self.arrays[-1]):
652 raise ValueError(f'Loop data "{name}" has {len(array)} '
653 'elements, expected {len(self.arrays[0])}')
655 def tostring(self):
656 lines = []
657 append = lines.append
658 append('loop_')
659 for name in self.names:
660 append(f' {name}')
662 template = ' ' + ' '.join(self.formats)
664 ncolumns = len(self.arrays)
665 nrows = len(self.arrays[0]) if ncolumns > 0 else 0
666 for row in range(nrows):
667 arraydata = [array[row] for array in self.arrays]
668 line = template.format(*arraydata)
669 append(line)
670 append('')
671 return '\n'.join(lines)
674@iofunction('wb')
675def write_cif(fd, images, cif_format=None,
676 wrap=True, labels=None, loop_keys=None) -> None:
677 """Write *images* to CIF file.
679 wrap: bool
680 Wrap atoms into unit cell.
682 labels: list
683 Use this list (shaped list[i_frame][i_atom] = string) for the
684 '_atom_site_label' section instead of automatically generating
685 it from the element symbol.
687 loop_keys: dict
688 Add the information from this dictionary to the `loop_`
689 section. Keys are printed to the `loop_` section preceeded by
690 ' _'. dict[key] should contain the data printed for each atom,
691 so it needs to have the setup `dict[key][i_frame][i_atom] =
692 string`. The strings are printed as they are, so take care of
693 formating. Information can be re-read using the `store_tags`
694 option of the cif reader.
696 """
698 if cif_format is not None:
699 warnings.warn('The cif_format argument is deprecated and may be '
700 'removed in the future. Use loop_keys to customize '
701 'data written in loop.', FutureWarning)
703 if loop_keys is None:
704 loop_keys = {}
706 if hasattr(images, 'get_positions'):
707 images = [images]
709 fd = io.TextIOWrapper(fd, encoding='latin-1')
710 try:
711 for i, atoms in enumerate(images):
712 blockname = f'data_image{i}\n'
713 image_loop_keys = {key: loop_keys[key][i] for key in loop_keys}
715 write_cif_image(blockname, atoms, fd,
716 wrap=wrap,
717 labels=None if labels is None else labels[i],
718 loop_keys=image_loop_keys)
720 finally:
721 # Using the TextIOWrapper somehow causes the file to close
722 # when this function returns.
723 # Detach in order to circumvent this highly illogical problem:
724 fd.detach()
727def autolabel(symbols: Sequence[str]) -> List[str]:
728 no: Dict[str, int] = {}
729 labels = []
730 for symbol in symbols:
731 if symbol in no:
732 no[symbol] += 1
733 else:
734 no[symbol] = 1
735 labels.append('%s%d' % (symbol, no[symbol]))
736 return labels
739def chemical_formula_header(atoms):
740 counts = atoms.symbols.formula.count()
741 formula_sum = ' '.join(f'{sym}{count}' for sym, count
742 in counts.items())
743 return (f'_chemical_formula_structural {atoms.symbols}\n'
744 f'_chemical_formula_sum "{formula_sum}"\n')
747class BadOccupancies(ValueError):
748 pass
751def expand_kinds(atoms, coords):
752 # try to fetch occupancies // spacegroup_kinds - occupancy mapping
753 symbols = list(atoms.symbols)
754 coords = list(coords)
755 occupancies = [1] * len(symbols)
756 occ_info = atoms.info.get('occupancy')
757 kinds = atoms.arrays.get('spacegroup_kinds')
758 if occ_info is not None and kinds is not None:
759 for i, kind in enumerate(kinds):
760 occ_info_kind = occ_info[str(kind)]
761 symbol = symbols[i]
762 if symbol not in occ_info_kind:
763 raise BadOccupancies('Occupancies present but no occupancy '
764 'info for "{symbol}"')
765 occupancies[i] = occ_info_kind[symbol]
766 # extend the positions array in case of mixed occupancy
767 for sym, occ in occ_info[str(kind)].items():
768 if sym != symbols[i]:
769 symbols.append(sym)
770 coords.append(coords[i])
771 occupancies.append(occ)
772 return symbols, coords, occupancies
775def atoms_to_loop_data(atoms, wrap, labels, loop_keys):
776 if atoms.cell.rank == 3:
777 coord_type = 'fract'
778 coords = atoms.get_scaled_positions(wrap).tolist()
779 else:
780 coord_type = 'Cartn'
781 coords = atoms.get_positions(wrap).tolist()
783 try:
784 symbols, coords, occupancies = expand_kinds(atoms, coords)
785 except BadOccupancies as err:
786 warnings.warn(str(err))
787 occupancies = [1] * len(atoms)
788 symbols = list(atoms.symbols)
790 if labels is None:
791 labels = autolabel(symbols)
793 coord_headers = [f'_atom_site_{coord_type}_{axisname}'
794 for axisname in 'xyz']
796 loopdata = {}
797 loopdata['_atom_site_label'] = (labels, '{:<8s}')
798 loopdata['_atom_site_occupancy'] = (occupancies, '{:6.4f}')
800 _coords = np.array(coords)
801 for i, key in enumerate(coord_headers):
802 loopdata[key] = (_coords[:, i], '{}')
804 loopdata['_atom_site_type_symbol'] = (symbols, '{:<2s}')
805 loopdata['_atom_site_symmetry_multiplicity'] = (
806 [1.0] * len(symbols), '{}')
808 for key in loop_keys:
809 # Should expand the loop_keys like we expand the occupancy stuff.
810 # Otherwise user will never figure out how to do this.
811 values = [loop_keys[key][i] for i in range(len(symbols))]
812 loopdata['_' + key] = (values, '{}')
814 return loopdata, coord_headers
817def write_cif_image(blockname, atoms, fd, *, wrap,
818 labels, loop_keys):
819 fd.write(blockname)
820 fd.write(chemical_formula_header(atoms))
822 rank = atoms.cell.rank
823 if rank == 3:
824 fd.write(format_cell(atoms.cell))
825 fd.write('\n')
826 fd.write(format_generic_spacegroup_info())
827 fd.write('\n')
828 elif rank != 0:
829 raise ValueError('CIF format can only represent systems with '
830 f'0 or 3 lattice vectors. Got {rank}.')
832 loopdata, coord_headers = atoms_to_loop_data(atoms, wrap, labels,
833 loop_keys)
835 headers = [
836 '_atom_site_type_symbol',
837 '_atom_site_label',
838 '_atom_site_symmetry_multiplicity',
839 *coord_headers,
840 '_atom_site_occupancy',
841 ]
843 headers += ['_' + key for key in loop_keys]
845 loop = CIFLoop()
846 for header in headers:
847 array, fmt = loopdata[header]
848 loop.add(header, array, fmt)
850 fd.write(loop.tostring())