Coverage for /builds/ase/ase/ase/formula.py : 91.39%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import re
2from functools import lru_cache
3from math import gcd
4from typing import Dict, List, Sequence, Tuple, Union
6from ase.data import atomic_numbers, chemical_symbols
# Recursive type used in type hints for a parsed formula:
#
# * ``str``: a single symbol (``'A'``)
# * ``Tuple[Tree, int]``: a sub-tree together with a multiplier (``'A2'``)
# * ``List[Tree]``: a sequence of sub-trees (``'A+B'``)
Tree = Union[str, Tuple['Tree', int], List['Tree']]  # type: ignore
class Formula:
    """Chemical formula with counting, formatting and simple arithmetic."""

    def __init__(self,
                 formula: str = '',
                 *,
                 strict: bool = False,
                 format: str = '',
                 _tree: Union['Tree', None] = None,
                 _count: Union[Dict[str, int], None] = None):
        """Chemical formula object.

        Parameters
        ----------
        formula: str
            Text string representation of formula.  Examples: ``'6CO2'``,
            ``'30Cu+2CO'``, ``'Pt(CO)6'``.
        strict: bool
            Only allow real chemical symbols.
        format: str
            Reorder according to *format*.  Must be one of hill, metal,
            ab2 (or its old name abc), a2b, periodic or reduce.

        Examples
        --------
        >>> from ase.formula import Formula
        >>> w = Formula('H2O')
        >>> w.count()
        {'H': 2, 'O': 1}
        >>> 'H' in w
        True
        >>> w == 'HOH'
        True
        >>> f'{w:latex}'
        'H$_{2}$O'
        >>> w.format('latex')
        'H$_{2}$O'
        >>> divmod(6 * w + 'Cu', w)
        (6, Formula('Cu'))

        Raises
        ------
        ValueError
            on malformed formula
        """
        if format:
            # The private keywords describe an already parsed formula and
            # make no sense together with a reordering request.
            assert _tree is None and _count is None
            if format not in {'hill', 'metal', 'abc', 'reduce', 'ab2', 'a2b',
                              'periodic'}:
                raise ValueError(f'Illegal format: {format}')
            formula = Formula(formula).format(format)
        self._formula = formula
        # Only parse/count when no (non-empty) precomputed data was given.
        self._tree = _tree or parse(formula)
        self._count = _count or count_tree(self._tree)
        if strict:
            for symbol in self._count:
                if symbol not in atomic_numbers:
                    raise ValueError('Unknown chemical symbol: ' + symbol)

    def convert(self, fmt: str) -> 'Formula':
        """Reformat this formula as a new Formula.

        Same formatting rules as Formula(format=...) keyword.
        """
        return Formula(self._formula, format=fmt)

    def count(self) -> Dict[str, int]:
        """Return dictionary mapping chemical symbol to number of atoms.

        A copy is returned, so the caller may modify it freely.

        Example
        -------
        >>> Formula('H2O').count()
        {'H': 2, 'O': 1}
        """
        return self._count.copy()

    def reduce(self) -> Tuple['Formula', int]:
        """Reduce formula.

        Returns
        -------
        formula: Formula
            Reduced formula.
        n: int
            Number of reduced formula units.

        Example
        -------
        >>> Formula('2H2O').reduce()
        (Formula('H2O'), 2)
        """
        dct, N = self._reduce()
        return self.from_dict(dct), N

    def stoichiometry(self) -> Tuple['Formula', 'Formula', int]:
        """Reduce to unique stoichiometry using "chemical symbols" A, B, C, ...

        Returns the abstract stoichiometry, the reduced formula ordered in
        the same way (by count, then alphabetically) and the number of
        reduced formula units.

        Examples
        --------
        >>> Formula('CO2').stoichiometry()
        (Formula('AB2'), Formula('CO2'), 1)
        >>> Formula('(H2O)4').stoichiometry()
        (Formula('AB2'), Formula('OH2'), 4)
        """
        count1, N = self._reduce()
        c = ord('A')
        count2 = {}
        count3 = {}
        # Sort on (count, symbol): lowest count first, ties alphabetically.
        for n, symb in sorted((n, symb)
                              for symb, n in count1.items()):
            count2[chr(c)] = n
            count3[symb] = n
            c += 1
        return self.from_dict(count2), self.from_dict(count3), N

    def format(self, fmt: str = '') -> str:
        """Format formula as string.

        Formats:

        * ``'hill'``: alphabetically ordered with C and H first
        * ``'metal'``: alphabetically ordered with metals first
        * ``'ab2'``: count-ordered first then alphabetically ordered
        * ``'abc'``: old name for ``'ab2'``
        * ``'a2b'``: reverse count-ordered first then alphabetically ordered
        * ``'periodic'``: periodic-table ordered: period first then group
        * ``'reduce'``: Reduce and keep order (ABBBC -> AB3C)
        * ``'latex'``: LaTeX representation
        * ``'html'``: HTML representation
        * ``'rest'``: reStructuredText representation

        Example
        -------
        >>> Formula('H2O').format('html')
        'H<sub>2</sub>O'
        """
        return format(self, fmt)

    def __format__(self, fmt: str) -> str:
        """Format Formula as str.

        Possible formats: ``'hill'``, ``'metal'``, ``'abc'``, ``'ab2'``,
        ``'a2b'``, ``'periodic'``, ``'reduce'``, ``'latex'``, ``'html'``,
        ``'rest'`` and ``''`` (the formula string as given).

        Raises ValueError for any other format specifier.

        Example
        -------
        >>> f = Formula('OH2')
        >>> '{f}, {f:hill}, {f:latex}'.format(f=f)
        'OH2, H2O, OH$_{2}$'
        """
        if fmt == 'hill':
            count = self.count()
            # C and H (in that order) first, the rest alphabetically.
            count2 = {symb: count.pop(symb) for symb in 'CH' if symb in count}
            count2.update(sorted(count.items()))
            return dict2str(count2)

        if fmt == 'metal':
            count = self.count()
            # Pull the non-metals out of ``count``; what is left are metals.
            nonmetal_items = [(s, count.pop(s))
                              for s in non_metals if s in count]
            items = [(s, count[s]) for s in sorted(count)]
            items += sorted(nonmetal_items)
            return dict2str(dict(items))

        if fmt == 'abc' or fmt == 'ab2':
            _, f, N = self.stoichiometry()
            return dict2str({symb: n * N for symb, n in f._count.items()})

        if fmt == 'a2b':
            _, f, N = self.stoichiometry()
            # Highest count first, ties alphabetically.
            items = sorted(f._count.items(),
                           key=lambda item: (-item[1], item[0]))
            return dict2str({symb: n * N for symb, n in items})

        if fmt == 'periodic':
            order = periodic_table_order()
            # Symbols missing from the table get sort key 0.
            items = sorted(self.count().items(),
                           key=lambda item: order.get(item[0], 0))
            return dict2str(dict(items))

        if fmt == 'reduce':
            # Run-length encode consecutive identical symbols.
            symbols = list(self)
            nsymb = len(symbols)
            parts = []
            i1 = 0  # start index of the current run
            for i2, symbol in enumerate(symbols):
                if i2 == nsymb - 1 or symbol != symbols[i2 + 1]:
                    parts.append(symbol)
                    m = i2 + 1 - i1
                    if m > 1:
                        parts.append(str(m))
                    i1 = i2 + 1
            return ''.join(parts)

        if fmt == 'latex':
            return self._tostr('$_{', '}$')

        if fmt == 'html':
            return self._tostr('<sub>', '</sub>')

        if fmt == 'rest':
            return self._tostr(r'\ :sub:`', r'`\ ')

        if fmt == '':
            return self._formula

        raise ValueError('Invalid format specifier')

    @staticmethod
    def from_dict(dct: Dict[str, int]) -> 'Formula':
        """Convert dict to Formula.

        Symbols with a count of zero are left out.  Raises ValueError if a
        key is not a str or a value is not a non-negative int.

        >>> Formula.from_dict({'H': 2})
        Formula('H2')
        """
        dct2 = {}
        for symb, n in dct.items():
            if not (isinstance(symb, str) and isinstance(n, int) and n >= 0):
                raise ValueError(f'Bad dictionary: {dct}')
            if n > 0:  # filter out n=0 symbols
                dct2[symb] = n
        return Formula(dict2str(dct2),
                       _tree=[([(symb, n) for symb, n in dct2.items()], 1)],
                       _count=dct2)

    @staticmethod
    def from_list(symbols: Sequence[str]) -> 'Formula':
        """Convert list of chemical symbols to Formula.

        Any sequence of symbols is accepted.  The symbols are copied into a
        list because a tuple inside the tree would be mistaken for a
        ``(tree, N)`` node.  A bare string is treated as one single symbol.
        """
        if isinstance(symbols, str):
            symbols = [symbols]
        else:
            symbols = list(symbols)
        count: Dict[str, int] = {}
        for symb in symbols:
            count[symb] = count.get(symb, 0) + 1
        return Formula(''.join(symbols),
                       _tree=[(symbols, 1)],
                       _count=count)

    def __len__(self) -> int:
        """Number of atoms."""
        return sum(self._count.values())

    def __getitem__(self, symb: str) -> int:
        """Number of atoms with chemical symbol *symb*."""
        return self._count.get(symb, 0)

    def __contains__(self, f: Union[str, 'Formula']) -> bool:
        """Check if formula contains chemical symbols in *f*.

        Type of *f* must be str or Formula.

        Examples
        --------
        >>> 'OH' in Formula('H2O')
        True
        >>> 'O2' in Formula('H2O')
        False
        """
        if isinstance(f, str):
            f = Formula(f)
        return all(self[symb] >= n for symb, n in f._count.items())

    def __eq__(self, other) -> bool:
        """Equality check.

        Note that order is not important.  Strings are parsed first; a
        string that is not a valid formula compares unequal.  Objects that
        are neither str nor Formula compare unequal.

        Example
        -------
        >>> Formula('CO') == Formula('OC')
        True
        """
        if isinstance(other, str):
            try:
                other = Formula(other)
            except ValueError:
                # An equality test should not raise on a malformed string.
                return False
        elif not isinstance(other, Formula):
            return False
        return self._count == other._count

    def __add__(self, other: Union[str, 'Formula']) -> 'Formula':
        """Add two formulas.

        An empty operand is skipped so that the resulting formula string
        has no dangling ``'+'``.
        """
        if not isinstance(other, str):
            other = other._formula
        if not other:
            return Formula(self._formula)
        if not self._formula:
            return Formula(other)
        return Formula(self._formula + '+' + other)

    def __radd__(self, other: str):  # -> Formula
        return Formula(other) + self

    def __mul__(self, N: int) -> 'Formula':
        """Repeat formula `N` times."""
        if N == 0:
            return Formula('')
        return self.from_dict({symb: n * N
                               for symb, n in self._count.items()})

    def __rmul__(self, N: int):  # -> Formula
        return self * N

    def __divmod__(self,
                   other: Union['Formula', str]) -> Tuple[int, 'Formula']:
        """Return the tuple (self // other, self % other).

        Invariant::

            div, mod = divmod(self, other)
            div * other + mod == self

        Raises ValueError if *other* is an empty formula.

        Example
        -------
        >>> divmod(Formula('H2O'), 'H')
        (2, Formula('O'))
        """
        if isinstance(other, str):
            other = Formula(other)
        if not other._count:
            raise ValueError('Cannot divide by an empty formula')
        # Largest number of whole copies of other that fit into self.
        N = min(self[symb] // n for symb, n in other._count.items())
        dct = self.count()
        if N:
            for symb, n in other._count.items():
                dct[symb] -= n * N
                if dct[symb] == 0:
                    del dct[symb]
        return N, self.from_dict(dct)

    def __rdivmod__(self, other):
        return divmod(Formula(other), self)

    def __mod__(self, other):
        return divmod(self, other)[1]

    def __rmod__(self, other):
        return Formula(other) % self

    def __floordiv__(self, other):
        return divmod(self, other)[0]

    def __rfloordiv__(self, other):
        return Formula(other) // self

    def __iter__(self, tree=None):
        """Yield the symbols one atom at a time, in tree order."""
        if tree is None:
            tree = self._tree
        if isinstance(tree, str):
            yield tree
        elif isinstance(tree, tuple):
            subtree, N = tree
            for _ in range(N):
                yield from self.__iter__(subtree)
        else:
            for subtree in tree:
                yield from self.__iter__(subtree)

    def __str__(self):
        return self._formula

    def __repr__(self):
        return f'Formula({self._formula!r})'

    def _reduce(self):
        """Return (reduced symbol-to-count dict, greatest common divisor)."""
        N = 0
        for n in self._count.values():
            N = gcd(n, N)  # gcd(n, 0) == n, so the first count seeds N
        dct = {symb: n // N for symb, n in self._count.items()}
        return dct, N

    def _tostr(self, sub1, sub2):
        """Build string with each count wrapped between *sub1* and *sub2*."""
        parts = []
        for tree, n in self._tree:
            s = tree2str(tree, sub1, sub2)
            # No need for parentheses around a whole "+"-separated term.
            if s[0] == '(' and s[-1] == ')':
                s = s[1:-1]
            if n > 1:
                s = str(n) + s
            parts.append(s)
        return '+'.join(parts)
def dict2str(dct: Dict[str, int]) -> str:
    """Build a formula string from a symbol-to-count mapping.

    Symbols are written in the iteration order of *dct*; a count is only
    written when it is larger than one.

    >>> dict2str({'A': 1, 'B': 2})
    'AB2'
    """
    pieces = []
    for symbol, number in dct.items():
        pieces.append(symbol)
        if number > 1:
            pieces.append(str(number))
    return ''.join(pieces)
def parse(f: str) -> 'Tree':
    """Turn a formula string into its tree representation.

    The string is split at every "+"; each term becomes a
    ``(subtree, multiplier)`` tuple where the multiplier is the term's
    leading number (1 if there is none).  An empty string gives ``[]``.

    >>> parse('2A+BC2')
    [('A', 2), (['B', ('C', 2)], 1)]
    """
    if not f:
        return []
    return [(parse2(body), multiplier)
            for multiplier, body in map(strip_number, f.split('+'))]
def parse2(f: str) -> 'Tree':
    """Convert formula string to tree structure (no "+" symbols).

    Each unit is either a symbol (one uppercase letter optionally followed
    by one lowercase letter) with an optional trailing number, or a
    parenthesised sub-formula with an optional trailing number.  A single
    unit is returned as is; several units are returned as a list.

    >>> parse2('H2O')
    [('H', 2), 'O']
    >>> parse2('(H2O)10')
    ([('H', 2), 'O'], 10)

    Raises
    ------
    ValueError
        if a "(" has no matching ")" or the text is not a valid unit.
    """
    units = []
    while f:
        unit: Union[str, Tuple[str, int], 'Tree']
        if f[0] == '(':
            # Find the ")" matching the opening "(" at position 0.
            level = 0
            for i, c in enumerate(f[1:], 1):
                if c == '(':
                    level += 1
                elif c == ')':
                    if level == 0:
                        break
                    level -= 1
            else:
                raise ValueError(f'Unmatched "(" in formula: {f!r}')
            f2 = f[1:i]
            # A number after the ")" multiplies the whole group.
            n, f = strip_number(f[i + 1:])
            unit = (parse2(f2), n)
        else:
            m = re.match(r'([A-Z][a-z]?)([0-9]*)', f)
            if m is None:
                raise ValueError(f'Cannot parse formula at: {f!r}')
            symb = m.group(1)
            number = m.group(2)
            if number:
                unit = (symb, int(number))
            else:
                unit = symb
            f = f[m.end():]
        units.append(unit)
    if len(units) == 1:
        return units[0]
    return units
def strip_number(s: str) -> Tuple[int, str]:
    """Split off a leading number.

    Returns the leading run of digits as an int (1 if there are no
    leading digits) together with the remainder of the string.

    >>> strip_number('10AB2')
    (10, 'AB2')
    >>> strip_number('AB2')
    (1, 'AB2')
    """
    match = re.match('[0-9]*', s)
    assert match is not None  # the pattern also matches the empty string
    digits = match.group()
    number = int(digits) if digits else 1
    return number, s[len(digits):]
def tree2str(tree: 'Tree',
             sub1: str, sub2: str) -> str:
    """Render *tree* as text; helper for the html, latex and rest formats.

    A multiplier N other than 1 is written as ``sub1 + str(N) + sub2``
    after the text it applies to.  A sequence of sub-trees is wrapped in
    parentheses; those are dropped again when the multiplier is 1.
    """
    if isinstance(tree, str):
        return tree
    if not isinstance(tree, tuple):
        # Sequence of sub-trees: render each one and group them.
        inner = ''.join(tree2str(subtree, sub1, sub2) for subtree in tree)
        return '(' + inner + ')'
    subtree, repeat = tree
    text = tree2str(subtree, sub1, sub2)
    if repeat != 1:
        return text + sub1 + str(repeat) + sub2
    if text[0] == '(' and text[-1] == ')':
        return text[1:-1]
    return text
def count_tree(tree: 'Tree') -> Dict[str, int]:
    """Count the atoms in *tree*.

    Returns a dict mapping each symbol to its total count, in order of
    first appearance.
    """
    if isinstance(tree, str):
        return {tree: 1}
    if isinstance(tree, tuple):
        subtree, repeat = tree
        return {symbol: number * repeat
                for symbol, number in count_tree(subtree).items()}
    totals: Dict[str, int] = {}
    for subtree in tree:
        for symbol, number in count_tree(subtree).items():
            totals[symbol] = totals.get(symbol, 0) + number
    return totals
# Symbols that the 'metal' format places after all other symbols:
# non-metals, half-metals/metalloids, halogens and noble gases.
non_metals = ['H', 'He', 'B', 'C', 'N', 'O', 'F', 'Ne',
              'Si', 'P', 'S', 'Cl', 'Ar',
              'Ge', 'As', 'Se', 'Br', 'Kr',
              'Sb', 'Te', 'I', 'Xe',
              'Po', 'At', 'Rn']
@lru_cache()
def periodic_table_order() -> Dict[str, int]:
    """Return dict mapping symbol to its sort key for 'periodic' ordering.

    The key is the position of the symbol when the slices of
    ``chemical_symbols`` listed below are joined, starting with the
    highest indices (presumably one slice per period of the periodic
    table -- verify against ``ase.data.chemical_symbols``).  Index 0 is
    not included.  The result is cached.
    """
    ordered = (chemical_symbols[87:] +
               chemical_symbols[55:87] +
               chemical_symbols[37:55] +
               chemical_symbols[19:37] +
               chemical_symbols[11:19] +
               chemical_symbols[3:11] +
               chemical_symbols[1:3])
    return {symbol: position for position, symbol in enumerate(ordered)}
520# Backwards compatibility:
def formula_hill(numbers, empirical=False):
    """Convert list of atomic numbers to a chemical formula as a string.

    Elements are alphabetically ordered with C and H first.

    If argument `empirical`, element counts will be divided by greatest common
    divisor to yield an empirical formula"""
    tree = [([chemical_symbols[Z] for Z in numbers], 1)]
    formula = Formula('', _tree=tree)
    if empirical:
        formula = formula.reduce()[0]
    return formula.format('hill')
535# Backwards compatibility:
def formula_metal(numbers, empirical=False):
    """Convert list of atomic numbers to a chemical formula as a string.

    Elements are alphabetically ordered with metals first.

    If argument `empirical`, element counts will be divided by greatest common
    divisor to yield an empirical formula"""
    tree = [([chemical_symbols[Z] for Z in numbers], 1)]
    formula = Formula('', _tree=tree)
    if empirical:
        formula = formula.reduce()[0]
    return formula.format('metal')