Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import re 

2from functools import lru_cache 

3from math import gcd 

4from typing import Dict, List, Sequence, Tuple, Union 

5 

6from ase.data import atomic_numbers, chemical_symbols 

7 

# Recursive type of a parsed formula, as built by parse()/parse2() and
# consumed by count_tree()/tree2str():
#   str               -- a single symbol, e.g. 'A'
#   Tuple[Tree, int]  -- a subtree repeated a number of times, e.g. 'A2'
#   List[Tree]        -- a sequence of subtrees, e.g. the terms of 'A+B'
Tree = Union[str, Tuple['Tree', int], List['Tree']]  # type: ignore

10 

11 

class Formula:
    """Chemical formula with per-symbol atom counts and a parsed tree."""

    def __init__(self,
                 formula: str = '',
                 *,
                 strict: bool = False,
                 format: str = '',
                 _tree: Tree = None,
                 _count: Dict[str, int] = None):
        """Chemical formula object.

        Parameters
        ----------
        formula: str
            Text string representation of formula.  Examples: ``'6CO2'``,
            ``'30Cu+2CO'``, ``'Pt(CO)6'``.
        strict: bool
            Only allow real chemical symbols.
        format: str
            Reorder according to *format*.  Must be one of hill, metal,
            ab2 (old name: abc), a2b, periodic or reduce.

        Examples
        --------
        >>> from ase.formula import Formula
        >>> w = Formula('H2O')
        >>> w.count()
        {'H': 2, 'O': 1}
        >>> 'H' in w
        True
        >>> w == 'HOH'
        True
        >>> f'{w:latex}'
        'H$_{2}$O'
        >>> w.format('latex')
        'H$_{2}$O'
        >>> divmod(6 * w + 'Cu', w)
        (6, Formula('Cu'))

        Raises
        ------
        ValueError
            on malformed formula, on an illegal *format*, or (with
            ``strict=True``) on an unknown chemical symbol.
        """
        if format:
            # The private arguments describe an already-parsed formula and
            # would be invalidated by the reordering below.
            assert _tree is None and _count is None
            if format not in {'hill', 'metal', 'abc', 'reduce', 'ab2', 'a2b',
                              'periodic'}:
                raise ValueError(f'Illegal format: {format}')
            formula = Formula(formula).format(format)
        self._formula = formula
        # A falsy _tree/_count (None or empty) is recomputed from the input.
        self._tree = _tree or parse(formula)
        self._count = _count or count_tree(self._tree)
        if strict:
            for symbol in self._count:
                if symbol not in atomic_numbers:
                    raise ValueError('Unknown chemical symbol: ' + symbol)

    def convert(self, fmt: str) -> 'Formula':
        """Reformat this formula as a new Formula.

        Same formatting rules as Formula(format=...) keyword.
        """
        return Formula(self._formula, format=fmt)

    def count(self) -> Dict[str, int]:
        """Return dictionary mapping chemical symbol to number of atoms.

        The returned dict is a copy; modifying it does not affect *self*.

        Example
        -------
        >>> Formula('H2O').count()
        {'H': 2, 'O': 1}
        """
        return self._count.copy()

    def reduce(self) -> Tuple['Formula', int]:
        """Reduce formula.

        Returns
        -------
        formula: Formula
            Reduced formula.
        n: int
            Number of reduced formula units.

        Example
        -------
        >>> Formula('2H2O').reduce()
        (Formula('H2O'), 2)
        """
        dct, N = self._reduce()
        return self.from_dict(dct), N

    def stoichiometry(self) -> Tuple['Formula', 'Formula', int]:
        """Reduce to unique stoichiometry using "chemical symbols" A, B, C, ...

        Symbols are ordered by increasing reduced count, ties broken
        alphabetically.  Returns the abstract formula (A, B, C, ...), the
        reduced formula in that same order, and the number of formula units.

        Examples
        --------
        >>> Formula('CO2').stoichiometry()
        (Formula('AB2'), Formula('CO2'), 1)
        >>> Formula('(H2O)4').stoichiometry()
        (Formula('AB2'), Formula('OH2'), 4)
        """
        count1, N = self._reduce()
        count2 = {}
        count3 = {}
        ordered = sorted((n, symb) for symb, n in count1.items())
        for i, (n, symb) in enumerate(ordered):
            count2[chr(ord('A') + i)] = n
            count3[symb] = n
        return self.from_dict(count2), self.from_dict(count3), N

    def format(self, fmt: str = '') -> str:
        """Format formula as string.

        Formats:

        * ``'hill'``: alphabetically ordered with C and H first
        * ``'metal'``: alphabetically ordered with metals first
        * ``'ab2'``: count-ordered first then alphabetically ordered
        * ``'abc'``: old name for ``'ab2'``
        * ``'a2b'``: reverse count-ordered first then alphabetically ordered
        * ``'periodic'``: periodic-table ordered: period first then group
        * ``'reduce'``: Reduce and keep order (ABBBC -> AB3C)
        * ``'latex'``: LaTeX representation
        * ``'html'``: HTML representation
        * ``'rest'``: reStructuredText representation

        Example
        -------
        >>> Formula('H2O').format('html')
        'H<sub>2</sub>O'
        """
        # Inside a method body ``format`` resolves to the builtin, which
        # dispatches to __format__ below.
        return format(self, fmt)

    def __format__(self, fmt: str) -> str:
        """Format Formula as str.

        Possible formats: ``'hill'``, ``'metal'``, ``'ab2'`` (or ``'abc'``),
        ``'a2b'``, ``'periodic'``, ``'reduce'``, ``'latex'``, ``'html'``,
        ``'rest'`` and ``''`` (the original string).

        Example
        -------
        >>> f = Formula('OH2')
        >>> '{f}, {f:hill}, {f:latex}'.format(f=f)
        'OH2, H2O, OH$_{2}$'

        Raises
        ------
        ValueError
            on an unknown format specifier.
        """

        if fmt == 'hill':
            count = self.count()
            count2 = {}
            # C and H (in that order) go first, the rest alphabetically.
            for symb in 'CH':
                if symb in count:
                    count2[symb] = count.pop(symb)
            for symb, n in sorted(count.items()):
                count2[symb] = n
            return dict2str(count2)

        if fmt == 'metal':
            count = self.count()
            # Pull the non-metals out first; what is left are the metals.
            result2 = [(s, count.pop(s)) for s in non_metals if s in count]
            result = [(s, count[s]) for s in sorted(count)]
            result += sorted(result2)
            return dict2str(dict(result))

        if fmt == 'abc' or fmt == 'ab2':
            _, f, N = self.stoichiometry()
            return dict2str({symb: n * N for symb, n in f._count.items()})

        if fmt == 'a2b':
            _, f, N = self.stoichiometry()
            # Largest count first, ties broken alphabetically.
            ordered = sorted(f._count.items(),
                             key=lambda item: (-item[1], item[0]))
            return dict2str({symb: n * N for symb, n in ordered})

        if fmt == 'periodic':
            count = self.count()
            order = periodic_table_order()
            # Symbols missing from the table get rank 0 and so sort first.
            items = sorted(count.items(),
                           key=lambda item: order.get(item[0], 0))
            return dict2str(dict(items))

        if fmt == 'reduce':
            # Collapse runs of identical consecutive symbols: ABBBC -> AB3C.
            symbols = list(self)
            nsymb = len(symbols)
            parts = []
            i1 = 0  # index where the current run started
            for i2, symbol in enumerate(symbols):
                if i2 == nsymb - 1 or symbol != symbols[i2 + 1]:
                    parts.append(symbol)
                    m = i2 + 1 - i1
                    if m > 1:
                        parts.append(str(m))
                    i1 = i2 + 1
            return ''.join(parts)

        if fmt == 'latex':
            return self._tostr('$_{', '}$')

        if fmt == 'html':
            return self._tostr('<sub>', '</sub>')

        if fmt == 'rest':
            return self._tostr(r'\ :sub:`', r'`\ ')

        if fmt == '':
            return self._formula

        raise ValueError('Invalid format specifier')

    @staticmethod
    def from_dict(dct: Dict[str, int]) -> 'Formula':
        """Convert dict to Formula.

        Symbols with a count of zero are dropped.

        >>> Formula.from_dict({'H': 2})
        Formula('H2')

        Raises
        ------
        ValueError
            if a key is not a str or a value is not a non-negative int.
        """
        dct2 = {}
        for symb, n in dct.items():
            if not (isinstance(symb, str) and isinstance(n, int) and n >= 0):
                raise ValueError(f'Bad dictionary: {dct}')
            if n > 0:  # filter out n=0 symbols
                dct2[symb] = n
        return Formula(dict2str(dct2),
                       _tree=[([(symb, n) for symb, n in dct2.items()], 1)],
                       _count=dct2)

    @staticmethod
    def from_list(symbols: Sequence[str]) -> 'Formula':
        """Convert list of chemical symbols to Formula."""
        # Must be a real list: inside a tree a tuple means
        # (subtree, multiplier), so a tuple of symbols would be misread.
        symbols = list(symbols)
        return Formula(''.join(symbols),
                       _tree=[(symbols, 1)])

    def __len__(self) -> int:
        """Number of atoms."""
        return sum(self._count.values())

    def __getitem__(self, symb: str) -> int:
        """Number of atoms with chemical symbol *symb*."""
        return self._count.get(symb, 0)

    def __contains__(self, f: Union[str, 'Formula']) -> bool:
        """Check if formula contains chemical symbols in *f*.

        Type of *f* must be str or Formula.

        Examples
        --------
        >>> 'OH' in Formula('H2O')
        True
        >>> 'O2' in Formula('H2O')
        False
        """
        if isinstance(f, str):
            f = Formula(f)
        return all(self[symb] >= n for symb, n in f._count.items())

    def __eq__(self, other) -> bool:
        """Equality check.

        Note that order is not important.  A str is parsed as a formula
        first; any other non-Formula object compares unequal.

        Example
        -------
        >>> Formula('CO') == Formula('OC')
        True
        """
        if isinstance(other, str):
            other = Formula(other)
        elif not isinstance(other, Formula):
            return False
        return self._count == other._count

    def __add__(self, other: Union[str, 'Formula']) -> 'Formula':
        """Add two formulas."""
        if not isinstance(other, str):
            other = other._formula
        return Formula(self._formula + '+' + other)

    def __radd__(self, other: str):  # -> Formula
        """Add a formula string on the left: ``'Cu' + formula``."""
        return Formula(other) + self

    def __mul__(self, N: int) -> 'Formula':
        """Repeat formula `N` times."""
        if N == 0:
            return Formula('')
        return self.from_dict({symb: n * N
                               for symb, n in self._count.items()})

    def __rmul__(self, N: int):  # -> Formula
        """Repeat formula `N` times (``N * formula``)."""
        return self * N

    def __divmod__(self,
                   other: Union['Formula', str]) -> Tuple[int, 'Formula']:
        """Return the tuple (self // other, self % other).

        Invariant::

            div, mod = divmod(self, other)
            div * other + mod == self

        Example
        -------
        >>> divmod(Formula('H2O'), 'H')
        (2, Formula('O'))

        Raises
        ------
        ValueError
            if *other* contains no symbols (``min()`` of an empty sequence).
        """
        if isinstance(other, str):
            other = Formula(other)
        # The scarcest symbol limits how many times *other* fits.
        N = min(self[symb] // n for symb, n in other._count.items())
        dct = self.count()
        if N:
            for symb, n in other._count.items():
                # Symbols reduced to zero are dropped by from_dict().
                dct[symb] -= n * N
        return N, self.from_dict(dct)

    def __rdivmod__(self, other):
        """``divmod(other, self)`` where *other* is a formula string."""
        return divmod(Formula(other), self)

    def __mod__(self, other):
        """Remainder part of ``divmod(self, other)``."""
        return divmod(self, other)[1]

    def __rmod__(self, other):
        """``other % self`` where *other* is a formula string."""
        return Formula(other) % self

    def __floordiv__(self, other):
        """Quotient part of ``divmod(self, other)``."""
        return divmod(self, other)[0]

    def __rfloordiv__(self, other):
        """``other // self`` where *other* is a formula string."""
        return Formula(other) // self

    def __iter__(self, tree=None):
        """Yield the chemical symbols one by one, in tree order."""
        if tree is None:
            tree = self._tree
        if isinstance(tree, str):
            yield tree
        elif isinstance(tree, tuple):
            subtree, N = tree
            for _ in range(N):
                yield from self.__iter__(subtree)
        else:
            for subtree in tree:
                yield from self.__iter__(subtree)

    def __str__(self):
        return self._formula

    def __repr__(self):
        return f'Formula({self._formula!r})'

    def _reduce(self):
        """Divide all counts by their greatest common divisor.

        Returns the reduced count dict and the divisor (0 when there is
        nothing to count).
        """
        N = 0
        for n in self._count.values():
            N = gcd(n, N)  # gcd(n, 0) == n, so the first count seeds N
        # N == 0 means every count is zero (e.g. 'H0'); avoid dividing by 0.
        dct = {symb: n // N if N else 0 for symb, n in self._count.items()}
        return dct, N

    def _tostr(self, sub1, sub2):
        """Render the tree with counts wrapped in *sub1* ... *sub2*."""
        parts = []
        for tree, n in self._tree:
            s = tree2str(tree, sub1, sub2)
            # Drop the parentheses tree2str() puts around a whole term.
            if s[0] == '(' and s[-1] == ')':
                s = s[1:-1]
            if n > 1:
                s = str(n) + s
            parts.append(s)
        return '+'.join(parts)

389 

390 

def dict2str(dct: Dict[str, int]) -> str:
    """Build a formula string from a symbol-to-count mapping.

    Symbols appear in the dict's iteration order; a count is only written
    when it is larger than one.

    >>> dict2str({'A': 1, 'B': 2})
    'AB2'
    """
    pieces = []
    for symbol, amount in dct.items():
        pieces.append(symbol)
        if amount > 1:
            pieces.append(str(amount))
    return ''.join(pieces)

399 

400 

def parse(f: str) -> Tree:
    """Turn a formula string into its tree representation.

    The string is split on "+"; each term contributes one
    ``(subtree, multiplier)`` pair, where the multiplier is the term's
    leading number (1 if there is none).  An empty string gives ``[]``.

    >>> parse('2A+BC2')
    [('A', 2), (['B', ('C', 2)], 1)]
    """
    if not f:
        return []
    return [(parse2(body), multiplier)
            for multiplier, body in map(strip_number, f.split('+'))]

415 

416 

def parse2(f: str) -> Tree:
    """Convert formula string to tree structure (no "+" symbols).

    The string is consumed left to right, one unit at a time:

    * a parenthesised group becomes ``(subtree, n)`` where *n* is the
      number following the closing parenthesis (1 if there is none);
    * a symbol (one capital letter, optionally one lower-case letter)
      becomes ``'X'``, or ``('X', n)`` when digits follow it.

    A single unit is returned as is, otherwise a list of units is returned
    (an empty list for the empty string).

    >>> parse('10(H2O)')
    [(([('H', 2), 'O'], 1), 10)]

    Raises
    ------
    ValueError
        on unbalanced parentheses or when no symbol can be read at the
        current position.
    """
    units = []
    while f:
        unit: Union[str, Tuple[str, int], Tree]
        if f[0] == '(':
            # Find the ")" that closes this "(", skipping nested pairs.
            level = 0
            for i, c in enumerate(f[1:], 1):
                if c == '(':
                    level += 1
                elif c == ')':
                    if level == 0:
                        break
                    level -= 1
            else:
                raise ValueError(
                    f'Unbalanced parentheses in formula: {f!r}')
            f2 = f[1:i]
            n, f = strip_number(f[i + 1:])
            unit = (parse2(f2), n)
        else:
            m = re.match(r'([A-Z][a-z]?)([0-9]*)', f)
            if m is None:
                raise ValueError(
                    f'Expected a chemical symbol in formula at: {f!r}')
            symb = m.group(1)
            number = m.group(2)
            if number:
                unit = (symb, int(number))
            else:
                unit = symb
            f = f[m.end():]
        units.append(unit)
    if len(units) == 1:
        return units[0]
    return units

455 

456 

def strip_number(s: str) -> Tuple[int, str]:
    """Split a leading number off a string.

    Returns the number (1 when the string does not start with a digit)
    and the remainder of the string.

    >>> strip_number('10AB2')
    (10, 'AB2')
    >>> strip_number('AB2')
    (1, 'AB2')
    """
    match = re.match('[0-9]*', s)
    # The pattern also matches the empty string, so it cannot fail.
    assert match is not None
    digits = match.group()
    number = int(digits) if digits else 1
    return number, s[len(digits):]

468 

469 

def tree2str(tree: Tree,
             sub1: str, sub2: str) -> str:
    """Render a formula tree as text (used by html, latex and rest formats).

    Repeat counts other than one are written as ``sub1 + count + sub2``
    after the subtree.  A sequence of subtrees is wrapped in parentheses;
    these are removed again when the sequence is repeated exactly once.
    """
    if isinstance(tree, str):
        return tree
    if not isinstance(tree, tuple):
        # A list: concatenate the children inside one pair of parentheses.
        inner = ''.join(tree2str(child, sub1, sub2) for child in tree)
        return '(' + inner + ')'
    subtree, repeat = tree
    text = tree2str(subtree, sub1, sub2)
    if repeat != 1:
        return text + sub1 + str(repeat) + sub2
    if text[0] == '(' and text[-1] == ')':
        return text[1:-1]
    return text

484 

485 

def count_tree(tree: Tree) -> Dict[str, int]:
    """Count the atoms of each chemical symbol in a formula tree.

    A str counts as one atom, a ``(subtree, N)`` tuple multiplies the
    counts of its subtree by *N*, and a list sums the counts of its items.
    """
    if isinstance(tree, str):
        return {tree: 1}
    if isinstance(tree, tuple):
        subtree, repeat = tree
        return {symbol: amount * repeat
                for symbol, amount in count_tree(subtree).items()}
    totals = {}  # type: Dict[str, int]
    for child in tree:
        for symbol, amount in count_tree(child).items():
            totals[symbol] = totals.get(symbol, 0) + amount
    return totals

498 

499 

# Symbols treated as non-metals by the 'metal' format
# (non metals, half-metals/metalloids, halogens and noble gases):
non_metals = ('H He B C N O F Ne '
              'Si P S Cl Ar '
              'Ge As Se Br Kr '
              'Sb Te I Xe '
              'Po At Rn').split()

506 

507 

@lru_cache()
def periodic_table_order() -> Dict[str, int]:
    """Create dict mapping chemical symbol to its sorting rank.

    The ranks follow ``chemical_symbols`` taken in blocks, the block with
    the highest indices first and the block ``[1:3]`` last; inside a
    block the original order is kept.  The result is cached.
    """
    # (start, stop) slices of chemical_symbols -- presumably one per period
    # of the periodic table, heaviest period first.
    blocks = [(87, None), (55, 87), (37, 55), (19, 37),
              (11, 19), (3, 11), (1, 3)]
    ordered = []
    for start, stop in blocks:
        ordered.extend(chemical_symbols[start:stop])
    return {symbol: rank for rank, symbol in enumerate(ordered)}

518 

519 

520# Backwards compatibility: 

def formula_hill(numbers, empirical=False):
    """Convert list of atomic numbers to a chemical formula as a string.

    Elements are alphabetically ordered with C and H first.

    If argument `empirical`, element counts will be divided by greatest common
    divisor to yield an empirical formula"""
    formula = Formula('', _tree=[([chemical_symbols[Z] for Z in numbers], 1)])
    if empirical:
        formula = formula.reduce()[0]
    return formula.format('hill')

533 

534 

535# Backwards compatibility: 

def formula_metal(numbers, empirical=False):
    """Convert list of atomic numbers to a chemical formula as a string.

    Elements are alphabetically ordered with metals first.

    If argument `empirical`, element counts will be divided by greatest common
    divisor to yield an empirical formula"""
    formula = Formula('', _tree=[([chemical_symbols[Z] for Z in numbers], 1)])
    if empirical:
        formula = formula.reduce()[0]
    return formula.format('metal')