microsoft/qdk

Public

mirrored fromhttps://github.com/microsoft/qdkAvailable

CodeCommitsIssuesPull requestsActionsInsightsSecurity
alex/pythontelem

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

samples/estimation/df-chemistry/chemistry.py

577lines · modecode

1# Copyright (c) Microsoft Corporation. All rights reserved.
2# Licensed under the MIT License.
3import json
4import math
5import numpy as np
6import numpy.typing as npt
7import qsharp
8from argparse import ArgumentParser
9from dataclasses import dataclass
10from scipy import linalg as LA
11from urllib.parse import urlparse
12from urllib.request import urlretrieve
13
14
15@dataclass
16class FCIDumpFileContent:
17 num_orbitals: int
18 one_body_terms: list[tuple[list[int], float]]
19 two_body_terms: list[tuple[list[int], float]]
20
21 @classmethod
22 def from_file(self, file_url):
23 """Read FCIDump file (given by either a local path or a URL) and parse
24 it to get the input data for double-factorized chemistry algorithm."""
25 url = urlparse(file_url)
26
27 file_name = ""
28 if url.scheme in ["http", "https"]:
29 # Download the file
30 file_name = url.path.rsplit("/", 1)[-1]
31 print(f"Downloading {file_url} to {file_name}...")
32 urlretrieve(file_url, file_name)
33 elif url.scheme in ["", "file"]:
34 # Use the whole URL as the file path
35 file_name = file_url
36 else:
37 raise f"Unsupported file location {file_url}"
38
39 with open(file_name, "r") as f:
40 lines = [str.strip() for str in f.readlines()]
41
42 assert lines[0].startswith("&FCI")
43
44 parse_header = True
45 header = ""
46 header_values = {}
47 self.one_body_terms = []
48 self.two_body_terms = []
49
50 for line in lines:
51 if parse_header:
52 # File header might not have key-value pairs in separate lines,
53 # so accumulate the whole header first and then parse.
54 header += line + " "
55 if line.endswith("&END"):
56 # Strip "&FCI" and "&END"
57 rest_header = header[4:-4]
58
59 # Find each key-value pair based on the "=" sign
60 while (ind := rest_header.find("=")) > -1:
61 key = rest_header[0:ind].strip()
62 rest_header = rest_header[ind + 1 :]
63 # Figure out which part of the rest is value: until
64 # either whitespace or (if key is not ORBSYM) comma
65 ind = rest_header.find("," if key != "ORBSYM" else " ")
66 value = rest_header[0:ind].strip()
67 rest_header = rest_header[ind + 1 :]
68 if key == "ORBSYM":
69 value = value[:-1]
70 header_values[key] = value
71 parse_header = False
72 else:
73 tokens = line.split()
74 coefficient = float(tokens[0])
75 indices = [int(str) - 1 for str in tokens[1:] if str != "0"]
76
77 if len(indices) == 2:
78 indices.sort()
79 self.one_body_terms.append((indices, coefficient))
80 elif len(indices) == 4:
81 # conversion from Mulliken to Dirac
82 i = indices[0]
83 j = indices[2]
84 k = indices[3]
85 l = indices[1]
86
87 symmetries = [
88 [i, j, k, l],
89 [j, i, l, k],
90 [k, l, i, j],
91 [l, k, j, i],
92 [i, k, j, l],
93 [k, i, l, j],
94 [j, l, i, k],
95 [l, j, k, i],
96 ]
97 symmetries.sort()
98 self.two_body_terms.append((symmetries[0], coefficient))
99
100 self.num_orbitals = int(header_values["NORB"])
101
102 return self
103
104
105@dataclass
106class TwoBodyResult:
107 eigenvalues: list[npt.NDArray[np.float64]]
108 eigenvectors: list[npt.NDArray[np.float64]]
109 one_norms: list[float]
110 two_norms: list[float]
111
112
113@dataclass
114class DoubleFactorization:
115 num_orbitals: int
116 rank: int
117 one_body_norm: float
118 two_body_norm: float
119 one_body_eigenvalues: list[float]
120 one_body_eigenvectors: npt.NDArray[npt.NDArray[np.float64]]
121 two_body_eigenvalues: npt.NDArray[npt.NDArray[np.float64]]
122 two_body_eigenvectors: list[npt.NDArray[npt.NDArray[np.float64]]]
123
124 @classmethod
125 def process_fcidump(self, structure: FCIDumpFileContent, error: float):
126
127 # The `structure` provides us with one- and two electron integrals
128 # h_{ij} and h_{ijkl} as described in Eq. (2). These coefficients are
129 # real and satisfy some symmetries which are outlined in Eq. (6).
130
131 # In this step we compute R (`rank`) as in Eq. (7), h_{ij}
132 # (`one_electron_vector`), and L_{ij}^{(r)} (`two_electron_vectors`).
133 (rank, eigenvalue_signs, one_electron_vector, two_electron_vectors) = (
134 self.perform_svd(structure)
135 )
136
137 # This computes Eq. (15). It stores L^{-1}_{ij} into
138 # `one_body_eigenvectors`, which is repurposed from
139 # `one_electron_vector` and uses the same indices. It also returns the
140 # Schatten norm, which is the first summand in Eq. (16).
141 (one_body_eigenvalues, one_body_eigenvectors, one_body_norm, norm2) = (
142 self.process_one_body_term(
143 structure.num_orbitals,
144 rank,
145 one_electron_vector,
146 eigenvalue_signs,
147 two_electron_vectors,
148 )
149 )
150
151 assert len(one_body_eigenvectors) == structure.num_orbitals**2
152
153 # This computes the second sum of Eq. (9), it computes the eigenvalues
154 # \lambda_m^{(r)} and eigenvectors $\vec R_{m,i}^{(r)}$. It does not
155 # normalize the vectors, but returns the one- and two-norms of the
156 # eigenvalues.
157 two_body_result = self.process_two_body_terms(
158 structure.num_orbitals, rank, two_electron_vectors
159 )
160
161 # Discard terms that will make the description exceed the error budget
162 self.truncate_terms(structure.num_orbitals, error, two_body_result)
163
164 # This computes the second summand in Eq. (16).
165 two_body_norm = 0.0
166 two_body_norm += sum(0.25 * norm * norm for norm in two_body_result.one_norms)
167
168 # Reshape the one body eigenvectors so that they are represented
169 # row-wise in a 2D array
170 one_body_eigenvectors = np.reshape(
171 one_body_eigenvectors, (structure.num_orbitals, structure.num_orbitals)
172 )
173
174 for i in range(len(two_body_result.eigenvectors)):
175 cols = structure.num_orbitals
176 rows = int(len(two_body_result.eigenvectors[i]) / cols)
177 assert rows * cols == len(two_body_result.eigenvectors[i])
178
179 # Reshape the two body eigenvectors so that they represented
180 # row-wise in a 2D array
181 two_body_result.eigenvectors[i] = np.reshape(
182 two_body_result.eigenvectors[i], (rows, cols)
183 )
184
185 self.num_orbitals = structure.num_orbitals
186 # Use the post-truncation rank
187 self.rank = len(two_body_result.eigenvectors)
188 self.one_body_norm = one_body_norm
189 self.two_body_norm = two_body_norm
190 self.one_body_eigenvalues = one_body_eigenvalues
191 self.one_body_eigenvectors = one_body_eigenvectors
192 self.two_body_eigenvalues = two_body_result.eigenvalues
193 self.two_body_eigenvectors = two_body_result.eigenvectors
194
195 return self
196
197 def combined_index(i: int, j: int) -> int:
198 return int(max(i, j) * (max(i, j) + 1) / 2 + min(i, j))
199
200 def vectors_to_sym_mat(
201 vector: npt.NDArray[float], dimension: int
202 ) -> npt.NDArray[npt.NDArray[float]]:
203 matrix = np.zeros((dimension, dimension), dtype=float)
204
205 # Create lower triangular matrix
206 matrix[np.tril_indices(dimension, 0)] = vector
207
208 # Convert lower triangular matrix to symmetrix matrix
209 matrix = matrix + matrix.T
210
211 # Halve elements of diagonal to avoid doubling
212 matrix = matrix - 0.5 * np.diag(np.diagonal(matrix, 0))
213
214 return matrix
215
216 @classmethod
217 def populate_two_body_terms(
218 self, two_body_terms: list[tuple[list[int], float]]
219 ) -> list[tuple[list[int], float]]:
220 complete_two_body_terms = []
221
222 for [i, j, k, l], val in two_body_terms:
223 ii = self.combined_index(i, l)
224 jj = self.combined_index(j, k)
225
226 complete_two_body_terms.append(([ii, jj], val))
227 if ii != jj:
228 complete_two_body_terms.append(([jj, ii], val))
229
230 return complete_two_body_terms
231
232 @classmethod
233 def eigen_svd(
234 self, orbitals: int, two_body_terms: list[tuple[list[int], float]]
235 ) -> (int, list[int], list[float]):
236 # combined = nC2 for n = orbitals
237 combined = int(orbitals * (orbitals + 1) / 2)
238
239 coeff_matrix = np.zeros((combined, combined), dtype=float)
240 for [i, j], v in two_body_terms:
241 coeff_matrix[int(i)][int(j)] = v
242
243 # Compute the eigen decomposition of the symmetric coefficient matrix
244 # with two body terms
245 evals, evecs = LA.eigh(coeff_matrix)
246 rows, cols = np.shape(evecs)
247
248 evals_signs = np.zeros(cols, dtype=int)
249
250 # let eigenvectors be represented as row vectors
251 evecs = np.transpose(evecs)
252
253 # Scale eigenvector by square root of corresponding eigenvalue
254 for i in range(len(evals)):
255 evecs[i] = math.sqrt(abs(evals[i])) * evecs[i]
256 evals_signs[i] = np.sign(evals[i])
257
258 # Collect eigenvectors as 1D array
259 scaled_evecs_1D = np.reshape(evecs, cols * rows)
260
261 return (cols, evals_signs.tolist(), scaled_evecs_1D)
262
263 @classmethod
264 def perform_svd(
265 self, structure: FCIDumpFileContent
266 ) -> (int, npt.NDArray[int], npt.NDArray[float], npt.NDArray[float]):
267
268 full_two_body_terms = self.populate_two_body_terms(structure.two_body_terms)
269
270 # Compute the eigen decomposition of two-electron terms
271 (rank, eigenvalue_signs, two_electron_vectors) = self.eigen_svd(
272 structure.num_orbitals, full_two_body_terms
273 )
274
275 length = (
276 self.combined_index(structure.num_orbitals - 1, structure.num_orbitals - 1)
277 + 1
278 )
279 one_electron_vector = np.zeros((length), dtype=float)
280
281 # Collect one-electron terms into a single 2D array
282 for [i, j], v in structure.one_body_terms:
283 one_electron_vector[self.combined_index(i, j)] = v
284
285 return (rank, eigenvalue_signs, one_electron_vector, two_electron_vectors)
286
287 @classmethod
288 def eigen_decomp(
289 self, dimension: int, vector: npt.NDArray[float]
290 ) -> (npt.NDArray[float], npt.NDArray[float], float, float):
291 matrix = np.zeros((dimension, dimension), dtype=float)
292
293 # Create lower triangular matrix
294 matrix[np.tril_indices(dimension, 0)] = vector
295
296 # Compute the eigen decomposition of the lower triangular matrix
297 evals, evecs = LA.eigh(matrix, lower=True)
298 norm1 = np.linalg.norm(evals, 1)
299 norm2 = np.linalg.norm(evals)
300
301 (rows, cols) = np.shape(evecs)
302 evecs_1D = np.reshape(np.transpose(evecs), cols * rows)
303
304 return (evals, evecs_1D, norm1, norm2)
305
306 @classmethod
307 def process_one_body_term(
308 self,
309 orbitals: int,
310 rank: int,
311 one_electron_vector: npt.NDArray[float],
312 eigenvalue_signs: npt.NDArray[bool],
313 two_electron_vectors: npt.NDArray[float],
314 ) -> (list[float], npt.NDArray[float], float, float):
315 # combined = nC2 for n = orbitals
316 combined = int(orbitals * (orbitals + 1) / 2)
317
318 vector = np.zeros(combined, dtype=float)
319
320 for l in range(rank):
321 matrix = np.zeros((orbitals, orbitals), dtype=float)
322 H_l = self.vectors_to_sym_mat(
323 two_electron_vectors[range(combined * l, combined * (l + 1))], orbitals
324 )
325
326 H_issj = eigenvalue_signs[l] * np.matmul(H_l, H_l)
327
328 H_ssij = eigenvalue_signs[l] * np.trace(H_l) * H_l
329
330 matrix = -0.5 * H_issj + H_ssij
331
332 # Convert symmetric matrix to lower triangular matrix
333 vector += matrix[np.tril_indices(orbitals, 0)]
334
335 one_electron_vector += vector
336
337 (one_body_eigenvalues, one_body_eigenvectors, one_body_norm, norm2) = (
338 self.eigen_decomp(orbitals, one_electron_vector)
339 )
340
341 return (one_body_eigenvalues, one_body_eigenvectors, one_body_norm, norm2)
342
343 @classmethod
344 def process_two_body_terms(
345 self, orbitals: int, rank: int, two_electron_vectors: npt.NDArray[float]
346 ) -> TwoBodyResult:
347
348 two_body_eigenvectors = []
349 two_body_eigenvalues = []
350 one_norms = []
351 two_norms = []
352 combined = int(orbitals * (orbitals + 1) / 2)
353
354 for i in range(rank):
355 matrix = two_electron_vectors[range(combined * i, combined * (i + 1))]
356 (evals, evecs, norm1, norm2) = self.eigen_decomp(orbitals, matrix)
357
358 two_body_eigenvalues.append(evals)
359 two_body_eigenvectors.append(evecs)
360 one_norms.append(norm1)
361 two_norms.append(norm2)
362
363 two_body_result = TwoBodyResult(
364 two_body_eigenvalues, two_body_eigenvectors, one_norms, two_norms
365 )
366
367 return two_body_result
368
369 @classmethod
370 def truncate_terms(
371 self, orbitals: int, error_eigenvalues: float, two_body_result: TwoBodyResult
372 ):
373 values_with_error = []
374 for r, values in enumerate(two_body_result.eigenvalues):
375 for i, v in enumerate(values):
376 error = abs(v) * two_body_result.two_norms[r]
377 values_with_error.append((error, r, i))
378
379 # Sort in ascending order by error values
380 values_with_error = sorted(values_with_error, key=lambda tup: tup[0])
381
382 # Truncate the list so that the sum of squares of errors left is less
383 # than the square of the input error
384 total_error = 0
385 truncate = len(values_with_error)
386 for i, (error, _, _) in enumerate(values_with_error):
387 error_compare = error**2
388 if total_error + error_compare < error_eigenvalues**2:
389 total_error += error_compare
390 else:
391 truncate = i
392 break
393
394 # Keep the first `truncate` values
395 values_with_error = values_with_error[:truncate]
396
397 indices_by_rank = []
398 for _, r, i in values_with_error:
399 while r >= len(indices_by_rank):
400 indices_by_rank.append([])
401 indices_by_rank[r].append(i)
402
403 for r, indices in reversed(list(enumerate(indices_by_rank))):
404 if len(indices) == orbitals:
405 # All indices are to be removed: fully remove the r^th entry
406 # for the norms, eigenvalues and eigenvectors.
407 del two_body_result.eigenvalues[r]
408 del two_body_result.eigenvectors[r]
409 del two_body_result.one_norms[r]
410 del two_body_result.two_norms[r]
411 else:
412 indices.sort()
413 # Remove only eigenvalues/vectors corresponding to `indices`
414 two_body_result.eigenvalues[r] = np.delete(
415 arr=two_body_result.eigenvalues[r], obj=indices
416 )
417 arr = np.reshape(two_body_result.eigenvectors[r], (orbitals, orbitals))
418 arr = np.delete(arr=arr, obj=indices, axis=0)
419 (rows, cols) = np.shape(arr)
420 two_body_result.eigenvectors[r] = np.reshape(arr, rows * cols)
421
422 # Check that same number of terms are removed for norms,
423 # eigenvalues and eigenvectors
424 assert (
425 len(two_body_result.one_norms) == len(two_body_result.two_norms)
426 and len(two_body_result.one_norms) == len(two_body_result.eigenvalues)
427 and len(two_body_result.one_norms) == len(two_body_result.eigenvectors)
428 )
429
430
431# Convert 2D array into string representation
432def ndarray2d_to_string(arr):
433 str_arr = []
434 for elem in arr:
435 str_arr.append(np.array2string(elem, separator=","))
436 return f"[{','.join(str_arr)}]"
437
438# The script takes one required positional argument, URI of the FCIDUMP file
439parser = ArgumentParser(description="Double-factorized chemistry sample")
440# Use n2-10e-8o as the default sample.
441# Pass a different filename to get estimates for different compounds
442parser.add_argument(
443 "-f",
444 "--fcidumpfile",
445 default="https://aka.ms/fcidump/n2-10e-8o",
446 help="Path to the FCIDUMP file describing the Hamiltonian",
447)
448parser.add_argument(
449 "-p",
450 "--paramsfile",
451 nargs="*",
452 help="Optional parameter files to use for estimation",
453)
454args = parser.parse_args()
455
456# ----- Read the FCIDUMP file and get resource estimates from Q# algorithm -----
457structure = FCIDumpFileContent.from_file(args.fcidumpfile)
458df = DoubleFactorization.process_fcidump(structure, 0.001)
459
460# Load Q# project
461qsharp.init(project_root=".")
462
463# Construct the Q# operation call for which we need to perform resource estimate
464str_one_body_eigenvalues = np.array2string(df.one_body_eigenvalues, separator=",")
465
466str_one_body_eigenvectors = ndarray2d_to_string(df.one_body_eigenvectors)
467
468str_two_body_eigenvalues = ndarray2d_to_string(df.two_body_eigenvalues)
469
470str_two_body_eigenvectors = (
471 "["
472 + ",".join(
473 [ndarray2d_to_string(eigenvectors) for eigenvectors in df.two_body_eigenvectors]
474 )
475 + "]"
476)
477
478qsharp_string = (
479 "Microsoft.Quantum.Applications.Chemistry.DoubleFactorizedChemistry("
480 "Microsoft.Quantum.Applications.Chemistry.DoubleFactorizedChemistryProblem("
481 f"{df.num_orbitals}, {df.one_body_norm}, {df.two_body_norm}, "
482 f"{str_one_body_eigenvalues}, {str_one_body_eigenvectors}, "
483 f"[1.0, size = {df.rank}], {str_two_body_eigenvalues}, "
484 f"{str_two_body_eigenvectors}),"
485 "Microsoft.Quantum.Applications.Chemistry.DoubleFactorizedChemistryParameters(0.001,))"
486)
487
488# Collect resource estimation parameters
489if args.paramsfile is None:
490 params = [
491 {
492 "errorBudget": 0.01,
493 "qubitParams": {"name": "qubit_maj_ns_e6"},
494 "qecScheme": {"name": "floquet_code"},
495 },
496 {
497 "qubitParams": { "name": "qubit_gate_us_e3" },
498 "estimateType": "frontier",
499 }
500 ]
501else:
502 params = []
503 for paramsfile in args.paramsfile:
504 with open(paramsfile) as f:
505 data = json.load(f)
506 if isinstance(data, dict):
507 params.append(data)
508 else:
509 params += data
510
511# Get resource estimates
512res = qsharp.estimate(
513 qsharp_string,
514 params=params,
515)
516
517# Store estimates in json file
518with open("resource_estimate.json", "w") as f:
519 f.write(res.json)
520
521result_obj = json.loads(res.json)
522if isinstance(result_obj,dict):
523 result_obj = [ result_obj ]
524
525data = []
526for item in result_obj:
527 data_item = []
528
529 # Run name
530 data_item.append(item["jobParams"]["qubitParams"]["name"])
531
532 # T factory fraction and Runtime
533 if "physicalCountsFormatted" in item:
534 data_item.append(item["physicalCountsFormatted"]["physicalQubitsForTfactoriesPercentage"])
535 data_item.append(item["physicalCountsFormatted"]["runtime"])
536 elif "frontierEntries" in item:
537 data_item.append(item["frontierEntries"][0]["physicalCountsFormatted"]["physicalQubitsForTfactoriesPercentage"])
538 data_item.append(item["frontierEntries"][0]["physicalCountsFormatted"]["runtime"])
539 else:
540 data_item.append("-")
541 data_item.append("-")
542
543 # Physical qubits and rQOPS
544 if "physicalCounts" in item:
545 data_item.append(item["physicalCounts"]["physicalQubits"])
546 data_item.append(item["physicalCounts"]["rqops"])
547 elif "frontierEntries" in item:
548 data_item.append(item["frontierEntries"][0]["physicalCounts"]["physicalQubits"])
549 data_item.append(item["frontierEntries"][0]["physicalCounts"]["rqops"])
550 else:
551 data_item.append("-")
552 data_item.append("-")
553
554 data.append(data_item)
555
556# Define the table headers
557headers = ["Run name", "T factory fraction", "Runtime", "Physical qubits", "rQOPS"]
558
559# Determine the width of each column
560col_widths = [max(len(str(item)) for item in column) for column in zip(headers, *data)]
561
562# Function to format a row
563def format_row(row):
564 return " | ".join(f"{str(item).ljust(width)}" for item, width in zip(row, col_widths))
565
566# Create the table
567header_row = format_row(headers)
568separator_row = "-+-".join("-" * width for width in col_widths)
569data_rows = [format_row(row) for row in data]
570
571# Print the table
572print(header_row)
573print(separator_row)
574for row in data_rows:
575 print(row)
576
577print("For more detailed resource counts, see file resource_estimate.json")
578