microsoft/qdk

Public

mirrored fromhttps://github.com/microsoft/qdkAvailable

CodeCommitsIssuesPull requestsActionsInsightsSecurity
iadavis/3208-leak-fixes

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

samples/estimation/df-chemistry/chemistry.py

588lines · modecode

1# Copyright (c) Microsoft Corporation. All rights reserved.
2# Licensed under the MIT License.
3import json
4import math
5import numpy as np
6import numpy.linalg as LA
7import numpy.typing as npt
8from qdk import qsharp
9from argparse import ArgumentParser
10from dataclasses import dataclass
11from pathlib import Path
12from urllib.parse import urlparse
13from urllib.request import urlretrieve
14
15
16@dataclass
17class FCIDumpFileContent:
18 num_orbitals: int
19 one_body_terms: list[tuple[list[int], float]]
20 two_body_terms: list[tuple[list[int], float]]
21
22 @classmethod
23 def from_file(self, file_url):
24 """Read FCIDump file (given by either a local path or a URL) and parse
25 it to get the input data for double-factorized chemistry algorithm."""
26 url = urlparse(file_url)
27
28 file_name = ""
29 if url.scheme in ["http", "https"]:
30 # Download the file
31 file_name = url.path.rsplit("/", 1)[-1]
32 print(f"Downloading {file_url} to {file_name}...")
33 urlretrieve(file_url, file_name)
34 elif url.scheme in ["", "file"]:
35 # Use the whole URL as the file path
36 file_name = file_url
37 else:
38 raise f"Unsupported file location {file_url}"
39
40 with open(file_name, "r") as f:
41 lines = [str.strip() for str in f.readlines()]
42
43 assert lines[0].startswith("&FCI")
44
45 parse_header = True
46 header = ""
47 header_values = {}
48 self.one_body_terms = []
49 self.two_body_terms = []
50
51 for line in lines:
52 if parse_header:
53 # File header might not have key-value pairs in separate lines,
54 # so accumulate the whole header first and then parse.
55 header += line + " "
56 if line.endswith("&END"):
57 # Strip "&FCI" and "&END"
58 rest_header = header[4:-4]
59
60 # Find each key-value pair based on the "=" sign
61 while (ind := rest_header.find("=")) > -1:
62 key = rest_header[0:ind].strip()
63 rest_header = rest_header[ind + 1 :]
64 # Figure out which part of the rest is value: until
65 # either whitespace or (if key is not ORBSYM) comma
66 ind = rest_header.find("," if key != "ORBSYM" else " ")
67 value = rest_header[0:ind].strip()
68 rest_header = rest_header[ind + 1 :]
69 if key == "ORBSYM":
70 value = value[:-1]
71 header_values[key] = value
72 parse_header = False
73 else:
74 tokens = line.split()
75 coefficient = float(tokens[0])
76 indices = [int(str) - 1 for str in tokens[1:] if str != "0"]
77
78 if len(indices) == 2:
79 indices.sort()
80 self.one_body_terms.append((indices, coefficient))
81 elif len(indices) == 4:
82 # conversion from Mulliken to Dirac
83 i = indices[0]
84 j = indices[2]
85 k = indices[3]
86 l = indices[1]
87
88 symmetries = [
89 [i, j, k, l],
90 [j, i, l, k],
91 [k, l, i, j],
92 [l, k, j, i],
93 [i, k, j, l],
94 [k, i, l, j],
95 [j, l, i, k],
96 [l, j, k, i],
97 ]
98 symmetries.sort()
99 self.two_body_terms.append((symmetries[0], coefficient))
100
101 self.num_orbitals = int(header_values["NORB"])
102
103 return self
104
105
106@dataclass
107class TwoBodyResult:
108 eigenvalues: list[npt.NDArray[np.float64]]
109 eigenvectors: list[npt.NDArray[np.float64]]
110 one_norms: list[float]
111 two_norms: list[float]
112
113
114@dataclass
115class DoubleFactorization:
116 num_orbitals: int
117 rank: int
118 one_body_norm: float
119 two_body_norm: float
120 one_body_eigenvalues: list[float]
121 one_body_eigenvectors: npt.NDArray[npt.NDArray[np.float64]]
122 two_body_eigenvalues: npt.NDArray[npt.NDArray[np.float64]]
123 two_body_eigenvectors: list[npt.NDArray[npt.NDArray[np.float64]]]
124
125 @classmethod
126 def process_fcidump(self, structure: FCIDumpFileContent, error: float):
127
128 # The `structure` provides us with one- and two electron integrals
129 # h_{ij} and h_{ijkl} as described in Eq. (2). These coefficients are
130 # real and satisfy some symmetries which are outlined in Eq. (6).
131
132 # In this step we compute R (`rank`) as in Eq. (7), h_{ij}
133 # (`one_electron_vector`), and L_{ij}^{(r)} (`two_electron_vectors`).
134 (rank, eigenvalue_signs, one_electron_vector, two_electron_vectors) = (
135 self.perform_svd(structure)
136 )
137
138 # This computes Eq. (15). It stores L^{-1}_{ij} into
139 # `one_body_eigenvectors`, which is repurposed from
140 # `one_electron_vector` and uses the same indices. It also returns the
141 # Schatten norm, which is the first summand in Eq. (16).
142 (one_body_eigenvalues, one_body_eigenvectors, one_body_norm, norm2) = (
143 self.process_one_body_term(
144 structure.num_orbitals,
145 rank,
146 one_electron_vector,
147 eigenvalue_signs,
148 two_electron_vectors,
149 )
150 )
151
152 assert len(one_body_eigenvectors) == structure.num_orbitals**2
153
154 # This computes the second sum of Eq. (9), it computes the eigenvalues
155 # \lambda_m^{(r)} and eigenvectors $\vec R_{m,i}^{(r)}$. It does not
156 # normalize the vectors, but returns the one- and two-norms of the
157 # eigenvalues.
158 two_body_result = self.process_two_body_terms(
159 structure.num_orbitals, rank, two_electron_vectors
160 )
161
162 # Discard terms that will make the description exceed the error budget
163 self.truncate_terms(structure.num_orbitals, error, two_body_result)
164
165 # This computes the second summand in Eq. (16).
166 two_body_norm = 0.0
167 two_body_norm += sum(0.25 * norm * norm for norm in two_body_result.one_norms)
168
169 # Reshape the one body eigenvectors so that they are represented
170 # row-wise in a 2D array
171 one_body_eigenvectors = np.reshape(
172 one_body_eigenvectors, (structure.num_orbitals, structure.num_orbitals)
173 )
174
175 for i in range(len(two_body_result.eigenvectors)):
176 cols = structure.num_orbitals
177 rows = int(len(two_body_result.eigenvectors[i]) / cols)
178 assert rows * cols == len(two_body_result.eigenvectors[i])
179
180 # Reshape the two body eigenvectors so that they represented
181 # row-wise in a 2D array
182 two_body_result.eigenvectors[i] = np.reshape(
183 two_body_result.eigenvectors[i], (rows, cols)
184 )
185
186 self.num_orbitals = structure.num_orbitals
187 # Use the post-truncation rank
188 self.rank = len(two_body_result.eigenvectors)
189 self.one_body_norm = one_body_norm
190 self.two_body_norm = two_body_norm
191 self.one_body_eigenvalues = one_body_eigenvalues
192 self.one_body_eigenvectors = one_body_eigenvectors
193 self.two_body_eigenvalues = two_body_result.eigenvalues
194 self.two_body_eigenvectors = two_body_result.eigenvectors
195
196 return self
197
198 def combined_index(i: int, j: int) -> int:
199 return int(max(i, j) * (max(i, j) + 1) / 2 + min(i, j))
200
201 def vectors_to_sym_mat(
202 vector: npt.NDArray[float], dimension: int
203 ) -> npt.NDArray[npt.NDArray[float]]:
204 matrix = np.zeros((dimension, dimension), dtype=float)
205
206 # Create lower triangular matrix
207 matrix[np.tril_indices(dimension, 0)] = vector
208
209 # Convert lower triangular matrix to symmetrix matrix
210 matrix = matrix + matrix.T
211
212 # Halve elements of diagonal to avoid doubling
213 matrix = matrix - 0.5 * np.diag(np.diagonal(matrix, 0))
214
215 return matrix
216
217 @classmethod
218 def populate_two_body_terms(
219 self, two_body_terms: list[tuple[list[int], float]]
220 ) -> list[tuple[list[int], float]]:
221 complete_two_body_terms = []
222
223 for [i, j, k, l], val in two_body_terms:
224 ii = self.combined_index(i, l)
225 jj = self.combined_index(j, k)
226
227 complete_two_body_terms.append(([ii, jj], val))
228 if ii != jj:
229 complete_two_body_terms.append(([jj, ii], val))
230
231 return complete_two_body_terms
232
233 @classmethod
234 def eigen_svd(
235 self, orbitals: int, two_body_terms: list[tuple[list[int], float]]
236 ) -> (int, list[int], list[float]):
237 # combined = nC2 for n = orbitals
238 combined = int(orbitals * (orbitals + 1) / 2)
239
240 coeff_matrix = np.zeros((combined, combined), dtype=float)
241 for [i, j], v in two_body_terms:
242 coeff_matrix[int(i)][int(j)] = v
243
244 # Compute the eigen decomposition of the symmetric coefficient matrix
245 # with two body terms
246 evals, evecs = LA.eigh(coeff_matrix)
247 rows, cols = np.shape(evecs)
248
249 evals_signs = np.zeros(cols, dtype=int)
250
251 # let eigenvectors be represented as row vectors
252 evecs = np.transpose(evecs)
253
254 # Scale eigenvector by square root of corresponding eigenvalue
255 for i in range(len(evals)):
256 evecs[i] = math.sqrt(abs(evals[i])) * evecs[i]
257 evals_signs[i] = np.sign(evals[i])
258
259 # Collect eigenvectors as 1D array
260 scaled_evecs_1D = np.reshape(evecs, cols * rows)
261
262 return (cols, evals_signs.tolist(), scaled_evecs_1D)
263
264 @classmethod
265 def perform_svd(
266 self, structure: FCIDumpFileContent
267 ) -> (int, npt.NDArray[int], npt.NDArray[float], npt.NDArray[float]):
268
269 full_two_body_terms = self.populate_two_body_terms(structure.two_body_terms)
270
271 # Compute the eigen decomposition of two-electron terms
272 (rank, eigenvalue_signs, two_electron_vectors) = self.eigen_svd(
273 structure.num_orbitals, full_two_body_terms
274 )
275
276 length = (
277 self.combined_index(structure.num_orbitals - 1, structure.num_orbitals - 1)
278 + 1
279 )
280 one_electron_vector = np.zeros((length), dtype=float)
281
282 # Collect one-electron terms into a single 2D array
283 for [i, j], v in structure.one_body_terms:
284 one_electron_vector[self.combined_index(i, j)] = v
285
286 return (rank, eigenvalue_signs, one_electron_vector, two_electron_vectors)
287
288 @classmethod
289 def eigen_decomp(
290 self, dimension: int, vector: npt.NDArray[float]
291 ) -> (npt.NDArray[float], npt.NDArray[float], float, float):
292 matrix = np.zeros((dimension, dimension), dtype=float)
293
294 # Create lower triangular matrix
295 matrix[np.tril_indices(dimension, 0)] = vector
296
297 # Compute the eigen decomposition of the lower triangular matrix
298 evals, evecs = LA.eigh(matrix, UPLO="L")
299 norm1 = LA.norm(evals, 1)
300 norm2 = LA.norm(evals)
301
302 (rows, cols) = np.shape(evecs)
303 evecs_1D = np.reshape(np.transpose(evecs), cols * rows)
304
305 return (evals, evecs_1D, norm1, norm2)
306
307 @classmethod
308 def process_one_body_term(
309 self,
310 orbitals: int,
311 rank: int,
312 one_electron_vector: npt.NDArray[float],
313 eigenvalue_signs: npt.NDArray[bool],
314 two_electron_vectors: npt.NDArray[float],
315 ) -> (list[float], npt.NDArray[float], float, float):
316 # combined = nC2 for n = orbitals
317 combined = int(orbitals * (orbitals + 1) / 2)
318
319 vector = np.zeros(combined, dtype=float)
320
321 for l in range(rank):
322 matrix = np.zeros((orbitals, orbitals), dtype=float)
323 H_l = self.vectors_to_sym_mat(
324 two_electron_vectors[range(combined * l, combined * (l + 1))], orbitals
325 )
326
327 H_issj = eigenvalue_signs[l] * np.matmul(H_l, H_l)
328
329 H_ssij = eigenvalue_signs[l] * np.trace(H_l) * H_l
330
331 matrix = -0.5 * H_issj + H_ssij
332
333 # Convert symmetric matrix to lower triangular matrix
334 vector += matrix[np.tril_indices(orbitals, 0)]
335
336 one_electron_vector += vector
337
338 (one_body_eigenvalues, one_body_eigenvectors, one_body_norm, norm2) = (
339 self.eigen_decomp(orbitals, one_electron_vector)
340 )
341
342 return (one_body_eigenvalues, one_body_eigenvectors, one_body_norm, norm2)
343
344 @classmethod
345 def process_two_body_terms(
346 self, orbitals: int, rank: int, two_electron_vectors: npt.NDArray[float]
347 ) -> TwoBodyResult:
348
349 two_body_eigenvectors = []
350 two_body_eigenvalues = []
351 one_norms = []
352 two_norms = []
353 combined = int(orbitals * (orbitals + 1) / 2)
354
355 for i in range(rank):
356 matrix = two_electron_vectors[range(combined * i, combined * (i + 1))]
357 (evals, evecs, norm1, norm2) = self.eigen_decomp(orbitals, matrix)
358
359 two_body_eigenvalues.append(evals)
360 two_body_eigenvectors.append(evecs)
361 one_norms.append(norm1)
362 two_norms.append(norm2)
363
364 two_body_result = TwoBodyResult(
365 two_body_eigenvalues, two_body_eigenvectors, one_norms, two_norms
366 )
367
368 return two_body_result
369
370 @classmethod
371 def truncate_terms(
372 self, orbitals: int, error_eigenvalues: float, two_body_result: TwoBodyResult
373 ):
374 values_with_error = []
375 for r, values in enumerate(two_body_result.eigenvalues):
376 for i, v in enumerate(values):
377 error = abs(v) * two_body_result.two_norms[r]
378 values_with_error.append((error, r, i))
379
380 # Sort in ascending order by error values
381 values_with_error = sorted(values_with_error, key=lambda tup: tup[0])
382
383 # Truncate the list so that the sum of squares of errors left is less
384 # than the square of the input error
385 total_error = 0
386 truncate = len(values_with_error)
387 for i, (error, _, _) in enumerate(values_with_error):
388 error_compare = error**2
389 if total_error + error_compare < error_eigenvalues**2:
390 total_error += error_compare
391 else:
392 truncate = i
393 break
394
395 # Keep the first `truncate` values
396 values_with_error = values_with_error[:truncate]
397
398 indices_by_rank = []
399 for _, r, i in values_with_error:
400 while r >= len(indices_by_rank):
401 indices_by_rank.append([])
402 indices_by_rank[r].append(i)
403
404 for r, indices in reversed(list(enumerate(indices_by_rank))):
405 if len(indices) == orbitals:
406 # All indices are to be removed: fully remove the r^th entry
407 # for the norms, eigenvalues and eigenvectors.
408 del two_body_result.eigenvalues[r]
409 del two_body_result.eigenvectors[r]
410 del two_body_result.one_norms[r]
411 del two_body_result.two_norms[r]
412 else:
413 indices.sort()
414 # Remove only eigenvalues/vectors corresponding to `indices`
415 two_body_result.eigenvalues[r] = np.delete(
416 arr=two_body_result.eigenvalues[r], obj=indices
417 )
418 arr = np.reshape(two_body_result.eigenvectors[r], (orbitals, orbitals))
419 arr = np.delete(arr=arr, obj=indices, axis=0)
420 (rows, cols) = np.shape(arr)
421 two_body_result.eigenvectors[r] = np.reshape(arr, rows * cols)
422
423 # Check that same number of terms are removed for norms,
424 # eigenvalues and eigenvectors
425 assert (
426 len(two_body_result.one_norms) == len(two_body_result.two_norms)
427 and len(two_body_result.one_norms) == len(two_body_result.eigenvalues)
428 and len(two_body_result.one_norms) == len(two_body_result.eigenvectors)
429 )
430
431
432# Convert 2D array into string representation
433def ndarray2d_to_string(arr):
434 str_arr = []
435 for elem in arr:
436 str_arr.append(np.array2string(elem, separator=","))
437 return f"[{','.join(str_arr)}]"
438
439
440# The script takes one required positional argument, URI of the FCIDUMP file
441parser = ArgumentParser(description="Double-factorized chemistry sample")
442# Use n2-10e-8o as the default sample.
443# Pass a different filename to get estimates for different compounds
444parser.add_argument(
445 "-f",
446 "--fcidumpfile",
447 default="https://aka.ms/fcidump/n2-10e-8o",
448 help="Path to the FCIDUMP file describing the Hamiltonian",
449)
450parser.add_argument(
451 "-p",
452 "--paramsfile",
453 nargs="*",
454 help="Optional parameter files to use for estimation",
455)
456args = parser.parse_args()
457
458# ----- Read the FCIDUMP file and get resource estimates from Q# algorithm -----
459structure = FCIDumpFileContent.from_file(args.fcidumpfile)
460df = DoubleFactorization.process_fcidump(structure, 0.001)
461
462# Load Q# project
463this_dir = Path(__file__).parent
464qsharp.init(project_root=this_dir)
465
466# Construct the Q# operation call for which we need to perform resource estimate
467str_one_body_eigenvalues = np.array2string(df.one_body_eigenvalues, separator=",")
468
469str_one_body_eigenvectors = ndarray2d_to_string(df.one_body_eigenvectors)
470
471str_two_body_eigenvalues = ndarray2d_to_string(df.two_body_eigenvalues)
472
473str_two_body_eigenvectors = (
474 "["
475 + ",".join(
476 [ndarray2d_to_string(eigenvectors) for eigenvectors in df.two_body_eigenvectors]
477 )
478 + "]"
479)
480
481qsharp_string = (
482 "DoubleFactorizedChemistry.DoubleFactorizedChemistry("
483 "DoubleFactorizedChemistry.DoubleFactorizedChemistryProblem("
484 f"{df.num_orbitals}, {df.one_body_norm}, {df.two_body_norm}, "
485 f"{str_one_body_eigenvalues}, {str_one_body_eigenvectors}, "
486 f"[1.0, size = {df.rank}], {str_two_body_eigenvalues}, "
487 f"{str_two_body_eigenvectors}),"
488 "DoubleFactorizedChemistry.DoubleFactorizedChemistryParameters(0.001,))"
489)
490
491# Collect resource estimation parameters
492if args.paramsfile is None:
493 params = [
494 {
495 "errorBudget": 0.01,
496 "qubitParams": {"name": "qubit_maj_ns_e6"},
497 "qecScheme": {"name": "floquet_code"},
498 }
499 ]
500else:
501 params = []
502 for paramsfile in args.paramsfile:
503 with open(paramsfile) as f:
504 data = json.load(f)
505 if isinstance(data, dict):
506 params.append(data)
507 else:
508 params += data
509
510# Get resource estimates
511res = qsharp.estimate(
512 qsharp_string,
513 params=params,
514)
515
516# Store estimates in json file
517with open("resource_estimate.json", "w") as f:
518 f.write(res.json)
519
520result_obj = json.loads(res.json)
521if isinstance(result_obj, dict):
522 result_obj = [result_obj]
523
524data = []
525for item in result_obj:
526 data_item = []
527
528 # Run name
529 data_item.append(item["jobParams"]["qubitParams"]["name"])
530
531 # T factory fraction and Runtime
532 if "physicalCountsFormatted" in item:
533 data_item.append(
534 item["physicalCountsFormatted"]["physicalQubitsForTfactoriesPercentage"]
535 )
536 data_item.append(item["physicalCountsFormatted"]["runtime"])
537 elif "frontierEntries" in item:
538 data_item.append(
539 item["frontierEntries"][0]["physicalCountsFormatted"][
540 "physicalQubitsForTfactoriesPercentage"
541 ]
542 )
543 data_item.append(
544 item["frontierEntries"][0]["physicalCountsFormatted"]["runtime"]
545 )
546 else:
547 data_item.append("-")
548 data_item.append("-")
549
550 # Physical qubits and rQOPS
551 if "physicalCounts" in item:
552 data_item.append(item["physicalCounts"]["physicalQubits"])
553 data_item.append(item["physicalCounts"]["rqops"])
554 elif "frontierEntries" in item:
555 data_item.append(item["frontierEntries"][0]["physicalCounts"]["physicalQubits"])
556 data_item.append(item["frontierEntries"][0]["physicalCounts"]["rqops"])
557 else:
558 data_item.append("-")
559 data_item.append("-")
560
561 data.append(data_item)
562
563# Define the table headers
564headers = ["Run name", "T factory fraction", "Runtime", "Physical qubits", "rQOPS"]
565
566# Determine the width of each column
567col_widths = [max(len(str(item)) for item in column) for column in zip(headers, *data)]
568
569
570# Function to format a row
571def format_row(row):
572 return " | ".join(
573 f"{str(item).ljust(width)}" for item, width in zip(row, col_widths)
574 )
575
576
577# Create the table
578header_row = format_row(headers)
579separator_row = "-+-".join("-" * width for width in col_widths)
580data_rows = [format_row(row) for row in data]
581
582# Print the table
583print(header_row)
584print(separator_row)
585for row in data_rows:
586 print(row)
587
588print("For more detailed resource counts, see file resource_estimate.json")