src.hadamard_count_mean.private_hcms_server
import os

import numpy as np
import pandas as pd
from rich.progress import Progress

from utils.utils import display_results


class privateHCMSServer:
    """
    A private Hadamard Count Mean Sketch (HCMS) server implementation.

    The server accumulates privatized reports in a k x m sketch matrix,
    multiplies it by the transposed Hadamard matrix and then answers
    frequency queries by averaging one cell per hash function.
    """

    def __init__(self, epsilon, k, m, df, hashes):
        """
        Initializes the private HCMS server.

        :param epsilon: Privacy parameter
        :param k: Number of hash functions (rows of the sketch matrix)
        :param m: Number of columns in the sketch matrix; must be a power of two
        :param df: Dataframe containing the dataset in a 'value' column
        :param hashes: List of hash functions
        :raises ValueError: If m is not a positive power of two
        """
        self.epsilon = epsilon
        self.k = k
        self.m = m
        # The dataframe must be stored from the argument: reading self.df
        # before assigning it raised AttributeError.
        self.df = df
        self.dataset = df['value'].tolist()
        self.domain = df['value'].unique().tolist()
        self.H = self.hadamard_matrix(self.m)
        self.N = len(self.dataset)
        self.hashes = hashes

        # Creation of the sketch matrix
        self.M = np.zeros((self.k, self.m))

    def hadamard_matrix(self, n):
        """
        Builds the n x n Hadamard matrix with the Sylvester construction.

        :param n: Order of the matrix; must be a positive power of two
        :return: Array of shape (n, n) with entries +1 / -1
        :raises ValueError: If n is not a positive power of two
        """
        if n < 1 or n & (n - 1):
            raise ValueError(f"n must be a positive power of two, got {n}")
        H = np.ones((1, 1), dtype=int)
        # Each step doubles the order: H_2n = [[H_n, H_n], [H_n, -H_n]]
        while H.shape[0] < n:
            H = np.block([[H, H], [H, -H]])
        return H

    def update_sketch_matrix(self, w, j, l):
        """
        Adds one privatized report to the sketch matrix.

        The report is scaled by k * c_eps, where
        c_eps = (e^(eps/2) + 1) / (e^(eps/2) - 1), and accumulated in cell (j, l).

        :param w: Privatized value of the report
        :param j: Row of the sketch matrix (hash function index)
        :param l: Column of the sketch matrix
        """
        half_eps_exp = np.exp(self.epsilon / 2)
        c_e = (half_eps_exp + 1) / (half_eps_exp - 1)
        self.M[j, l] += self.k * c_e * w

    def traspose_M(self):
        """
        Applies the Hadamard transformation to the sketch matrix.

        The sketch matrix is replaced by its product with the transposed
        Hadamard matrix.
        """
        self.M = self.M @ np.transpose(self.H)

    def estimate_server(self, d):
        """
        Estimates the frequency of an element in the dataset.

        :param d: Element to estimate
        :return: Estimated frequency
        """
        # One cell per row: the column is chosen by that row's hash function.
        cells = [self.M[i, self.hashes[i](d)] for i in range(self.k)]
        mean_count = 1 / self.k * np.sum(cells)
        return (self.m / (self.m - 1)) * (mean_count - self.N / self.m)

    def execute_server(self, privatized_data):
        """
        Processes the privatized data and estimates frequencies.

        :param privatized_data: List of privatized data points; each item is
            indexed as (w, j, l)
        :return: Dictionary of estimated frequencies keyed by domain element
        """
        with Progress() as progress:
            task = progress.add_task('[cyan]Update sketch matrix', total=len(privatized_data))
            for data in privatized_data:
                self.update_sketch_matrix(data[0], data[1], data[2])
                progress.update(task, advance=1)

            # Apply the Hadamard transformation once all reports are in
            self.traspose_M()

            # Estimate the frequencies
            F_estimated = {}
            task = progress.add_task('[cyan]Obtaining histogram of estimated frequencies', total=len(self.domain))
            for x in self.domain:
                F_estimated[x] = self.estimate_server(x)
                progress.update(task, advance=1)
        return F_estimated

    def query_server(self, query_element):
        """
        Queries the estimated frequency of an element.

        :param query_element: Element to query
        :return: Estimated frequency or a message if the element is not in the domain
        """
        if query_element not in self.domain:
            return "Element not in the domain"
        return self.estimate_server(query_element)


def run_private_hcms_server(k, m, e, df, hashes, privatized_data):
    """
    Runs the private HCMS server pipeline.

    Saves the privatized data to '../data/private/privatized_data.csv',
    builds the sketch, displays the results and then answers interactive
    queries until the user types 'exit'.

    :param k: Number of hash functions
    :param m: Number of columns in the sketch matrix
    :param e: Privacy parameter
    :param df: Dataframe containing the dataset
    :param hashes: List of hash functions
    :param privatized_data: List of privatized data points
    """
    # Initialize the server
    server = privateHCMSServer(e, k, m, df, hashes)

    # Save the privatized data (create the target directory if it is missing)
    output_dir = os.path.join('..', 'data', 'private')
    os.makedirs(output_dir, exist_ok=True)
    privatized_data_file = os.path.join(output_dir, 'privatized_data.csv')
    pd.DataFrame(privatized_data).to_csv(privatized_data_file, index=False)

    # Execute the server
    f_estimated = server.execute_server(privatized_data)

    # Show the results
    display_results(df, f_estimated)

    # Query the server
    while True:
        query = input("Enter an element to query the server or 'exit' to finish: ")
        if query.lower() == 'exit':
            break
        estimation = server.query_server(query)
        if isinstance(estimation, str):
            # query_server returns a message, not a number, for unknown
            # elements; formatting it with ':.2f' would raise ValueError.
            print(estimation)
        else:
            print(f"The estimated frequency of {query} is {estimation:.2f}")
class
privateHCMSServer:
class privateHCMSServer:
    """
    A private Hadamard Count Mean Sketch (HCMS) server implementation.

    The server accumulates privatized reports in a k x m sketch matrix,
    multiplies it by the transposed Hadamard matrix and then answers
    frequency queries by averaging one cell per hash function.
    """

    def __init__(self, epsilon, k, m, df, hashes):
        """
        Initializes the private HCMS server.

        :param epsilon: Privacy parameter
        :param k: Number of hash functions (rows of the sketch matrix)
        :param m: Number of columns in the sketch matrix; must be a power of two
        :param df: Dataframe containing the dataset in a 'value' column
        :param hashes: List of hash functions
        :raises ValueError: If m is not a positive power of two
        """
        self.epsilon = epsilon
        self.k = k
        self.m = m
        # The dataframe must be stored from the argument: reading self.df
        # before assigning it raised AttributeError.
        self.df = df
        self.dataset = df['value'].tolist()
        self.domain = df['value'].unique().tolist()
        self.H = self.hadamard_matrix(self.m)
        self.N = len(self.dataset)
        self.hashes = hashes

        # Creation of the sketch matrix
        self.M = np.zeros((self.k, self.m))

    def hadamard_matrix(self, n):
        """
        Builds the n x n Hadamard matrix with the Sylvester construction.

        :param n: Order of the matrix; must be a positive power of two
        :return: Array of shape (n, n) with entries +1 / -1
        :raises ValueError: If n is not a positive power of two
        """
        if n < 1 or n & (n - 1):
            raise ValueError(f"n must be a positive power of two, got {n}")
        H = np.ones((1, 1), dtype=int)
        # Each step doubles the order: H_2n = [[H_n, H_n], [H_n, -H_n]]
        while H.shape[0] < n:
            H = np.block([[H, H], [H, -H]])
        return H

    def update_sketch_matrix(self, w, j, l):
        """
        Adds one privatized report to the sketch matrix.

        The report is scaled by k * c_eps, where
        c_eps = (e^(eps/2) + 1) / (e^(eps/2) - 1), and accumulated in cell (j, l).

        :param w: Privatized value of the report
        :param j: Row of the sketch matrix (hash function index)
        :param l: Column of the sketch matrix
        """
        half_eps_exp = np.exp(self.epsilon / 2)
        c_e = (half_eps_exp + 1) / (half_eps_exp - 1)
        self.M[j, l] += self.k * c_e * w

    def traspose_M(self):
        """
        Applies the Hadamard transformation to the sketch matrix.

        The sketch matrix is replaced by its product with the transposed
        Hadamard matrix.
        """
        self.M = self.M @ np.transpose(self.H)

    def estimate_server(self, d):
        """
        Estimates the frequency of an element in the dataset.

        :param d: Element to estimate
        :return: Estimated frequency
        """
        # One cell per row: the column is chosen by that row's hash function.
        cells = [self.M[i, self.hashes[i](d)] for i in range(self.k)]
        mean_count = 1 / self.k * np.sum(cells)
        return (self.m / (self.m - 1)) * (mean_count - self.N / self.m)

    def execute_server(self, privatized_data):
        """
        Processes the privatized data and estimates frequencies.

        :param privatized_data: List of privatized data points; each item is
            indexed as (w, j, l)
        :return: Dictionary of estimated frequencies keyed by domain element
        """
        with Progress() as progress:
            task = progress.add_task('[cyan]Update sketch matrix', total=len(privatized_data))
            for data in privatized_data:
                self.update_sketch_matrix(data[0], data[1], data[2])
                progress.update(task, advance=1)

            # Apply the Hadamard transformation once all reports are in
            self.traspose_M()

            # Estimate the frequencies
            F_estimated = {}
            task = progress.add_task('[cyan]Obtaining histogram of estimated frequencies', total=len(self.domain))
            for x in self.domain:
                F_estimated[x] = self.estimate_server(x)
                progress.update(task, advance=1)
        return F_estimated

    def query_server(self, query_element):
        """
        Queries the estimated frequency of an element.

        :param query_element: Element to query
        :return: Estimated frequency or a message if the element is not in the domain
        """
        if query_element not in self.domain:
            return "Element not in the domain"
        return self.estimate_server(query_element)
A private Hadamard Count Mean Sketch (HCMS) server implementation.
privateHCMSServer(epsilon, k, m, df, hashes)
def __init__(self, epsilon, k, m, df, hashes):
    """
    Initializes the private HCMS server.

    :param epsilon: Privacy parameter
    :param k: Number of hash functions (rows of the sketch matrix)
    :param m: Number of columns in the sketch matrix; must be a power of two
    :param df: Dataframe containing the dataset in a 'value' column
    :param hashes: List of hash functions
    :raises ValueError: If m is not a positive power of two
    """
    if m < 1 or m & (m - 1):
        raise ValueError(f"m must be a positive power of two, got {m}")

    self.epsilon = epsilon
    self.k = k
    self.m = m
    # The dataframe must be stored from the argument: reading self.df
    # before assigning it raised AttributeError.
    self.df = df
    self.dataset = df['value'].tolist()
    self.domain = df['value'].unique().tolist()

    # Sylvester construction of the m x m Hadamard matrix, built inline
    # because the class does not define a hadamard_matrix method.
    hadamard = np.ones((1, 1), dtype=int)
    while hadamard.shape[0] < m:
        hadamard = np.block([[hadamard, hadamard], [hadamard, -hadamard]])
    self.H = hadamard

    self.N = len(self.dataset)
    self.hashes = hashes

    # Creation of the sketch matrix
    self.M = np.zeros((self.k, self.m))
Initializes the private HCMS server.
Parameters
- epsilon: Privacy parameter
- k: Number of hash functions
- m: Number of columns in the sketch matrix
- df: Dataframe containing the dataset
- hashes: List of hash functions
def
update_sketch_matrix(self, w, j, l):
def update_sketch_matrix(self, w, j, l):
    """
    Adds one privatized report to the sketch matrix.

    The report is scaled by k * c_eps, where
    c_eps = (e^(eps/2) + 1) / (e^(eps/2) - 1), and accumulated in cell (j, l).

    :param w: Privatized value of the report
    :param j: Row of the sketch matrix (hash function index)
    :param l: Column of the sketch matrix
    """
    half_eps_exp = np.exp(self.epsilon / 2)
    scale = (half_eps_exp + 1) / (half_eps_exp - 1)
    self.M[j, l] += self.k * scale * w
Updates the sketch matrix with a new data point.
Parameters
- w: Weight of the data point
- j: Hash function index
- l: Column index of the sketch matrix cell to update
def
traspose_M(self):
def traspose_M(self):
    """
    Applies the Hadamard transformation to the sketch matrix.

    The sketch matrix is replaced by its product with the transposed
    Hadamard matrix.
    """
    hadamard_t = np.transpose(self.H)
    self.M = np.matmul(self.M, hadamard_t)
Applies the Hadamard transformation to the sketch matrix.
def
estimate_server(self, d):
def estimate_server(self, d):
    """
    Estimates the frequency of an element in the dataset.

    :param d: Element to estimate
    :return: Estimated frequency
    """
    # One cell per row: the column is chosen by that row's hash function.
    cells = [self.M[row, self.hashes[row](d)] for row in range(self.k)]
    mean_count = 1 / self.k * np.sum(cells)
    correction = self.N / self.m
    return (self.m / (self.m - 1)) * (mean_count - correction)
Estimates the frequency of an element in the dataset.
Parameters
- d: Element to estimate
Returns
Estimated frequency
def
execute_server(self, privatized_data):
def execute_server(self, privatized_data):
    """
    Processes the privatized data and estimates frequencies.

    Every report is added to the sketch matrix, the Hadamard
    transformation is applied once, and an estimate is computed for each
    element of the domain.

    :param privatized_data: List of privatized data points; each item is
        indexed as (w, j, l)
    :return: Dictionary of estimated frequencies keyed by domain element
    """
    with Progress() as progress:
        update_task = progress.add_task('[cyan]Update sketch matrix', total=len(privatized_data))
        for report in privatized_data:
            w, j, l = report[0], report[1], report[2]
            self.update_sketch_matrix(w, j, l)
            progress.update(update_task, advance=1)

        # Apply the Hadamard transformation once all reports are in
        self.traspose_M()

        # Build the histogram of estimated frequencies
        histogram = {}
        estimate_task = progress.add_task('[cyan]Obtaining histogram of estimated frequencies', total=len(self.domain))
        for element in self.domain:
            histogram[element] = self.estimate_server(element)
            progress.update(estimate_task, advance=1)
    return histogram
Processes the privatized data and estimates frequencies.
Parameters
- privatized_data: List of privatized data points
Returns
Dictionary of estimated frequencies
def
query_server(self, query_element):
def query_server(self, query_element):
    """
    Queries the estimated frequency of an element.

    :param query_element: Element to query
    :return: Estimated frequency or a message if the element is not in the domain
    """
    if query_element in self.domain:
        return self.estimate_server(query_element)
    return "Element not in the domain"
Queries the estimated frequency of an element.
Parameters
- query_element: Element to query
Returns
Estimated frequency or a message if the element is not in the domain
def
run_private_hcms_server(k, m, e, df, hashes, privatized_data):
def run_private_hcms_server(k, m, e, df, hashes, privatized_data):
    """
    Runs the private HCMS server pipeline.

    Saves the privatized data to '../data/private/privatized_data.csv',
    builds the sketch, displays the results and then answers interactive
    queries until the user types 'exit'.

    :param k: Number of hash functions
    :param m: Number of columns in the sketch matrix
    :param e: Privacy parameter
    :param df: Dataframe containing the dataset
    :param hashes: List of hash functions
    :param privatized_data: List of privatized data points
    """
    # Imported here because the module's import block does not bring pandas
    # into scope; using the bare name `pd` raised NameError.
    import pandas as pd

    # Initialize the server
    server = privateHCMSServer(e, k, m, df, hashes)

    # Save the privatized data (create the target directory if it is missing)
    output_dir = os.path.join('..', 'data', 'private')
    os.makedirs(output_dir, exist_ok=True)
    privatized_data_file = os.path.join(output_dir, 'privatized_data.csv')
    pd.DataFrame(privatized_data).to_csv(privatized_data_file, index=False)

    # Execute the server
    f_estimated = server.execute_server(privatized_data)

    # Show the results
    display_results(df, f_estimated)

    # Query the server
    while True:
        query = input("Enter an element to query the server or 'exit' to finish: ")
        if query.lower() == 'exit':
            break
        estimation = server.query_server(query)
        if isinstance(estimation, str):
            # query_server returns a message, not a number, for unknown
            # elements; formatting it with ':.2f' would raise ValueError.
            print(estimation)
        else:
            print(f"The estimated frequency of {query} is {estimation:.2f}")
Runs the private HCMS server pipeline.
Parameters
- k: Number of hash functions
- m: Number of columns in the sketch matrix
- e: Privacy parameter
- df: Dataframe containing the dataset
- hashes: List of hash functions
- privatized_data: List of privatized data points