src.private_count_mean.private_cms_server
import numpy as np
import pandas as pd
import os
from rich.progress import Progress

from utils.utils import display_results


class privateCMSServer:
    """
    This class represents the server side of the Private Count-Mean Sketch (PCMS).
    It is responsible for updating the sketch matrix and providing frequency estimations.

    Attributes:
        df (pandas.DataFrame): The dataset containing the values.
        epsilon (float): The privacy parameter epsilon.
        k (int): The number of hash functions.
        m (int): The size of the sketch.
        dataset (list): The list of values in the dataset.
        domain (list): The unique values in the dataset.
        N (int): The size of the dataset.
        H (list): The list of hash functions.
        M (numpy.ndarray): The sketch matrix.
    """
    def __init__(self, epsilon, k, m, df, H):
        """
        Initializes the privateCMSServer class with the given parameters.

        Args:
            epsilon (float): The privacy parameter epsilon.
            k (int): The number of hash functions.
            m (int): The size of the sketch.
            df (pandas.DataFrame): The dataset containing the values
                (read from its 'value' column).
            H (list): The list of hash functions.
        """
        self.df = df
        self.epsilon = epsilon
        self.k = k
        self.m = m
        self.dataset = self.df['value'].tolist()
        self.domain = self.df['value'].unique().tolist()
        self.N = len(self.dataset)
        self.H = H

        # Creation of the sketch matrix: one row per hash function.
        self.M = np.zeros((self.k, self.m))

    def update_sketch_matrix(self, v, j):
        """
        Adds one privatized report to row ``j`` of the sketch matrix.

        The report is rescaled as ``k * ((c_e / 2) * v + 1/2)`` with
        ``c_e = (exp(epsilon / 2) + 1) / (exp(epsilon / 2) - 1)`` and its
        first ``m`` entries are accumulated into ``self.M[j]``.

        Args:
            v (numpy.ndarray): The privatized vector (at least ``m`` entries).
            j (int): The index of the hash function used.

        Raises:
            IndexError: If ``v`` has fewer than ``m`` entries.
        """
        half_exp = np.exp(self.epsilon / 2)
        c_e = (half_exp + 1) / (half_exp - 1)
        x = self.k * ((c_e / 2) * np.asarray(v) + 0.5)
        if x.shape[0] < self.m:
            raise IndexError(
                f"privatized vector has {x.shape[0]} entries, expected at least {self.m}"
            )
        # One vectorized row update instead of a Python loop over the m columns.
        self.M[j, :] += x[:self.m]

    def execute_server(self, privatized_data):
        """
        Executes the server-side operations, including updating the sketch matrix
        and estimating the frequencies.

        Args:
            privatized_data (list): The privatized data from the client. Each
                item is indexed as ``item[0]`` (vector) and ``item[1]`` (hash index).

        Returns:
            dict: A dictionary containing the estimated frequencies for each element.
        """
        with Progress() as progress:
            task = progress.add_task('[cyan]Update sketch matrix', total=len(privatized_data))

            for data in privatized_data:
                self.update_sketch_matrix(data[0], data[1])
                progress.update(task, advance=1)

            F_estimated = {}
            task = progress.add_task('[cyan]Obtaining histogram of estimated frequencies', total=len(self.domain))
            for x in self.domain:
                F_estimated[x] = self.estimate_server(x)
                progress.update(task, advance=1)

        return F_estimated

    def estimate_server(self, d):
        """
        Estimates the frequency of an element based on the current sketch matrix.

        Args:
            d (any): The element whose frequency is to be estimated.

        Returns:
            float: The estimated frequency of the element.
        """
        # Sum the cell each hash function maps ``d`` to, one cell per row.
        sum_aux = 0
        for i in range(self.k):
            selected_hash = self.H[i]
            sum_aux += self.M[i, selected_hash(d)]

        f_estimated = (self.m / (self.m - 1)) * ((sum_aux / self.k) - (self.N / self.m))
        return f_estimated

    def query_server(self, query_element):
        """
        Queries the server for the estimated frequency of an element.

        Args:
            query_element (any): The element to query.

        Returns:
            float or str: The estimated frequency of the element, or a message
            if the element is not in the domain.
        """
        if query_element not in self.domain:
            return "Element not in the domain"
        estimation = self.estimate_server(query_element)
        return estimation


def run_private_cms_server(k, m, e, df, H, privatized_data):
    """
    Runs the server-side operations for the Private Count-Mean Sketch, including
    estimating frequencies and querying the server.

    The privatized data is written to ``../data/private/privatized_data.csv``
    (relative to the current working directory), the estimated histogram is
    passed to ``display_results``, and an interactive prompt then answers
    queries until the user types 'exit'.

    Args:
        k (int): The number of hash functions.
        m (int): The size of the sketch.
        e (float): The privacy parameter epsilon.
        df (pandas.DataFrame): The dataset containing the values.
        H (list): The list of hash functions.
        privatized_data (list): The privatized data from the client.
    """
    # Initialize the server Count-Mean Sketch
    server = privateCMSServer(e, k, m, df, H)

    # Save the privatized data
    privatized_data_save = pd.DataFrame(privatized_data)
    privatized_data_file = os.path.join(os.path.join('..', 'data', 'private'), 'privatized_data.csv')
    privatized_data_save.to_csv(privatized_data_file, index=False)

    # Execute the server
    f_estimated = server.execute_server(privatized_data)

    # Show the results
    display_results(df, f_estimated)

    # Query the server
    # NOTE(review): input() always yields a str; if the 'value' column holds
    # non-string values no query can match the domain - confirm with the data.
    while True:
        query = input("Enter an element to query the server or 'exit' to finish: ")
        if query.lower() == 'exit':
            break
        estimation = server.query_server(query)
        if isinstance(estimation, str):
            # query_server returns a message string for unknown elements;
            # applying the ':.2f' format to it would raise ValueError.
            print(estimation)
        else:
            print(f"The estimated frequency of {query} is {estimation:.2f}")
class privateCMSServer:
    """
    This class represents the server side of the Private Count-Mean Sketch (PCMS).
    It is responsible for updating the sketch matrix and providing frequency estimations.

    Attributes:
        df (pandas.DataFrame): The dataset containing the values.
        epsilon (float): The privacy parameter epsilon.
        k (int): The number of hash functions.
        m (int): The size of the sketch.
        dataset (list): The list of values in the dataset.
        domain (list): The unique values in the dataset.
        N (int): The size of the dataset.
        H (list): The list of hash functions.
        M (numpy.ndarray): The sketch matrix.
    """
    def __init__(self, epsilon, k, m, df, H):
        """
        Initializes the privateCMSServer class with the given parameters.

        Args:
            epsilon (float): The privacy parameter epsilon.
            k (int): The number of hash functions.
            m (int): The size of the sketch.
            df (pandas.DataFrame): The dataset containing the values
                (read from its 'value' column).
            H (list): The list of hash functions.
        """
        self.df = df
        self.epsilon = epsilon
        self.k = k
        self.m = m
        self.dataset = self.df['value'].tolist()
        self.domain = self.df['value'].unique().tolist()
        self.N = len(self.dataset)
        self.H = H

        # Creation of the sketch matrix: one row per hash function.
        self.M = np.zeros((self.k, self.m))

    def update_sketch_matrix(self, v, j):
        """
        Adds one privatized report to row ``j`` of the sketch matrix.

        The report is rescaled as ``k * ((c_e / 2) * v + 1/2)`` with
        ``c_e = (exp(epsilon / 2) + 1) / (exp(epsilon / 2) - 1)`` and its
        first ``m`` entries are accumulated into ``self.M[j]``.

        Args:
            v (numpy.ndarray): The privatized vector (at least ``m`` entries).
            j (int): The index of the hash function used.

        Raises:
            IndexError: If ``v`` has fewer than ``m`` entries.
        """
        half_exp = np.exp(self.epsilon / 2)
        c_e = (half_exp + 1) / (half_exp - 1)
        x = self.k * ((c_e / 2) * np.asarray(v) + 0.5)
        if x.shape[0] < self.m:
            raise IndexError(
                f"privatized vector has {x.shape[0]} entries, expected at least {self.m}"
            )
        # One vectorized row update instead of a Python loop over the m columns.
        self.M[j, :] += x[:self.m]

    def execute_server(self, privatized_data):
        """
        Executes the server-side operations, including updating the sketch matrix
        and estimating the frequencies.

        Args:
            privatized_data (list): The privatized data from the client. Each
                item is indexed as ``item[0]`` (vector) and ``item[1]`` (hash index).

        Returns:
            dict: A dictionary containing the estimated frequencies for each element.
        """
        with Progress() as progress:
            task = progress.add_task('[cyan]Update sketch matrix', total=len(privatized_data))

            for data in privatized_data:
                self.update_sketch_matrix(data[0], data[1])
                progress.update(task, advance=1)

            F_estimated = {}
            task = progress.add_task('[cyan]Obtaining histogram of estimated frequencies', total=len(self.domain))
            for x in self.domain:
                F_estimated[x] = self.estimate_server(x)
                progress.update(task, advance=1)

        return F_estimated

    def estimate_server(self, d):
        """
        Estimates the frequency of an element based on the current sketch matrix.

        Args:
            d (any): The element whose frequency is to be estimated.

        Returns:
            float: The estimated frequency of the element.
        """
        # Sum the cell each hash function maps ``d`` to, one cell per row.
        sum_aux = 0
        for i in range(self.k):
            selected_hash = self.H[i]
            sum_aux += self.M[i, selected_hash(d)]

        f_estimated = (self.m / (self.m - 1)) * ((sum_aux / self.k) - (self.N / self.m))
        return f_estimated

    def query_server(self, query_element):
        """
        Queries the server for the estimated frequency of an element.

        Args:
            query_element (any): The element to query.

        Returns:
            float or str: The estimated frequency of the element, or a message
            if the element is not in the domain.
        """
        if query_element not in self.domain:
            return "Element not in the domain"
        estimation = self.estimate_server(query_element)
        return estimation
This class represents the server side of the Private Count-Mean Sketch (PCMS). It is responsible for updating the sketch matrix and providing frequency estimations.
Attributes: df (pandas.DataFrame): The dataset containing the values. epsilon (float): The privacy parameter epsilon. k (int): The number of hash functions. m (int): The size of the sketch. dataset (list): The list of values in the dataset. domain (list): The unique values in the dataset. N (int): The size of the dataset. H (list): The list of hash functions. M (numpy.ndarray): The sketch matrix.
26 def __init__(self, epsilon, k, m, df, H): 27 """ 28 Initializes the privateCMSServer class with the given parameters. 29 30 Args: 31 epsilon (float): The privacy parameter epsilon. 32 k (int): The number of hash functions. 33 m (int): The size of the sketch. 34 df (pandas.DataFrame): The dataset containing the values. 35 H (list): The list of hash functions. 36 """ 37 self.df = df 38 self.epsilon = epsilon 39 self.k = k 40 self.m = m 41 self.dataset = self.df['value'].tolist() 42 self.domain = self.df['value'].unique().tolist() 43 self.N = len(self.dataset) 44 self.H = H 45 46 # Creation of the sketch matrix 47 self.M = np.zeros((self.k, self.m))
Initializes the privateCMSServer class with the given parameters.
Args: epsilon (float): The privacy parameter epsilon. k (int): The number of hash functions. m (int): The size of the sketch. df (pandas.DataFrame): The dataset containing the values. H (list): The list of hash functions.
49 def update_sketch_matrix(self,v,j): 50 """ 51 Updates the sketch matrix based on the given privatized data. 52 53 Args: 54 v (numpy.ndarray): The privatized vector. 55 j (int): The index of the hash function used. 56 """ 57 c_e = (np.exp(self.epsilon/2)+1) / ((np.exp(self.epsilon/2))-1) 58 x = self.k * ((c_e/2) * v + (1/2) * np.ones_like(v)) 59 for i in range (self.m): 60 self.M[j,i] += x[i]
Updates the sketch matrix based on the given privatized data.
Args: v (numpy.ndarray): The privatized vector. j (int): The index of the hash function used.
def execute_server(self, privatized_data):
    """
    Feeds every privatized report into the sketch matrix, then estimates the
    frequency of each element of the domain, reporting progress for both steps.

    Args:
        privatized_data (list): The privatized data from the client. Each
            item is indexed as ``item[0]`` (vector) and ``item[1]`` (hash index).

    Returns:
        dict: A dictionary containing the estimated frequencies for each element.
    """
    estimates = {}
    with Progress() as progress:
        # Phase 1: accumulate all client reports into the sketch.
        update_task = progress.add_task('[cyan]Update sketch matrix', total=len(privatized_data))
        for report in privatized_data:
            vector, hash_index = report[0], report[1]
            self.update_sketch_matrix(vector, hash_index)
            progress.update(update_task, advance=1)

        # Phase 2: query the finished sketch once per domain element.
        estimate_task = progress.add_task('[cyan]Obtaining histogram of estimated frequencies', total=len(self.domain))
        for element in self.domain:
            estimates[element] = self.estimate_server(element)
            progress.update(estimate_task, advance=1)

    return estimates
Executes the server-side operations, including updating the sketch matrix and estimating the frequencies.
Args: privatized_data (list): The privatized data from the client.
Returns: dict: A dictionary containing the estimated frequencies for each element.
88 def estimate_server(self,d): 89 """ 90 Estimates the frequency of an element based on the current sketch matrix. 91 92 Args: 93 d (any): The element whose frequency is to be estimated. 94 95 Returns: 96 float: The estimated frequency of the element. 97 """ 98 sum_aux = 0 99 for i in range(self.k): 100 selected_hash = self.H[i] 101 sum_aux += self.M[i, selected_hash(d)] 102 103 f_estimated = (self.m/(self.m-1))*((sum_aux/self.k)-(self.N/self.m)) 104 return f_estimated
Estimates the frequency of an element based on the current sketch matrix.
Args: d (any): The element whose frequency is to be estimated.
Returns: float: The estimated frequency of the element.
106 def query_server(self, query_element): 107 """ 108 Queries the server for the estimated frequency of an element. 109 110 Args: 111 query_element (any): The element to query. 112 113 Returns: 114 float or str: The estimated frequency of the element, or a message if the element is not in the domain. 115 """ 116 if query_element not in self.domain: 117 return "Element not in the domain" 118 estimation = self.estimate_server(query_element) 119 return estimation
Queries the server for the estimated frequency of an element.
Args: query_element (any): The element to query.
Returns: float or str: The estimated frequency of the element, or a message if the element is not in the domain.
def run_private_cms_server(k, m, e, df, H, privatized_data):
    """
    Runs the server-side operations for the Private Count-Mean Sketch, including
    estimating frequencies and querying the server.

    The privatized data is written to ``../data/private/privatized_data.csv``
    (relative to the current working directory), the estimated histogram is
    passed to ``display_results``, and an interactive prompt then answers
    queries until the user types 'exit'.

    Args:
        k (int): The number of hash functions.
        m (int): The size of the sketch.
        e (float): The privacy parameter epsilon.
        df (pandas.DataFrame): The dataset containing the values.
        H (list): The list of hash functions.
        privatized_data (list): The privatized data from the client.
    """
    # Initialize the server Count-Mean Sketch
    server = privateCMSServer(e, k, m, df, H)

    # Save the privatized data
    privatized_data_save = pd.DataFrame(privatized_data)
    privatized_data_file = os.path.join(os.path.join('..', 'data', 'private'), 'privatized_data.csv')
    privatized_data_save.to_csv(privatized_data_file, index=False)

    # Execute the server
    f_estimated = server.execute_server(privatized_data)

    # Show the results
    display_results(df, f_estimated)

    # Query the server
    # NOTE(review): input() always yields a str; if the 'value' column holds
    # non-string values no query can match the domain - confirm with the data.
    while True:
        query = input("Enter an element to query the server or 'exit' to finish: ")
        if query.lower() == 'exit':
            break
        estimation = server.query_server(query)
        if isinstance(estimation, str):
            # query_server returns a message string for unknown elements;
            # applying the ':.2f' format to it would raise ValueError.
            print(estimation)
        else:
            print(f"The estimated frequency of {query} is {estimation:.2f}")
Runs the server-side operations for the Private Count-Mean Sketch, including estimating frequencies and querying the server.
Args: k (int): The number of hash functions. m (int): The size of the sketch. e (float): The privacy parameter epsilon. df (pandas.DataFrame): The dataset containing the values. H (list): The list of hash functions. privatized_data (list): The privatized data from the client.