src.private_count_min.private_cmins_server

 1import os
 2import importlib.util
 3import numpy as np
 4import pandas as pd
 5import random
 6import argparse
 7from sympy import primerange
 8from progress.bar import Bar
 9import pickle
10
11from utils.utils import load_dataset, display_results
12
class privateCMinSServer:
    """Server side of the private Count-Min Sketch frequency estimator.

    The server accumulates privatized client reports into a ``k x m`` sketch
    matrix and answers frequency queries from it.

    Args:
        epsilon: Parameter used to compute the scaling constant ``c_e``
            applied to every report (presumably the privacy budget).
        k: Number of hash functions, i.e. rows of the sketch matrix.
        m: Number of columns of the sketch matrix.
        dataset: Sized collection of records; stored, and its length is
            kept as ``N``.
        domain: Collection of elements whose frequency can be estimated.
        H: Sequence of ``k`` callables mapping an element to a column index.
    """

    def __init__(self, epsilon, k, m, dataset, domain, H):
        self.epsilon = epsilon
        self.k = k
        self.m = m
        self.dataset = dataset
        self.domain = domain
        self.N = len(dataset)
        self.H = H

        # Creation of the sketch matrix
        self.M = np.zeros((self.k, self.m))

    def update_sketch_matrix(self, v, j):
        """Add one privatized report to row ``j`` of the sketch matrix.

        Args:
            v: One-dimensional vector with at least ``m`` entries; only the
                first ``m`` entries are used.
            j: Row of the sketch matrix to update.

        Raises:
            IndexError: If ``v`` is not one-dimensional or has fewer than
                ``m`` entries. The matrix is left untouched in that case.
        """
        report = np.asarray(v)
        # Validate before touching the matrix so that a malformed report
        # cannot leave a partially updated row behind.
        if report.ndim != 1 or report.shape[0] < self.m:
            raise IndexError(
                f"report must be a 1-D vector with at least {self.m} entries"
            )

        exp_half_eps = np.exp(self.epsilon / 2)
        c_e = (exp_half_eps + 1) / (exp_half_eps - 1)
        # Same per-cell value as k * ((c_e / 2) * v + 1 / 2), applied to the
        # whole row at once instead of cell by cell.
        self.M[j, :] += self.k * ((c_e / 2) * report[:self.m] + 0.5)

    def execute_server(self, privatized_data):
        """Ingest all privatized reports and estimate every domain element.

        Args:
            privatized_data: Sized collection of items where ``item[0]`` is
                the report vector and ``item[1]`` is the sketch row it
                belongs to.

        Returns:
            dict mapping each element of ``self.domain`` to its estimate.
        """
        update_bar = Bar('Update sketch matrix', max=len(privatized_data), suffix='%(percent)d%%')
        for data in privatized_data:
            self.update_sketch_matrix(data[0], data[1])
            update_bar.next()
        update_bar.finish()

        # The estimation phase gets its own bar: the first one is already
        # finished and was sized for the reports, not for the domain.
        elements = list(self.domain)
        estimate_bar = Bar('Estimate frequencies', max=len(elements), suffix='%(percent)d%%')
        F_estimated = {}
        for x in elements:
            F_estimated[x] = self.estimate_server(x)
            estimate_bar.next()
        estimate_bar.finish()
        return F_estimated

    def estimate_server(self, d):
        """Estimate the frequency of element ``d`` from the sketch matrix.

        For each of the ``k`` rows, the cell selected by that row's hash
        function is read; the smallest of those cells is rescaled as
        ``m / (m - 1) * (minimum - N / m)``.

        Args:
            d: Element to estimate; it is passed to every hash in ``H``.

        Returns:
            The estimated frequency of ``d``.
        """
        minimum = min(self.M[i, self.H[i](d)] for i in range(self.k))
        return (self.m / (self.m - 1)) * (minimum - (self.N / self.m))

    def query_server(self, query_element):
        """Return the estimate for ``query_element``.

        Returns:
            The estimated frequency, or the string
            ``"Element not in the domain"`` when the element is unknown.
        """
        if query_element not in self.domain:
            return "Element not in the domain"
        return self.estimate_server(query_element)
62
63    
def run_private_cmins_server(k, m, e, d, H):
    """Run the private Count-Min Sketch server for dataset ``d``.

    Loads the ``{d}_filtered`` dataset, feeds the pickled privatized reports
    to the server, displays the estimated frequencies and then answers
    interactive queries until the user types ``exit``.

    Args:
        k: Number of hash functions (sketch rows).
        m: Number of sketch columns.
        e: Epsilon value forwarded to the server.
        d: Dataset name used to build the dataset and pickle file names.
        H: Sequence of hash callables forwarded to the server.
    """
    dataset, df, domain = load_dataset(f"{d}_filtered")

    # Initialize the server Count-Min Sketch
    server = privateCMinSServer(e, k, m, dataset, domain, H)

    # Obtain the privatized data
    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(script_dir, "../../data/privatized")

    fav_output_file = os.path.join(output_dir, f"{d}_private_fav.pkl")
    default_output_file = os.path.join(output_dir, f"{d}_private.pkl")

    # Prefer the "_fav" variant when it exists, otherwise use the default file.
    output_file = fav_output_file if os.path.exists(fav_output_file) else default_output_file
    # NOTE: pickle.load can execute arbitrary code; only load files produced
    # by the trusted client step of this project.
    with open(output_file, 'rb') as f:
        privatized_data = pickle.load(f)

    # Execute the server
    f_estimated = server.execute_server(privatized_data)

    # Show the results
    os.system('cls' if os.name == 'nt' else 'clear>/dev/null')
    display_results(df, f_estimated)

    # Query the server
    while True:
        query = input("Enter an element to query the server or 'exit' to finish: ")
        if query.lower() == 'exit':
            break
        estimation = server.query_server(query)
        print(f"The estimated frequency of {query} is {estimation}")
class privateCMinSServer:
class privateCMinSServer:
    """Server side of the private Count-Min Sketch frequency estimator.

    The server accumulates privatized client reports into a ``k x m`` sketch
    matrix and answers frequency queries from it.

    Args:
        epsilon: Parameter used to compute the scaling constant ``c_e``
            applied to every report (presumably the privacy budget).
        k: Number of hash functions, i.e. rows of the sketch matrix.
        m: Number of columns of the sketch matrix.
        dataset: Sized collection of records; stored, and its length is
            kept as ``N``.
        domain: Collection of elements whose frequency can be estimated.
        H: Sequence of ``k`` callables mapping an element to a column index.
    """

    def __init__(self, epsilon, k, m, dataset, domain, H):
        self.epsilon = epsilon
        self.k = k
        self.m = m
        self.dataset = dataset
        self.domain = domain
        self.N = len(dataset)
        self.H = H

        # Creation of the sketch matrix
        self.M = np.zeros((self.k, self.m))

    def update_sketch_matrix(self, v, j):
        """Add one privatized report to row ``j`` of the sketch matrix.

        Args:
            v: One-dimensional vector with at least ``m`` entries; only the
                first ``m`` entries are used.
            j: Row of the sketch matrix to update.

        Raises:
            IndexError: If ``v`` is not one-dimensional or has fewer than
                ``m`` entries. The matrix is left untouched in that case.
        """
        report = np.asarray(v)
        # Validate before touching the matrix so that a malformed report
        # cannot leave a partially updated row behind.
        if report.ndim != 1 or report.shape[0] < self.m:
            raise IndexError(
                f"report must be a 1-D vector with at least {self.m} entries"
            )

        exp_half_eps = np.exp(self.epsilon / 2)
        c_e = (exp_half_eps + 1) / (exp_half_eps - 1)
        # Same per-cell value as k * ((c_e / 2) * v + 1 / 2), applied to the
        # whole row at once instead of cell by cell.
        self.M[j, :] += self.k * ((c_e / 2) * report[:self.m] + 0.5)

    def execute_server(self, privatized_data):
        """Ingest all privatized reports and estimate every domain element.

        Args:
            privatized_data: Sized collection of items where ``item[0]`` is
                the report vector and ``item[1]`` is the sketch row it
                belongs to.

        Returns:
            dict mapping each element of ``self.domain`` to its estimate.
        """
        update_bar = Bar('Update sketch matrix', max=len(privatized_data), suffix='%(percent)d%%')
        for data in privatized_data:
            self.update_sketch_matrix(data[0], data[1])
            update_bar.next()
        update_bar.finish()

        # The estimation phase gets its own bar: the first one is already
        # finished and was sized for the reports, not for the domain.
        elements = list(self.domain)
        estimate_bar = Bar('Estimate frequencies', max=len(elements), suffix='%(percent)d%%')
        F_estimated = {}
        for x in elements:
            F_estimated[x] = self.estimate_server(x)
            estimate_bar.next()
        estimate_bar.finish()
        return F_estimated

    def estimate_server(self, d):
        """Estimate the frequency of element ``d`` from the sketch matrix.

        For each of the ``k`` rows, the cell selected by that row's hash
        function is read; the smallest of those cells is rescaled as
        ``m / (m - 1) * (minimum - N / m)``.

        Args:
            d: Element to estimate; it is passed to every hash in ``H``.

        Returns:
            The estimated frequency of ``d``.
        """
        minimum = min(self.M[i, self.H[i](d)] for i in range(self.k))
        return (self.m / (self.m - 1)) * (minimum - (self.N / self.m))

    def query_server(self, query_element):
        """Return the estimate for ``query_element``.

        Returns:
            The estimated frequency, or the string
            ``"Element not in the domain"`` when the element is unknown.
        """
        if query_element not in self.domain:
            return "Element not in the domain"
        return self.estimate_server(query_element)
privateCMinSServer(epsilon, k, m, dataset, domain, H)
16    def __init__(self, epsilon, k, m, dataset, domain, H):
17        self.epsilon = epsilon
18        self.k = k
19        self.m = m
20        self.dataset = dataset
21        self.domain = domain
22        self.N = len(dataset)
23        self.H = H
24
25        # Creation of the sketch matrix
26        self.M = np.zeros((self.k, self.m))
epsilon
k
m
dataset
domain
N
H
M
def update_sketch_matrix(self, v, j):
28    def update_sketch_matrix(self,v,j):
29        c_e = (np.exp(self.epsilon/2)+1) / ((np.exp(self.epsilon/2))-1)
30        x = self.k * ((c_e/2) * v + (1/2) * np.ones_like(v))
31        for i in range (self.m):
32            self.M[j,i] += x[i]
def execute_server(self, privatized_data):
34    def execute_server(self,privatized_data):
35        bar = Bar('Update sketch matrix', max=len(privatized_data), suffix='%(percent)d%%')
36
37        for data in privatized_data:
38            self.update_sketch_matrix(data[0],data[1])
39            bar.next()
40        bar.finish()
41
42        F_estimated = {}
43        for x in self.domain:
44            F_estimated[x] = self.estimate_server(x)
45            bar.next()
46        bar.finish()
47        return F_estimated
def estimate_server(self, d):
49    def estimate_server(self,d):
50        v_minimum = []
51        for i in range(self.k):
52            selected_hash = self.H[i]
53            v_minimum.append(self.M[i, selected_hash(d)])
54        
55        minimum = min(v_minimum)
56        f_estimated = (self.m / (self.m-1)) * (minimum - (self.N/self.m))
57        return f_estimated
def query_server(self, query_element):
59    def query_server(self, query_element):
60        if query_element not in self.domain:
61            return "Element not in the domain"
62        estimation = self.estimate_server(query_element)
63        return estimation
def run_private_cmins_server(k, m, e, d, H):
def run_private_cmins_server(k, m, e, d, H):
    """Run the private Count-Min Sketch server for dataset ``d``.

    Loads the ``{d}_filtered`` dataset, feeds the pickled privatized reports
    to the server, displays the estimated frequencies and then answers
    interactive queries until the user types ``exit``.

    Args:
        k: Number of hash functions (sketch rows).
        m: Number of sketch columns.
        e: Epsilon value forwarded to the server.
        d: Dataset name used to build the dataset and pickle file names.
        H: Sequence of hash callables forwarded to the server.
    """
    dataset, df, domain = load_dataset(f"{d}_filtered")

    # Initialize the server Count-Min Sketch
    server = privateCMinSServer(e, k, m, dataset, domain, H)

    # Obtain the privatized data
    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(script_dir, "../../data/privatized")

    fav_output_file = os.path.join(output_dir, f"{d}_private_fav.pkl")
    default_output_file = os.path.join(output_dir, f"{d}_private.pkl")

    # Prefer the "_fav" variant when it exists, otherwise use the default file.
    output_file = fav_output_file if os.path.exists(fav_output_file) else default_output_file
    # NOTE: pickle.load can execute arbitrary code; only load files produced
    # by the trusted client step of this project.
    with open(output_file, 'rb') as f:
        privatized_data = pickle.load(f)

    # Execute the server
    f_estimated = server.execute_server(privatized_data)

    # Show the results
    os.system('cls' if os.name == 'nt' else 'clear>/dev/null')
    display_results(df, f_estimated)

    # Query the server
    while True:
        query = input("Enter an element to query the server or 'exit' to finish: ")
        if query.lower() == 'exit':
            break
        estimation = server.query_server(query)
        print(f"The estimated frequency of {query} is {estimation}")