src.main.general_method
import ast

import pandas as pd
from colorama import Fore, Style
from tabulate import tabulate

from main.individual_method import IndividualMethod
from scripts.parameter_fitting import PrivacyUtilityOptimizer
from scripts.preprocess import run_data_processor


def run_general_method():
    """
    Run the interactive general method for the privacy/utility trade-off.

    Steps:
    1. Ask for the error metric to optimize (MSE, Lp, or Percentage Error)
       and the target value to reach.
    2. Identify the user with the most rows in the dataset.
    3. Compute k and m with IndividualMethod on that user's data, run the
       no-privacy and private algorithms, and select an algorithm. This
       step repeats for as long as a new value of ϵ is requested.
    4. Run PrivacyUtilityOptimizer.utility_error for every user and print
       one result table per user.

    The function takes no arguments: the dataset is obtained from
    run_data_processor(), and everything else is read from standard input.
    It returns nothing; all results are printed.

    Raises:
        ValueError: If the dataset is empty, or if a numeric prompt is
            answered with a value that cannot be parsed as a float.
    """
    # Preprocess the dataset
    df = run_data_processor()

    # The file name is not available here (loading is done inside
    # run_data_processor), so the message must not reference it.
    print(f"Processing {Style.BRIGHT}preprocessed dataset{Style.RESET_ALL}")

    # Step 1: Set value for error metric
    print("📊 Selection of the Optimization Metric")
    metric = input(
        f"\nEnter the metric to optimize: \n1. {Fore.CYAN}MSE{Style.RESET_ALL}"
        f"\n2. {Fore.CYAN}LP{Style.RESET_ALL}"
        f"\n3. {Fore.CYAN}Porcentual Error{Style.RESET_ALL} \nSelect: "
    ).strip()
    # Re-prompt on an unknown option; otherwise Lp and p would stay unbound
    # and fail much later with an unrelated NameError.
    while metric not in ("1", "2", "3"):
        metric = input("Invalid option. Select 1, 2 or 3: ").strip()

    if metric == "1":
        Lp = float(input("⭢ Enter the MSE to reach: "))
        p = 2
    elif metric == "2":
        Lp = float(input("⭢ Enter the Lp to reach: "))
        p = float(input("⭢ Enter the type of error ρ: "))
    else:
        Lp = float(input(f"⭢ Enter the {Fore.CYAN}Porcentual Error{Style.RESET_ALL} to reach: "))
        p = 1

    # Step 2: Set the user with more data
    # Values stored as strings are parsed with literal_eval (literals only,
    # no code execution); each element then becomes its own row.
    df['values'] = df['values'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
    df = df.explode("values", ignore_index=True).rename(columns={"values": "value"})
    if df.empty:
        raise ValueError("The preprocessed dataset contains no user data.")

    user_counts = df["user"].value_counts()  # Number of rows per user
    max_user = user_counts.idxmax()  # User with the most rows
    df_user = df[df["user"] == max_user]

    # Step 3: Set k and m
    e = 150  # initial value of ϵ used for the first run
    while True:
        individual = IndividualMethod(df_user)
        k, m = individual.calculate_k_m()
        individual.execute_no_privacy()
        individual.execute_private_algorithms(e)
        algorithm = individual.select_algorithm()

        print("\n Do you want to test with another value of ϵ? (yes/no): ")
        if input().strip().lower() in ("no", "n"):
            break
        # Keep ϵ numeric, like the initial value, instead of a raw string.
        e = float(input("⭢ Enter the value of ϵ: "))

    # Step 4: Execute utility error
    headers = ["Element", "Real Frequency", "Real Percentage", "Estimated Frequency",
               "Estimated Percentage", "Estimation Difference", "Percentage Error"]
    results = []
    for user in df["user"].unique():
        print(f"Processing user {user}")
        df_user_specific = df[df["user"] == user]

        optimizer = PrivacyUtilityOptimizer(df_user_specific, k, m, algorithm)
        # First returned item is reported as this user's ϵ; the two middle
        # items are not used here.
        user_e, _, _, data_table = optimizer.utility_error(Lp, p, metric)

        # Carry the user with its result so printing needs no index lookup.
        results.append((user, user_e, pd.DataFrame(data_table, columns=headers)))

    for user, user_e, error_table in results:
        print(f"\nUser: {user}, ϵ:{user_e}, k:{k}, m:{m}")
        print(tabulate(error_table, headers='keys', tablefmt='fancy_grid'))


if __name__ == "__main__":
    run_general_method()
def
run_general_method():
def run_general_method():
    """
    Run the interactive general method for the privacy/utility trade-off.

    Steps:
    1. Ask for the error metric to optimize (MSE, Lp, or Percentage Error)
       and the target value to reach.
    2. Identify the user with the most rows in the dataset.
    3. Compute k and m with IndividualMethod on that user's data, run the
       no-privacy and private algorithms, and select an algorithm. This
       step repeats for as long as a new value of ϵ is requested.
    4. Run PrivacyUtilityOptimizer.utility_error for every user and print
       one result table per user.

    The function takes no arguments: the dataset is obtained from
    run_data_processor(), and everything else is read from standard input.
    It returns nothing; all results are printed.

    Raises:
        ValueError: If the dataset is empty, or if a numeric prompt is
            answered with a value that cannot be parsed as a float.
    """
    # Preprocess the dataset
    df = run_data_processor()

    # The file name is not available here (loading is done inside
    # run_data_processor), so the message must not reference it.
    print(f"Processing {Style.BRIGHT}preprocessed dataset{Style.RESET_ALL}")

    # Step 1: Set value for error metric
    print("📊 Selection of the Optimization Metric")
    metric = input(
        f"\nEnter the metric to optimize: \n1. {Fore.CYAN}MSE{Style.RESET_ALL}"
        f"\n2. {Fore.CYAN}LP{Style.RESET_ALL}"
        f"\n3. {Fore.CYAN}Porcentual Error{Style.RESET_ALL} \nSelect: "
    ).strip()
    # Re-prompt on an unknown option; otherwise Lp and p would stay unbound
    # and fail much later with an unrelated NameError.
    while metric not in ("1", "2", "3"):
        metric = input("Invalid option. Select 1, 2 or 3: ").strip()

    if metric == "1":
        Lp = float(input("⭢ Enter the MSE to reach: "))
        p = 2
    elif metric == "2":
        Lp = float(input("⭢ Enter the Lp to reach: "))
        p = float(input("⭢ Enter the type of error ρ: "))
    else:
        Lp = float(input(f"⭢ Enter the {Fore.CYAN}Porcentual Error{Style.RESET_ALL} to reach: "))
        p = 1

    # Step 2: Set the user with more data
    # Values stored as strings are parsed with literal_eval (literals only,
    # no code execution); each element then becomes its own row.
    df['values'] = df['values'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
    df = df.explode("values", ignore_index=True).rename(columns={"values": "value"})
    if df.empty:
        raise ValueError("The preprocessed dataset contains no user data.")

    user_counts = df["user"].value_counts()  # Number of rows per user
    max_user = user_counts.idxmax()  # User with the most rows
    df_user = df[df["user"] == max_user]

    # Step 3: Set k and m
    e = 150  # initial value of ϵ used for the first run
    while True:
        individual = IndividualMethod(df_user)
        k, m = individual.calculate_k_m()
        individual.execute_no_privacy()
        individual.execute_private_algorithms(e)
        algorithm = individual.select_algorithm()

        print("\n Do you want to test with another value of ϵ? (yes/no): ")
        if input().strip().lower() in ("no", "n"):
            break
        # Keep ϵ numeric, like the initial value, instead of a raw string.
        e = float(input("⭢ Enter the value of ϵ: "))

    # Step 4: Execute utility error
    headers = ["Element", "Real Frequency", "Real Percentage", "Estimated Frequency",
               "Estimated Percentage", "Estimation Difference", "Percentage Error"]
    results = []
    for user in df["user"].unique():
        print(f"Processing user {user}")
        df_user_specific = df[df["user"] == user]

        optimizer = PrivacyUtilityOptimizer(df_user_specific, k, m, algorithm)
        # First returned item is reported as this user's ϵ; the two middle
        # items are not used here.
        user_e, _, _, data_table = optimizer.utility_error(Lp, p, metric)

        # Carry the user with its result so printing needs no index lookup.
        results.append((user, user_e, pd.DataFrame(data_table, columns=headers)))

    for user, user_e, error_table in results:
        print(f"\nUser: {user}, ϵ:{user_e}, k:{k}, m:{m}")
        print(tabulate(error_table, headers='keys', tablefmt='fancy_grid'))
Executes the general method for optimizing privacy and utility trade-offs.
Steps:
- Selects the error metric to optimize (MSE, LP, or Percentage Error).
- Identifies the user with the most data in the dataset.
- Calculates k and m values using the IndividualMethod class.
- Executes no-privacy and private algorithms.
- Optimizes privacy-utility trade-off for each user.
Args: None. The function takes no parameters; the dataset (a pd.DataFrame containing user data with frequency values) is obtained internally from run_data_processor().