Example input
Prepare a Python script for extracting the following data: "TREATS", "BISCUITS", "CHEWS". Don't use the `in` operator, because the columns can contain 'float' values.
Example output
import pandas as pd
# Keywords that identify each harmonized snack type. Iteration order matters:
# types listed first are tried first when a cell is scanned.
snack_type_map = dict(
    TREATS=["FRIANDISE"],
    BISCUITS=["BISCUIT", "BISC"],
    CHEWS=["CHEWS", "CHEW"],
)
# Load the source workbook into a DataFrame.
df = pd.read_excel(io='data.xlsx')
# Define a function to check for snack type
def get_snack_type(row, type_map=None, columns=('FORM_TYPE', 'SKU_DESCRIPTION')):
    """Return the harmonized snack type for one row, or '' when nothing matches.

    Columns are searched in the order given, so a match in an earlier column
    wins over any match in a later one. Within a column, snack types and
    their keywords are tried in the mapping's iteration order; the first
    keyword found as a substring of the cell decides the result. Matching
    ignores letter case.

    Args:
        row: Record indexable by column name (e.g. a DataFrame row or a dict).
        type_map: Mapping of snack type -> list of keywords. Defaults to the
            module-level ``snack_type_map``.
        columns: Column names to inspect, in priority order.

    Returns:
        The first matching snack type, or an empty string.

    Raises:
        KeyError: If a column that needs to be inspected is missing from
            ``row``.
    """
    if type_map is None:
        type_map = snack_type_map

    for column in columns:
        cell = row[column]
        # Cells may hold non-string values (e.g. floats); those cannot be
        # searched for keywords, so skip them.
        if not isinstance(cell, str):
            continue
        text = cell.upper()
        for snack_type, keywords in type_map.items():
            for keyword in keywords:
                # str.find is used instead of the `in` operator on cell
                # values, as the original request asks.
                if text.find(keyword.upper()) != -1:
                    return snack_type

    # Return empty string if no match found
    return ''
# Classify every row and store the result in a new harmonized column.
df['SNACK_TYPE_HARMONIZED'] = df.apply(get_snack_type, axis=1)
# Write the enriched table to a separate workbook, without the index column.
df.to_excel(excel_writer='data_harmonized.xlsx', index=False)