# Test: does Option A (withColumn with column arithmetic) work on storesDF?
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

# Initialize a Spark session (getOrCreate reuses one if it already exists,
# e.g. on Databricks where `spark` is pre-provided).
spark = SparkSession.builder.appName("databricks_example").getOrCreate()

# Synthetic store data: store id, headcount, and floor area in square feet.
data = [
    {"storeId": 1, "numberOfEmployees": 10, "sqft": 500},
    {"storeId": 2, "numberOfEmployees": 15, "sqft": 750},
    {"storeId": 3, "numberOfEmployees": 8, "sqft": 400},
]
storesDF = spark.createDataFrame(data)

# Option A: derive employees-per-square-foot via Column arithmetic.
# The try/except is deliberate — this script probes whether the expression
# is valid and reports the outcome rather than crashing.
try:
    df_a = storesDF.withColumn(
        "employeesPerSqft", col("numberOfEmployees") / col("sqft")
    )
    df_a.show()
    print("Option A works")
except Exception as e:  # broad on purpose: report any failure mode of the probe
    print("Option A doesn't work:", str(e))