Function Examples

Function 1: dot product of two vectors, both passed as double[]


    import numpy as np

    def dot_product(vector1, vector2):
        """Return the dot product of two numeric sequences.

        Both arguments are converted to NumPy arrays and then combined
        with ``np.dot``.
        """
        left = np.array(vector1)
        right = np.array(vector2)
        return np.dot(left, right)
    

Function 2: dot product of one double[] vector and one vector stored as a comma-separated varchar


    import numpy as np

    def dot_product2(vector1, vector2):
        """Return the dot product of a numeric sequence and a comma-separated string.

        ``vector2`` is split on ``,`` and every piece is parsed with
        ``float`` before both operands are handed to ``np.dot``.
        """
        parsed = list(map(float, vector2.split(',')))
        return np.dot(np.array(vector1), np.array(parsed))
    

Register UDF: register both dot product functions


    import duckdb
    from duckdb.typing import VARCHAR, DOUBLE

    # Register both UDFs on DuckDB's default connection. The `duckdb` module
    # has no `con` attribute, so the module-level `create_function` is used.
    # The vector arguments are declared as LIST(DOUBLE) via `list_type`, which
    # matches the `[...]` list literals passed in SQL; `array_type` describes
    # fixed-size arrays in current DuckDB and needs an explicit size.
    # NOTE(review): side_effects=True is kept from the original; a dot product
    # is deterministic, so this flag can probably be dropped - confirm.
    duckdb.create_function(
        "dot_product", dot_product,
        [duckdb.list_type(DOUBLE), duckdb.list_type(DOUBLE)],
        DOUBLE, side_effects=True)
    duckdb.create_function(
        "dot_product2", dot_product2,
        [duckdb.list_type(DOUBLE), VARCHAR],
        DOUBLE, side_effects=True)
    

Run UDF: execute and time the query with each dot product function


    import duckdb
    from duckdb.typing import VARCHAR, DOUBLE

    import time

    # `vectors` (the query vector), `col1`, `col2` and `tablename` must be
    # defined before this snippet runs.

    # Query vector rendered as the inside of a SQL list literal.
    vector1 = ', '.join(str(value) for value in vectors)
    # Quoted column names "0" .. "383", one per vector component.
    vector_columns = ', '.join(f'"{i}"' for i in range(384))
    # Interpolate the column names individually; formatting the tuple
    # (col1, col2) directly would put a Python tuple repr into the SQL.
    select = f"{col1}, {col2}"

    # Variant 1: the stored vector is spread over one DOUBLE column per component.
    start_time = time.perf_counter()
    duckdb.execute(f"""SELECT {select},
            dot_product( [{vector1}], [{vector_columns}]) AS similarity
            FROM {tablename} ORDER BY similarity DESC""").fetchdf()
    end_time = time.perf_counter()
    print(f"Time {end_time - start_time}")

    # Variant 2: the stored vector is a single comma-separated VARCHAR column.
    start_time = time.perf_counter()
    duckdb.execute(f"""SELECT {select},
            dot_product2( [{vector1}], vectors_col_name ) AS similarity
            FROM {tablename} ORDER BY similarity DESC""").fetchdf()
    end_time = time.perf_counter()
    print(f"Time {end_time - start_time}")