JSA通过ffi调用duckdb进行SQL查询——非odbc
创作者俱乐部成员
论坛里很多朋友都希望wps可以支持duckdb
这样就可以极大提升wps查询外部数据源的效率
今天在linux里尝试通过ffi调用libduckdb.so
直接上代码:
| 📌 | function aa() { const OutPointer = ffi.Buffer("pointer") const DuckDBResult = ffi.Struct([ { name: "column_count", type: "size_t" }, { name: "row_count", type: "size_t" }, { name: "rows_changed", type: "size_t" }, { name: "columns", type: "pointer" }, { name: "error_message", type: "string" }, { name: "internal", type: "pointer" } ]) const ResultBufferType = ffi.Buffer(DuckDBResult) const duckdb = ffi.LoadLibrary("libduckdb", { duckdb_open: { returnType: "int32", parameters: ["pointer", OutPointer] }, duckdb_close: { returnType: "void", parameters: [OutPointer] }, duckdb_connect: { returnType: "int32", parameters: ["pointer", OutPointer] }, duckdb_disconnect: { returnType: "void", parameters: [OutPointer] }, duckdb_query: { returnType: "int32", parameters: ["pointer", "string", ResultBufferType] }, duckdb_destroy_result: { returnType: "void", parameters: [OutPointer] }, duckdb_row_count: { returnType: "size_t", parameters: [ResultBufferType] }, duckdb_column_count: { returnType: "size_t", parameters: [ResultBufferType] }, duckdb_column_name: { returnType: "string", parameters: [ResultBufferType, "size_t"] }, duckdb_value_varchar: { returnType: "string", parameters: [ResultBufferType, "size_t", "size_t"] } }) let dbBuffer = new OutPointer(); let status = duckdb.duckdb_open(null, dbBuffer); if (status !== 0) { throw new Error(`打开数据库失败,状态码: ${status}`); }
let dbHandle = dbBuffer.read(); console.log("数据库打开成功,句柄地址:", dbHandle.ToBigInt().toString(16));
let connBuffer = new OutPointer(); status = duckdb.duckdb_connect(dbHandle, connBuffer); if (status !== 0) { throw new Error(`创建连接失败,状态码: ${status}`); }
let connHandle = connBuffer.read(); console.log("连接创建成功");
let resultBuffer = new ResultBufferType(); status = duckdb.duckdb_query(connHandle, "SELECT * FROM '/home/wils/a.csv'", resultBuffer); if (status !== 0) { throw new Error(`执行 SQL 失败,状态码: ${status}`); } let rowCount = duckdb.duckdb_row_count(resultBuffer); console.log(`查询成功,共 ${rowCount} 行`); let colCount = duckdb.duckdb_column_count(resultBuffer); console.log(`查询成功,共 ${colCount} 列`);
let columnNames = []; for (let i = 0n; i < colCount; i++) { columnNames.push(duckdb.duckdb_column_name(resultBuffer, i)); } console.log("列名:", columnNames.join(", "));
for (let row = 0n; row < rowCount; row++) { let rowData = []; for (let col = 0n; col < colCount; col++) { let value = duckdb.duckdb_value_varchar(resultBuffer, col, row); rowData.push(value ?? "NULL"); } console.log(`行 ${row}:`, rowData.join(" | ")); }
duckdb.duckdb_destroy_result(resultBuffer); duckdb.duckdb_disconnect(connBuffer); console.log("连接已关闭"); duckdb.duckdb_close(dbBuffer); console.log("数据库已关闭"); } |
基本都是让ai帮忙写的,duckdb的接口很清晰
只是发现"size_t"类型的参数似乎只能传递bigint,所以这里的行列数字写成0n
另外,这里只是把所有数据当成varchar读出来,如果整列读取会快很多,但需要判断每列类型分别处理,会多写很多代码
另外,这里没有严谨的处理报错,实际使用时需要更严格一些
最后,希望感兴趣的朋友多来尝试交流,一起完善这个方法