Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions sqlite-vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -3012,13 +3012,19 @@ int vec0_parse_vector_column(const char *source, int source_length,
token.token_type != TOKEN_TYPE_IDENTIFIER) {
return SQLITE_EMPTY;
}
if (sqlite3_strnicmp(token.start, "float", 5) == 0 ||
sqlite3_strnicmp(token.start, "f32", 3) == 0) {
// Match the full identifier, not just a prefix: `sqlite3_strnicmp` only
// compares the given number of bytes, so a bare prefix check would coerce
// typos and lookalikes (e.g. `float16`, `bitcoin`) to a real type instead of
// rejecting them.
const int typeLength = token.end - token.start;
if ((typeLength == 5 && sqlite3_strnicmp(token.start, "float", 5) == 0) ||
(typeLength == 7 && sqlite3_strnicmp(token.start, "float32", 7) == 0) ||
(typeLength == 3 && sqlite3_strnicmp(token.start, "f32", 3) == 0)) {
elementType = SQLITE_VEC_ELEMENT_TYPE_FLOAT32;
} else if (sqlite3_strnicmp(token.start, "int8", 4) == 0 ||
sqlite3_strnicmp(token.start, "i8", 2) == 0) {
} else if ((typeLength == 4 && sqlite3_strnicmp(token.start, "int8", 4) == 0) ||
(typeLength == 2 && sqlite3_strnicmp(token.start, "i8", 2) == 0)) {
elementType = SQLITE_VEC_ELEMENT_TYPE_INT8;
} else if (sqlite3_strnicmp(token.start, "bit", 3) == 0) {
} else if (typeLength == 3 && sqlite3_strnicmp(token.start, "bit", 3) == 0) {
elementType = SQLITE_VEC_ELEMENT_TYPE_BIT;
} else {
return SQLITE_EMPTY;
Expand Down
44 changes: 44 additions & 0 deletions tests/test-column-type-parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import sqlite3

import pytest

# Column type definitions (element type plus dimension) that vec0 must keep
# accepting. Each entry is interpolated verbatim into
# `create virtual table t using vec0(a <entry>)`.
VALID_TYPE_DEFS = [
    # float32 spellings. `float32` is undocumented, but the old prefix-only
    # comparison against "float" accepted it, so it is kept on purpose to
    # avoid a silent regression.
    "float[2]",
    "f32[2]",
    "float32[2]",
    # int8 spellings.
    "int8[2]",
    "i8[2]",
    # bit spelling.
    "bit[8]",
]

# Malformed type names that look like a valid element type without being one.
# vec0 used a prefix-only strnicmp match and silently coerced prefix-sharing
# names to the prefix's type (e.g. `float16` -> float32, `bitcoin` -> bit).
# That hides typos and would silently shadow real future types like float16,
# so the parser must reject any identifier that is not an exact element-type
# spelling. `bfloat16` shares no prefix with a valid type; it is listed as a
# guard for a plausible future type name.
#
# The bit lookalikes use dimension 8, the same dimension as the accepted
# `bit[8]` case, so the type name is the only thing that can cause rejection.
# With a dimension of 2 these cases could pass for the wrong reason if vec0
# rejects that dimension for bit columns (NOTE(review): not confirmed here).
INVALID_TYPE_DEFS = [
    "floaty[2]",
    "floating[2]",
    "float16[2]",
    "f32x[2]",
    "int8_t[2]",
    "int8garbage[2]",
    "i8x[2]",
    "bitcoin[8]",
    "bits[8]",
    "bfloat16[2]",
]


@pytest.mark.parametrize("type_def", VALID_TYPE_DEFS)
def test_valid_vector_column_types_accepted(db, type_def):
    """Creating a vec0 table with a supported column type must not raise.

    `db` is a fixture defined outside this file; presumably a SQLite
    connection with the extension loaded (confirm against conftest).
    """
    create_sql = f"create virtual table t using vec0(a {type_def})"
    db.execute(create_sql)


@pytest.mark.parametrize("type_def", INVALID_TYPE_DEFS)
def test_malformed_vector_column_types_rejected(db, type_def):
    """Creating a vec0 table with a lookalike column type must fail.

    The statement is expected to raise `sqlite3.OperationalError`. `db` is a
    fixture defined outside this file; presumably a SQLite connection with
    the extension loaded (confirm against conftest).
    """
    create_sql = f"create virtual table t using vec0(a {type_def})"
    with pytest.raises(sqlite3.OperationalError):
        db.execute(create_sql)
4 changes: 2 additions & 2 deletions tests/test-loadable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1320,7 +1320,7 @@ def test_vec0_text_pk():
create virtual table t using vec0(
t_id text primary key,
aaa float[1],
bbb float8[1],
bbb float[1],
chunk_size=8
);
"""
Expand Down Expand Up @@ -1437,7 +1437,7 @@ def test_vec0_best_index():
"""
create virtual table t using vec0(
aaa float[1],
bbb float8[1]
bbb float[1]
);
"""
)
Expand Down