Actual Behavior
I am using `graph-index-build-bftree-spherical-quantization`, based on the benchmark example, to build an index on the Ada dataset. I tried several different configurations, but every run reports a Recall of 0:
Ls, KNN, Avg cmps, Avg hops, QPS - mean(max), Avg Latency, p99 Latency, Recall, Threads
=============================================================================================================================
50, 50, 1821.4697, 60.7184, 1361.5 (1399.9), 36914.7us (37347.6us), 82491.0us (83761us), 0, 56
100, 50, 2886.2495, 110.5933, 810.8 (816.8), 61367.2us (62271.0us), 133940.4us (136886us), 0, 56
Example Code
This is the configuration file:
{
"search_directories": [
"https://github.com/ssd2/nash/anndata/ada/"
],
"jobs": [
{
"type": "graph-index-build-bftree-spherical-quantization",
"content": {
"build": {
"data_type": "float32",
"data": "ada_002_1000000_base_vectors.bin",
"distance": "squared_l2",
"max_degree": 64,
"l_build": 50,
"insert_retry": null,
"start_point_strategy": "medoid",
"alpha": 1.2,
"backedge_ratio": 1.0,
"num_threads": 56,
"multi_insert": null
},
"search_phase": {
"search-type": "topk",
"queries": "ada_002_1000000_query_vectors_10000.bin",
"groundtruth": "https://github.com/ssd2/nash/anndata/ada/ada_new_gt10",
"reps": 5,
"num_threads": [
56
],
"runs": [
{
"search_n": 50,
"search_l": [
50,
100
],
"recall_k": 10
}
]
},
"seed": 42,
"num_bits": 2,
"pre_scale": "reciprocal_mean_norm",
"transform_kind": "null",
"vector_store_config": {
"cb_size_byte": 67108864,
"leaf_page_size": 4096,
"cb_max_record_size": null,
"cb_min_record_size": null,
"read_promotion_rate": null,
"scan_promotion_rate": null,
"cb_copy_on_access_ratio": null,
"read_record_cache": null,
"cache_only": null
},
"neighbor_store_config": {
"cb_size_byte": 67108864,
"leaf_page_size": 4096,
"cb_max_record_size": null,
"cb_min_record_size": null,
"read_promotion_rate": null,
"scan_promotion_rate": null,
"cb_copy_on_access_ratio": null,
"read_record_cache": null,
"cache_only": null
},
"quant_store_config": {
"cb_size_byte": 67108864,
"leaf_page_size": 4096,
"cb_max_record_size": null,
"cb_min_record_size": null,
"read_promotion_rate": null,
"scan_promotion_rate": null,
"cb_copy_on_access_ratio": null,
"read_record_cache": null,
"cache_only": null
}
}
}
]
}
Dataset Description
Please tell us about the shape and data type of your data (e.g., 128 dimensions, 12.3 billion points, floats).
- Dimensions: 1536
- Number of Points: 1M
- Data type: float32
Your Environment
- DiskANN version (or commit built from): Commit 3aa44ac
Additional Details
A similar configuration works on the SIFT 100k dataset.
However, on the wikipedia-cohere dataset (768 dimensions), it again shows a Recall of 0.
Actual Behavior
I am using `graph-index-build-bftree-spherical-quantization`, based on the benchmark example, to build an index on the Ada dataset. I tried several different configurations, but every run reports a Recall of 0.
Example Code
This is the configuration file.
Dataset Description
Please tell us about the shape and data type of your data (e.g., 128 dimensions, 12.3 billion points, floats).
Your Environment
Additional Details
A similar configuration works on the SIFT 100k dataset.
However, on the wikipedia-cohere dataset (768 dimensions), it again shows a Recall of 0.