diff --git a/.gitignore b/.gitignore index 4c790d0..98e224d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ -/target -/Cargo.lock -.DS_Store +target +Cargo.lock +/demo/data/env +.DS_Store \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index d193241..8cb485e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,29 +1,7 @@ -[package] -name = "rindex" -version = "0.1.0" -edition = "2021" -rust-version = "1.63" -description = "Rindex: reverse nearest neighbor search index for high-dimensional clustered datasets." -readme = "README.md" -documentation = "https://docs.rs/rindex" -homepage = "https://github.com/azizkayumov/rindex" -repository = "https://github.com/azizkayumov/rindex" -license = "Apache-2.0" -keywords = ["tree", "dynamic-connectivity"] -categories = ["algorithms", "data-structures"] -authors = ["Kayumov A.I. "] -exclude = ["./github"] +[workspace] -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -conv = "0.3.3" -ordered-float = "4.1.1" - -[dev-dependencies] -rand = "0.8" -criterion = "0.5.1" - -[[bench]] -name = "benchmark" -harness = false +members = [ + "lib", + "demo", +] +resolver = "2" diff --git a/README.md b/README.md index fcc1465..58ff6a5 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,8 @@ +[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/azizkayumov/rindex/ci.yml?style=plastic)](#) +[![crates.io](https://img.shields.io/crates/v/rindex)](https://crates.io/crates/rindex) + # rindex -Rindex: reverse nearest neighbor search index for high-dimensional clustered datasets. \ No newline at end of file +Rindex: fully dynamic nearest neighbor search index for high-dimensional clustered datasets. + +## License +This project is licensed under the [Apache License, Version 2.0](LICENSE.md) - See the [LICENSE.md](https://github.com/azizkayumov/rindex/blob/main/LICENSE) file for details. 
diff --git a/benches/benchmark.rs b/benches/benchmark.rs deleted file mode 100644 index 47c2566..0000000 --- a/benches/benchmark.rs +++ /dev/null @@ -1,41 +0,0 @@ -use criterion::{criterion_group, criterion_main, Criterion}; -use rand::{rngs::StdRng, Rng, SeedableRng}; -use rindex::Index; - -const K: usize = 10; -const SEED: u64 = 0; -const N: usize = 10000; - -fn benchmark(criterion: &mut Criterion) { - let mut group = criterion.benchmark_group("rknn"); - group.sample_size(10); - - group.bench_function("SSTree", |b| b.iter(|| bench_sstree())); - group.bench_function("Linear", |b| b.iter(|| bench_linear())); -} - -criterion_group!(benches, benchmark); -criterion_main!(benches); - -fn bench_sstree() { - let mut tree = rindex::SSTree::new(K); - let pts = dataset(); - for p in pts { - tree.rknn(p); - tree.insert(p); - } -} - -fn bench_linear() { - let mut linear = rindex::LinearIndex::new(K); - let dataset = dataset(); - for point in dataset { - linear.rknn(point); - linear.insert(point); - } -} - -fn dataset() -> Vec<[f64; 2]> { - let mut rng = StdRng::seed_from_u64(SEED); - (0..N).map(|_| [rng.gen(), rng.gen()]).collect() -} diff --git a/demo/Cargo.toml b/demo/Cargo.toml new file mode 100644 index 0000000..e9904c0 --- /dev/null +++ b/demo/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "demo" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +rindex = { path = "../lib" } +rand = "0.8" diff --git a/demo/data/plot_tree.py b/demo/data/plot_tree.py new file mode 100644 index 0000000..c878717 --- /dev/null +++ b/demo/data/plot_tree.py @@ -0,0 +1,42 @@ +import matplotlib.pyplot as plt + + +# Read the data +lines = open("tree.csv").readlines() + +points = {} +bubbles = {} +for row in lines: + s = row.split(",") + id = int(s[0]) + height = int(s[1]) + if height == 0: + x, y = float(s[-2]), float(s[-1]) + points[id] = (x, y) + elif height == 1: + radius = float(s[-3]) + x, 
y = float(s[-2]), float(s[-1]) + children = s[3].split("+") + bubbles[id] = (x, y, radius, children) + +fig, ax = plt.subplots() +colors = ['r', 'g', 'b', 'y', 'c', 'm'] +for id, (x, y, radius, children) in bubbles.items(): + color = colors[id % len(colors)] + circle = plt.Circle((x, y), radius, color='b', alpha=0.25) + ax.add_artist(circle) + for child in children: + child = int(child) + x, y = points[child][0], points[child][1] + plt.scatter(x, y, color='b', s=2, marker='+') + +# remove coordinate axis names +plt.xticks([]) +plt.yticks([]) +plt.gca().set_aspect('equal') + +title = "N = " + str(len(points)) + ", L = " + str(len(bubbles)) +plt.title(title) + +# set axis limits +plt.savefig("tree.png", dpi=300) \ No newline at end of file diff --git a/demo/data/requirements.txt b/demo/data/requirements.txt new file mode 100644 index 0000000..937b6e2 --- /dev/null +++ b/demo/data/requirements.txt @@ -0,0 +1,13 @@ +contourpy==1.2.1 +cycler==0.12.1 +fonttools==4.53.1 +importlib_resources==6.4.2 +kiwisolver==1.4.5 +matplotlib==3.9.2 +numpy==2.0.1 +packaging==24.1 +pillow==10.4.0 +pyparsing==3.1.2 +python-dateutil==2.9.0.post0 +six==1.16.0 +zipp==3.20.0 diff --git a/demo/data/sparse.csv b/demo/data/sparse.csv new file mode 100644 index 0000000..b9beb51 --- /dev/null +++ b/demo/data/sparse.csv @@ -0,0 +1,475 @@ +x,y,color +414.04238417806994,360.5391356447236,2 +210.0560306725783,267.59780914534224,0 +416.36828816805166,357.08505989754764,2 +394.0726323231333,51.35105316827139,1 +363.8039241536012,186.69315485473777,1 +89.21561076775261,215.9288757903417,0 +238.9278343497511,193.2505205236532,0 +414.10867609705593,106.17193924768036,1 +115.12996533600892,149.12435045236316,0 +519.6678916059598,390.8431058719641,2 +513.6246625754995,394.30197532407806,2 +189.298373880358,182.14804777651307,0 +403.3607805853801,151.9502416304573,1 +458.8240903972122,218.1333260385274,1 +438.8218079721212,334.0561262412899,2 +460.6490718766141,313.43885272616257,2 
+133.77098400699185,111.61910694723552,0 +439.7864842714855,222.41636258777385,1 +422.9512434075008,56.76985469879551,1 +466.0268654513272,208.4885368739728,1 +442.9442793890141,434.4985714469241,2 +454.9407141329503,388.9393831465803,2 +403.2741418491526,308.81087256400826,2 +380.18942352839576,300.98098278267315,2 +487.0138145212319,449.13406865462673,2 +419.9380728320948,168.98031298469283,1 +412.8826289439411,175.3214846661254,1 +174.29291155641846,146.5775131487732,0 +409.6393589482627,351.33243279009827,2 +239.5880661383232,349.65545377031765,2 +369.1098862443752,156.48588742474556,1 +431.4454111867569,130.05679143398106,1 +193.43674692791603,159.2712672132954,0 +460.55986035595504,376.433530142359,2 +313.98658389638126,437.97426574465067,2 +436.9068502044666,200.38990851930464,1 +425.24719526481016,202.633281672981,1 +497.3213130779319,307.4011671409872,2 +524.7502866367305,332.11996320524844,2 +427.8943399100643,134.56942896620865,1 +96.83866404377515,150.37529130719793,0 +134.5912904178421,124.79908037434816,0 +448.5563737845117,166.89497227132944,1 +293.3541789873959,443.107404132948,2 +371.8644735290759,136.06684447507808,1 +358.259690476968,325.7833660686555,2 +327.65296341350216,354.62900433680574,2 +267.92541379507503,362.4562154827748,2 +222.2349229733876,158.6039900764171,0 +407.18399443246415,163.6115582974022,1 +483.6491672009646,359.1370307228643,2 +445.4265210809703,198.95919268479972,1 +327.3971174914086,440.6240342178062,2 +391.4443871428842,103.42371784799268,1 +333.8153890379068,447.5176489244592,2 +413.09926052306,134.89085696403475,1 +437.1610131339835,404.3844914483944,2 +509.5073948118843,189.412966382646,1 +247.81963416489975,403.989880728725,2 +122.27358975895432,237.69775689823877,0 +465.6945600932888,172.184827676236,1 +153.84906611265404,177.28534541323765,0 +150.5086992589832,187.60788906169935,0 +420.3674734592487,334.60334535151344,2 +521.1590791699823,334.5746378144926,2 +204.6175181397595,240.83452823764537,0 
+395.5633788311932,456.4853980328724,2 +235.78305434630025,373.5686806447727,2 +429.0078826432695,129.6096935050652,1 +490.469590578802,115.81677281672184,1 +113.66253850250918,83.17134645798161,0 +163.11330286001714,154.6423290539098,0 +457.8052337141568,80.22135732814034,1 +303.06492102623884,479.2823350336996,2 +227.35907098387088,168.0048725637591,0 +142.16893841697106,201.06823071678,0 +396.811716789951,75.52419093498092,1 +406.7710869442661,195.43467719908224,1 +394.0683877971658,197.19073151575063,1 +534.366158360098,383.947368678537,2 +298.9817295966399,452.8067839843614,2 +345.7278217133453,398.14359409236096,2 +403.9527899348756,82.3201576792233,1 +325.4245805156956,378.90780265396006,2 +413.7178254348391,232.79085427906472,1 +455.21724392043006,192.14291967627997,1 +192.207717942689,255.9978341673592,0 +442.4688627902266,377.25565473274287,2 +301.93600699148226,358.2604998620988,2 +350.47271097474766,444.53685910932,2 +439.0456212424375,174.40489243744383,1 +167.5863600623344,231.92798091681215,0 +167.3282017507935,290.0132576728731,0 +431.3163928845697,89.39032575345709,1 +189.94500946740263,80.50807303331821,0 +418.7333814771656,118.89364528437952,1 +451.2100296705124,115.49030483275406,1 +351.2092302820438,387.14132621312217,2 +465.82700381005776,367.65024772770414,2 +458.1434553144955,491.6548397397195,2 +156.78663242377888,192.7762092817314,0 +374.780210777955,344.47038172438994,2 +411.52586396959913,97.7363893603878,1 +376.5791427033611,398.0048948305208,2 +485.0988432503182,451.2155620374061,2 +170.1319034684904,227.6321103264839,0 +439.5139499068096,213.47036576321565,1 +431.8902801128049,95.54178833414169,1 +472.5884957428221,421.85577216096925,2 +448.818587886061,159.22931962546028,1 +189.5208987392201,124.68252952114926,0 +464.4136205355094,416.4834560234735,2 +412.28962266621664,103.56083122063885,1 +472.1078595968147,192.516094673388,1 +239.40119475009584,128.37807247831716,0 +490.4534228150477,318.1361837612505,2 
+326.76590574218386,373.5901768727766,2 +185.90707242093293,274.07320668414553,0 +475.7896465031374,87.84522669843089,1 +468.6798525458752,151.07629926535714,1 +474.9222277394821,147.81974362104393,1 +503.3285785985965,118.47644247631352,1 +461.0445996424092,371.5065506567357,2 +433.8293830648768,113.83562360695235,1 +229.93674981278636,205.4808890372896,0 +412.2273623943956,94.74801483459896,1 +460.0516220499396,158.91727457151114,1 +138.56425306258438,194.02579207862368,0 +353.7763792150048,396.13317646158623,2 +509.0058362572503,362.8968019883944,2 +164.38993425228472,187.6665338807828,0 +530.074293660455,359.5626972386202,2 +483.2458819600859,138.5624972803692,1 +159.78139617884085,241.8427679152569,0 +560.4508335424356,314.2375233289304,2 +357.6214681895822,390.4090898523758,2 +526.6009441939992,310.27926299763243,2 +284.18256664898576,430.4410458552411,2 +493.65806060787946,387.3168983105709,2 +457.9999162678287,146.1290079388062,1 +519.5639853681644,353.8389670029957,2 +427.16543695896576,142.72192067742614,1 +453.7532868826727,105.61974665615544,1 +416.4462532905093,147.96287668880555,1 +314.01080290551755,418.4318755756857,2 +417.603755381552,227.2558227411826,1 +355.8295356490492,378.18488620549016,2 +188.15006649461708,96.52432384505462,0 +494.6918604629075,368.8895603801452,2 +311.19238953275743,361.7605192668958,2 +325.62796835351173,481.0221627031693,2 +430.9983785797966,166.97589869402674,1 +490.3662586803688,192.9438527061447,1 +365.0270176615879,120.85132988588364,1 +177.58678544354132,166.20462272526282,0 +506.6317761613888,424.8634906167098,2 +140.83614613145858,117.62385173714657,0 +540.6258438304378,162.07404887469437,1 +444.5726372469785,55.62932897132288,1 +325.0206956813249,473.1715230531072,2 +390.1467776420142,35.129761262293144,1 +75.58872175410498,227.5859421400248,0 +489.5218325178779,128.48521490833653,1 +475.73878423626513,173.90981873828548,1 +413.620702518353,85.61515674416358,1 +528.6628166467968,374.2975673782873,2 
+170.97917930680026,239.40222098876964,0 +505.1009229529137,164.07807092385696,1 +442.112815345234,113.08051885831966,1 +429.1717781385488,113.30619390888006,1 +498.513477511849,164.91970386735252,1 +478.87551052358566,78.1165736718818,1 +236.85770963558028,144.86999922247986,0 +428.2276276642137,202.15627332791985,1 +368.6423331537606,461.1846354625873,2 +317.90378565173285,399.7706536117664,2 +386.52977288680506,163.0380037500385,1 +164.03982867095678,247.87324705742716,0 +471.0918349331844,311.2845483410806,2 +362.4528167116694,355.7886420359198,2 +119.88473518787433,81.8559902575523,0 +91.393246009725,125.50958272258542,0 +90.16137328884145,128.92334153131355,0 +111.48730226283818,259.3711613399081,0 +460.0981454746776,193.93022880945097,1 +166.30442566576198,145.72126856457703,0 +445.94861136632926,370.3754684664657,2 +371.0459339601925,394.91281527783065,2 +191.42099187542675,156.70882036311684,0 +371.7386353221689,104.63557507398036,1 +480.4754237401369,371.839700606847,2 +132.5292397000211,126.92635928477134,0 +319.66223530212056,328.10968404600646,2 +414.6501367838347,153.64142954144194,1 +406.531996036688,358.575909195224,2 +423.3158665170892,88.29232867011864,1 +474.26213451284633,184.31306839809508,1 +394.9064902844664,359.4600327342916,2 +446.3127771825351,162.90197352483688,1 +324.2180357402434,157.6381506308449,1 +474.740153856816,139.61433390987634,1 +206.90760402938724,206.53330499815797,0 +461.8916493622356,132.25996089339273,1 +473.85838838251817,440.25885108346114,2 +418.9978568156852,147.8961143432541,1 +406.94290901861547,417.84198298340186,2 +157.65284382485245,125.7772212021002,0 +273.46993404521106,431.6205843835691,2 +372.736947215419,416.921692141038,2 +120.56255353264818,190.55505098480535,0 +438.3704039565786,371.4412178801774,2 +329.32550907520414,344.69622260616217,2 +357.27181410120755,299.4421903389697,2 +145.92639031627195,252.48208933863884,0 +349.39183902936406,293.88773087622064,2 +202.39043438920805,92.6955836955823,0 
+117.51744234167758,255.5851251574628,0 +536.6766084970541,372.0880903774244,2 +302.58866986821823,337.24067875579755,2 +470.6403410283015,140.44489811201407,1 +436.7820478349354,172.56692914135766,1 +474.13125062005423,469.4378525557747,2 +371.16144045307976,150.79752517634878,1 +384.4756054246833,183.65531564317504,1 +148.70316087414636,212.39233664334847,0 +359.11223156350843,461.40979967174286,2 +460.5788782820063,198.1865043868739,1 +289.6550408813298,431.3150961294129,2 +418.6357374442614,74.59371097960926,1 +364.6055992619544,110.50096538797824,1 +387.397962965656,306.7163912819822,2 +432.7857114376682,378.2405850038306,2 +482.7284698972424,172.31556887703692,1 +377.25449535457,94.87258960360764,1 +384.0436529033775,122.93871019523638,1 +364.5841843260106,138.78900963943042,1 +441.5184840730327,136.611695828566,1 +442.2540630489696,103.571746676162,1 +329.273396719614,331.0912363603958,2 +400.6001701120873,476.4451413752557,2 +328.1473887964802,316.73118559777515,2 +384.08286662188914,476.5339865391573,2 +279.623998255432,393.68990521123374,2 +341.58068132117273,433.1186963282307,2 +430.9437136789535,432.2371973625314,2 +185.1690617970002,164.09511816020006,0 +76.49329640309685,234.92519545726012,0 +129.8090258337528,102.50637441976616,0 +343.0643701533032,422.4889051798568,2 +152.56307604183127,116.97403120216268,0 +464.6008418627993,177.44876298863065,1 +369.5740459841992,432.9628484618792,2 +393.71402202107583,132.11007914996105,1 +394.9999022602636,178.26268420308867,1 +462.9102569223694,68.7991792779258,1 +399.08060241170057,131.8073532238716,1 +196.40707150814632,240.72477660674505,0 +356.2362246060975,342.40388952665137,2 +86.42643293613973,191.05312586057008,0 +460.077963336692,413.9797242981838,2 +97.06042678339188,227.65626965277423,0 +396.83441281262105,195.04341025450373,1 +364.8156822592848,487.1519122758671,2 +331.2201977091321,168.3424935429187,1 +140.3007546443187,280.8932786950205,0 +156.98254592108657,109.40894370265268,0 
+480.73679238621634,146.85117837258932,1 +155.33677681382727,274.38764335572625,0 +427.5832206448943,369.858208621742,2 +422.2351486792562,514.406973044994,2 +363.026654951499,350.62161467079386,2 +198.1318181313809,273.02639812543,0 +388.40546648841433,64.34475819409226,1 +378.9821855122209,82.43705667328128,1 +352.1192266940969,344.97620860735344,2 +492.0820724192613,126.4901549538582,1 +329.0770465785317,341.691463093454,2 +468.19696219047614,151.11409962101197,1 +531.97022594828,297.3412104734574,2 +390.4312050430028,153.27048487690985,1 +493.5938673327805,113.48429282022236,1 +230.4435713383629,124.94620201707671,0 +442.63325508731145,187.1687687251893,1 +382.5311381441364,144.83358597113704,1 +378.67832811193983,294.2640905301913,2 +87.22301099503112,238.2193475913096,0 +199.98205172881688,82.94124346006988,0 +427.9061805805475,360.5701689200855,2 +150.9376753284776,170.46411295166894,0 +277.62370916196267,457.4178017656107,2 +400.2564313144812,345.9600991416755,2 +339.7406207712995,284.3915278437552,2 +427.1067809575741,450.901940387704,2 +477.3649327450365,133.02724780162868,1 +358.5217830732459,87.06292662200991,1 +464.9152306866656,186.01029520339088,1 +352.54120958785325,290.32901434858206,2 +482.47795539265707,343.05323273929537,2 +105.42386495809394,232.66115960633817,0 +216.2797832281072,205.45784119222185,0 +471.48694533171,56.43185592439944,1 +240.7407109923371,148.35988385723078,0 +495.0505654505161,138.00410737050814,1 +424.142116670629,384.4083826821707,2 +108.23164811542654,187.4280717078823,0 +468.3556233378248,162.92872473739845,1 +487.5595954433536,373.5828448918823,2 +401.99026375437467,134.73258773222432,1 +146.38159755833007,253.2415647801332,0 +354.51200766706336,106.49368457488696,1 +399.8211975719054,399.0817183466514,2 +272.519326288372,438.1544108828985,2 +471.1783178148562,401.93908949476366,2 +404.6243150249356,163.1409145908786,1 +426.2099512410161,186.9801528962987,1 +344.36561905905984,340.7284164253813,2 
+317.177376007614,442.2922259538629,2 +171.86629050950765,159.35166486522908,0 +484.4284950977011,128.40561593766392,1 +293.59208366949645,367.259466560777,2 +103.30561197779924,192.9590402518056,0 +453.599859238842,154.89620748885596,1 +425.6889185012368,87.51925040255232,1 +456.3023911656518,377.35469727090697,2 +418.4878347907079,175.05342108105827,1 +468.13841123901545,402.8087155206015,2 +179.75421883868242,134.6966174719375,0 +277.24008802507444,357.253938507732,2 +338.6118603329068,339.9466816890412,2 +322.5203542610964,128.90700915209334,1 +241.44305520473773,99.73841366684796,0 +368.9369166964261,163.3330107035203,1 +369.1908788605759,140.83944073311574,1 +542.6371243016852,299.69312783284784,2 +492.11253605691377,363.012212476496,2 +455.3929879588037,203.97055053675308,1 +141.74136344337217,264.2376676622833,0 +278.6490119141996,379.834739656696,2 +522.5997640784594,80.26433180637375,1 +435.7346389772615,143.63061393077248,1 +439.9173468633878,303.83340493581784,2 +281.21680595803787,313.7670944123464,2 +105.39307049982284,200.2105471334474,0 +386.32822689461887,433.5714629623211,2 +293.42809122623567,389.7363519618398,2 +404.2045708011394,385.7384189482218,2 +480.6258310496252,381.8306629833076,2 +275.3354890942726,371.5491590029612,2 +427.86516659871774,151.75944049328592,1 +334.1936766710167,507.7279478327628,2 +404.8063303164831,319.36830610724286,2 +397.16889604867066,129.73399454395656,1 +192.18191541415823,138.299900470954,0 +311.03430815680406,490.41584389238346,2 +167.6809468958673,187.422788837492,0 +508.6593111557706,425.5501666012357,2 +511.88791606171577,442.0110666068644,2 +411.8842855138233,201.5411164229316,1 +221.13360244759048,208.20645670164544,0 +174.46889125333405,141.15128519670893,0 +408.7664725791605,454.0872325003794,2 +420.7228870575181,100.6103731366529,1 +467.18006893943175,164.96284672104554,1 +470.1602845935876,167.53123429567148,1 +107.3058037793134,144.36918777810268,0 +461.2131803683335,140.96351108873762,1 
+460.1065025592437,94.2420690645576,1 +453.3607254595828,62.13620659059721,1 +168.47165390932122,101.58862701545476,0 +357.02533727243315,460.581578013666,2 +173.34648739457597,197.6494843507989,0 +460.1194307521736,462.2500669578926,2 +241.15835809869333,130.1217251536769,0 +206.25285393509773,214.97422400426683,0 +528.1168296720647,300.7401878369359,2 +332.1600722430182,454.0129529078691,2 +388.48238679355615,176.1278429379998,1 +491.82116415478816,180.9410456050965,1 +401.2120318624246,73.04951521515461,1 +272.64756293059486,390.46788531064647,2 +207.21829890011244,247.404639114608,0 +394.6597122215862,126.2256087263878,1 +460.51731907652095,315.6686295909567,2 +419.6692796263487,405.9001669109613,2 +149.66173505837935,298.3571727381597,0 +407.6221082598363,349.66567012234617,2 +373.3106331577088,344.5137402932216,2 +395.8653440354162,368.37572092149713,2 +390.7788463225394,148.18618592506465,1 +477.393465644115,140.4158274474044,1 +488.4746053894765,166.2864524543,1 +415.8945650193349,87.01351496183818,1 +151.82699384413047,153.47823704015133,0 +199.2500178118568,176.22532763927563,0 +433.0062276253566,99.62331281082224,1 +128.14995931262638,166.39631043857514,0 +388.3943309195321,180.37888560080444,1 +503.28128250768174,151.79143178402586,1 +429.1885913335234,413.43512861555774,2 +383.4137292442492,368.8403789017949,2 +164.07546939423858,83.89800169204636,0 +121.74912774413372,137.24888645346095,0 +407.5858684276069,162.0077872666535,1 +91.11138514161988,235.95597404508976,0 +337.8255161464557,330.02771092804994,2 +521.4402476087098,367.69692266633183,2 +360.6467123186816,395.0772809229078,2 +393.1949812840851,320.9721347233317,2 +196.23502223053268,185.0722256483804,0 +233.0583076137779,249.0449907761412,0 +390.70393588147897,397.27389429086105,2 +426.2339682559275,154.197335004693,1 +477.1936488729609,416.98776150789047,2 +518.8880997352612,187.67207618253465,1 +312.7603025704821,339.35438939526324,2 +456.3903300965724,91.86871680618978,1 
+373.6393952807529,460.06071666384736,2 +338.0292634700861,481.43332401475794,2 +379.7026675201303,327.82654867647364,2 +380.2940604006896,104.75390947894124,1 +345.1678898325412,438.248864419249,2 +268.67583571304647,382.5525180053983,2 +282.77792579259346,430.02328107956174,2 +246.71690107365043,433.5800324698892,2 +135.25253400342916,104.1038027340222,0 +476.986106621251,205.42427087409004,1 +468.0068711483408,115.16000971846046,1 +465.5218617344016,446.24308160495,2 +342.6805983542533,474.70663409709255,2 +261.88029466777584,174.48124793393842,0 +405.6590293565299,143.6052865431502,1 +134.83409071743472,141.61060424413242,0 +233.39130881628623,400.34558316372994,2 +178.0065556251946,231.5924546565292,0 +112.2390993438628,214.06012328242727,0 +406.6579823807265,147.2008809604397,1 +378.4780049243903,123.33739859581408,1 +167.5391730759437,190.5264261378084,0 +404.129084679752,310.12093717117966,2 +431.74414818012986,218.95839845203795,1 +184.802152991831,118.83793397565958,0 +406.5085495788186,123.1718039592256,1 +383.8235911302683,469.361517627096,2 +270.53949705886754,379.8928396819855,2 +389.6523125760813,177.6328965362668,1 +373.5785496544379,176.44706126647333,1 +475.4699757895988,105.35725625670146,1 +479.3710637601944,103.17118177757558,1 +428.9534922536725,403.29266629952775,2 +465.3915463363931,127.33274138720095,1 +277.89422906374995,429.6257860755746,2 +406.01157244602615,298.22888264695507,2 +495.2574274574148,355.2359112106527,2 +394.6282438370328,94.15435377589536,1 +398.1957348251096,89.12936685328307,1 +364.91681852885984,344.3693127274331,2 +200.5933075180822,152.63460870788572,0 +389.7738367826205,173.78038133583863,1 +508.5282701965203,414.0036175869985,2 +464.84057018541466,88.05446697432153,1 +374.8099469265114,468.1080728749188,2 +158.31669381663392,176.25281388039508,0 +450.81437071363104,80.12516734876853,1 +202.0988456557099,123.02903824746,0 +414.1278569033559,443.3831928012276,2 +524.9634627751678,370.8861829228897,2 
+376.09163141212713,481.49380077092593,2 +372.5288680120048,168.3166800708607,1 +358.2683511556894,397.18587178111466,2 +342.3814338713095,442.540528850403,2 +340.9380232467895,401.11803121420087,2 +413.5315212558364,151.62769815913748,1 +466.3845076232753,205.1282005327581,1 +461.4481920626599,191.57116187302503,1 diff --git a/demo/data/tree.csv b/demo/data/tree.csv new file mode 100644 index 0000000..19fb6d0 --- /dev/null +++ b/demo/data/tree.csv @@ -0,0 +1,407 @@ +123,4,18446744073709551615,28+122+140+204+298,337.23235702131194,358.2197148990928,251.90231818033692 +28,3,123,55+196+361+366,139.44625956943923,325.6459878290779,424.985178813892 +122,3,123,226+74+297+343,95.60909857172443,456.9954684071251,170.62371148057522 +140,3,123,139+8+243+254+335,125.4583884791285,159.16065585654053,179.62711658809366 +204,3,123,203+126+61+104+268,168.61279557163664,439.0022350260438,365.80020310444877 +298,3,123,137+235+4+340+383,93.84908836407303,406.96802628498097,112.86947650112238 +4,2,298,349+382,24.456535669707627,426.6469185301702,126.8906076277532 +8,2,140,207+85+113,58.77810827873039,207.89090339767367,134.11203400898046 +55,2,28,347+6+365,39.66543816711015,331.2803080901217,469.9653317287626 +61,2,204,150+51+238+192+332,65.14163100418612,435.755845977828,388.6920037720575 +74,2,122,73+225+175+265+294,59.949981359090835,480.99462897704433,168.01694646346263 +104,2,204,121+159+125+154,71.64464342633649,348.46211748411093,331.0826334031138 +126,2,204,11+60+230+396+406,58.673693270657054,412.2969844630643,341.49048144923603 +137,2,298,132+234+84,56.45730183733055,371.48637021891614,101.0712655150887 +139,2,140,181+14+242,38.75639050610336,101.08773481024384,217.55897421563952 +196,2,28,54+285+284,53.039527762405825,335.5737669172895,391.1371702306472 +203,2,204,202+88+249+282+355,84.03822732308194,505.94979652906176,350.25151724277487 +226,2,122,92+200+279+78,42.197021356998604,432.2693865066921,161.8750946450011 
+235,2,298,19+26+107+389,45.646460557182664,384.56402278924025,150.35394466700714 +243,2,140,334+62+300+353,44.68891100987925,172.96117651411842,258.65061332066017 +254,2,140,118+239+179+266+346,58.91746460079178,133.0163543830371,130.50561314704478 +268,2,204,108+267+275,58.196430165497546,473.6505440026662,438.1089946679597 +297,2,122,12+120,39.143382334448006,487.46062895825594,123.08161326454733 +335,2,140,162+1+319,80.40970227901997,179.36275011984634,198.83028613260396 +340,2,298,66+302,33.89043944171564,455.0260390732853,92.38812455319474 +343,2,122,342+232+136+380,51.68870009383792,434.78727139527973,203.2679892316642 +361,2,28,360+195+35+182,88.33098919687635,282.98396329298697,416.938976577146 +366,2,28,166+184+253,75.35282766528744,382.62935016883443,454.3256701135987 +383,2,298,339+296+228+404,42.06434513842204,415.77880930172614,85.35695359504811 +1,1,335,29+289+250+397,20.653100957549018,168.76913683031012,184.40658830208548 +6,1,55,47+91,11.101639937346764,318.0275019190645,481.79368347274533 +11,1,126,31+100+93+351,27.707873999866838,413.03172592719386,310.53338019456214 +12,1,297,138+156+323,30.366988427729364,479.44755584676324,126.6615223857068 +14,1,139,180+57+152+401,13.364369828109439,87.4980862373181,229.18242212292142 +19,1,235,173+111+72+402,17.257870782425538,378.05408713374356,177.98604135113044 +26,1,235,188+0+59,12.09249607242549,395.9070328436223,125.28150289947284 +35,1,361,18+5+183,14.414181474313402,321.1080107156711,444.7598148687942 +51,1,61,65+130,7.022421229584888,402.0128841865224,392.4100686474366 +54,1,196,48+276+251+177,23.910519617610934,330.0367121357737,419.73507469663946 +60,1,126,68+223+148,12.404366391758655,397.00942187812126,357.93195093248806 +62,1,243,208+164+75,16.81962809247788,147.18797392480596,269.2543371297952 +66,1,340,79+80+348+307,20.753111358139897,461.7655270590302,81.11121492043152 +73,1,74,135+293+306+33,8.792263337768619,466.06679171826784,159.47128632382805 
+78,1,226,17+198+329,6.331711673131653,435.60868255238984,171.31590675760944 +84,1,137,10+189+49+286,17.08023555225855,381.8677369556335,106.59690388158891 +85,1,8,216+15+7+345+369,23.090321903726498,231.610726605979,145.30935780668193 +88,1,203,134+273+281+367,15.548873959345112,489.88967092877624,372.23158357636567 +92,1,226,368+63+231+390+388,11.638144808675909,411.8558296034541,164.93914001610818 +107,1,235,171+197+373+384,10.951295168960694,369.1799961381354,145.5348166789007 +108,1,268,22+393+270+224,24.634970008643123,462.0212017265903,418.9116287879921 +113,1,8,102+236+129+387,15.513040571669258,186.07992803919035,156.66317972134638 +118,1,254,41+376+400+244+245,17.70573975914165,100.16581049153278,139.66035075831252 +120,1,297,77+46+262+119+304,11.329779166917229,492.26847282515155,120.93366779185165 +121,1,104,99+186+165,24.86081253924692,303.9842882075788,322.57965292197304 +125,1,104,13+37+158+247+241,24.83938143243652,363.9072917997572,354.69563918657167 +132,1,137,50+131,19.53089223672797,338.51618096407987,117.70034686349015 +136,1,343,280+142+106+394+328,13.954240966829122,459.20801320195585,204.18436750248142 +150,1,61,146+69+215+145,31.452219918947097,422.7199956841304,419.7571454326154 +154,1,104,147+193+95,32.105757450034844,369.1093357551171,297.36296730280355 +159,1,104,157+187+372,17.51233130144023,346.55077129689295,333.95027034546825 +162,1,335,219+3+30+299+89,37.94487411129946,219.85405406402887,211.6254389731861 +166,1,366,160+82,30.902574064545604,415.50081062920833,484.2471027726867 +175,1,74,27+143+357+169+240,12.235037748459927,465.4833275428492,184.4180418702193 +179,1,254,214+90+36+375+39,33.94284186755021,146.1853467550767,112.74345239057536 +181,1,139,117+211+128,9.86555963945683,110.69993787529131,190.31405431483108 +182,1,361,32+374+94,13.878745024277368,289.98653924866613,451.1106632943067 +184,1,366,252+53+112,8.03623922806124,367.13131999934063,460.8850505993925 
+192,1,61,161+76,7.47577356784029,425.8626686577617,377.13329565195636 +195,1,361,283+194+185+213+96,27.278924879996975,284.07192621143884,415.3605967082593 +200,1,226,52+278+116,20.457497579073983,447.32615885538036,146.19387975167325 +202,1,203,23+259,7.003970118353692,529.2855850711396,303.8102367355449 +207,1,8,151+206+325,8.9394608720813,197.43916519514252,85.38163339632347 +225,1,74,190+201+40+271+362,17.584527032644115,494.5873530112265,180.10662748765532 +228,1,383,44+21+316+109,6.222857082739498,414.6617624475974,102.01988324133998 +230,1,126,229+2+386,5.56338547260282,407.93115441492904,353.1913373692228 +232,1,343,64+127+341+263,21.313542792137714,405.9709596987672,193.66224296640883 +234,1,137,58+98,10.199626079130987,383.6938260003176,73.39090743368678 +238,1,61,331+210+170,4.538921609756002,442.26262603771147,373.02411369312864 +239,1,254,9+105+20,7.755003946141147,167.09467301176224,153.23842082790532 +242,1,139,110+358,8.793186824830766,113.84872735852413,235.17945825228847 +249,1,203,248+103+310+378+38,47.422011250606865,479.998213288583,324.42009051526554 +253,1,366,212+174+97,11.636622468689485,376.2130733647457,427.81866785507947 +265,1,74,264+257,17.790700574512833,522.8633833916758,163.07605989927566 +266,1,254,246+178+221,15.911277087714979,121.11876650804544,89.17790371176669 +267,1,268,87+217+314+320+391,35.8755326467594,475.64915041772355,460.34023481419763 +275,1,268,34+56,15.236719194475482,491.9127125171749,420.9256260623001 +279,1,226,233+153+222,3.941940144165,447.895912951036,163.0087551405422 +282,1,203,260+167+398+399,13.44619481377253,524.0803822970886,385.8475043132166 +284,1,196,272+309+337+295+371,18.135518689644332,318.59436923912426,365.4296005985074 +285,1,196,291+220+124+322+315,20.00813899995141,356.9828084206674,393.96641628999316 +294,1,74,274+163+199+305,7.610901433403966,477.3863107785214,143.41957934650412 +296,1,383,115+326+227+311,6.588706164764839,428.05286450392515,90.18592329006744 
+300,1,243,287+258+313,9.221422684482249,166.96415965137717,236.29236641017016 +302,1,340,277+70+256,7.921444908869855,446.0400550922921,107.4240040635457 +319,1,335,292+318+71+344+191,14.880233835668747,147.34633680729274,197.5740915564366 +332,1,61,321+303+114+317+392,13.446218465262644,459.7349138214048,376.3768817888573 +334,1,243,308+327,12.239890327337529,176.61763708586324,282.04323217850936 +339,1,383,25+168+395+338,19.813685284555536,414.79011832624775,74.82472002544792 +342,1,343,336+144+312+379+205,23.88049215478125,429.29861650843156,214.693427526191 +346,1,254,149+16+45,15.766703094290811,143.63820949507814,163.44622014346513 +347,1,55,24+43+42,19.909050276457837,334.1674144595939,487.8189148776749 +349,1,4,255+301+324,9.221542353906049,427.2448475601971,115.34515426673731 +353,1,243,352+333+155,14.959261839208695,202.29375558500894,254.81005718344895 +355,1,203,354+141+288+290+176,18.605480003973373,520.9106962185165,348.5986134499503 +360,1,361,350+359+370+385,31.572349166358542,247.77904211114958,372.41755765664755 +365,1,55,101+364+330,13.247948508566909,337.2284058346877,444.2261807505285 +380,1,343,269+209+81,8.394320997952171,441.65554212424945,195.5059566430979 +382,1,4,377+172+381+261,15.785841089949757,426.19847175765,135.54969764851512 +389,1,235,67+363+133,14.716511259160548,402.4129624769932,145.00909483985222 +396,1,126,237+356,9.231222983272458,429.59464071568493,334.3297357964017 +404,1,383,403+218,12.163913568468425,395.4421745565421,63.43762205162616 +406,1,126,83+405+86,8.547963053732833,419.43895097555634,359.3981214874522 +0,0,26,,0,406.5085495788186,123.1718039592256 +2,0,230,,0,407.6221082598363,349.66567012234617 +3,0,162,,0,216.2797832281072,205.45784119222185 +5,0,35,,0,332.1600722430182,454.0129529078691 +7,0,85,,0,241.15835809869333,130.1217251536769 +9,0,239,,0,166.30442566576198,145.72126856457703 +10,0,84,,0,378.4780049243903,123.33739859581408 +13,0,125,,0,364.91681852885984,344.3693127274331 
+15,0,85,,0,230.4435713383629,124.94620201707671 +16,0,346,,0,151.82699384413047,153.47823704015133 +17,0,78,,0,430.9983785797966,166.97589869402674 +18,0,35,,0,317.177376007614,442.2922259538629 +20,0,239,,0,171.86629050950765,159.35166486522908 +21,0,228,,0,412.28962266621664,103.56083122063885 +22,0,108,,0,468.13841123901545,402.8087155206015 +23,0,202,,0,531.97022594828,297.3412104734574 +24,0,347,,0,334.1936766710167,507.7279478327628 +25,0,339,,0,418.6357374442614,74.59371097960926 +27,0,175,,0,464.6008418627993,177.44876298863065 +29,0,1,,0,167.5391730759437,190.5264261378084 +30,0,162,,0,207.21829890011244,247.404639114608 +31,0,11,,0,404.129084679752,310.12093717117966 +32,0,182,,0,277.62370916196267,457.4178017656107 +33,0,73,,0,467.18006893943175,164.96284672104554 +34,0,275,,0,506.6317761613888,424.8634906167098 +36,0,179,,0,164.07546939423858,83.89800169204636 +37,0,125,,0,373.3106331577088,344.5137402932216 +38,0,249,,0,460.6490718766141,313.43885272616257 +39,0,179,,0,133.77098400699185,111.61910694723552 +40,0,225,,0,491.82116415478816,180.9410456050965 +41,0,118,,0,107.3058037793134,144.36918777810268 +42,0,347,,0,342.6805983542533,474.70663409709255 +43,0,347,,0,325.62796835351173,481.0221627031693 +44,0,228,,0,420.7228870575181,100.6103731366529 +45,0,346,,0,128.14995931262638,166.39631043857514 +46,0,120,,0,493.5938673327805,113.48429282022236 +47,0,6,,0,325.0206956813249,473.1715230531072 +48,0,54,,0,345.1678898325412,438.248864419249 +49,0,84,,0,380.2940604006896,104.75390947894124 +50,0,132,,0,322.5203542610964,128.90700915209334 +52,0,200,,0,461.2131803683335,140.96351108873762 +53,0,184,,0,373.6393952807529,460.06071666384736 +56,0,275,,0,477.1936488729609,416.98776150789047 +57,0,14,,0,97.06042678339188,227.65626965277423 +58,0,234,,0,388.40546648841433,64.34475819409226 +59,0,26,,0,397.16889604867066,129.73399454395656 +63,0,92,,0,404.6243150249356,163.1409145908786 +64,0,232,,0,396.83441281262105,195.04341025450373 
+65,0,51,,0,404.2045708011394,385.7384189482218 +67,0,389,,0,390.7788463225394,148.18618592506465 +68,0,60,,0,395.8653440354162,368.37572092149713 +69,0,150,,0,419.6692796263487,405.9001669109613 +70,0,302,,0,442.2540630489696,103.571746676162 +71,0,319,,0,148.70316087414636,212.39233664334847 +72,0,19,,0,384.4756054246833,183.65531564317504 +75,0,62,,0,155.33677681382727,274.38764335572625 +76,0,192,,0,424.142116670629,384.4083826821707 +77,0,120,,0,490.469590578802,115.81677281672184 +79,0,66,,0,453.3607254595828,62.13620659059721 +80,0,66,,0,460.1065025592437,94.2420690645576 +81,0,380,,0,442.63325508731145,187.1687687251893 +82,0,166,,0,408.7664725791605,454.0872325003794 +83,0,406,,0,427.9061805805475,360.5701689200855 +86,0,406,,0,414.04238417806994,360.5391356447236 +87,0,267,,0,474.13125062005423,469.4378525557747 +89,0,162,,0,238.9278343497511,193.2505205236532 +90,0,179,,0,140.83614613145858,117.62385173714657 +91,0,6,,0,311.03430815680406,490.41584389238346 +93,0,11,,0,404.8063303164831,319.36830610724286 +94,0,182,,0,298.9817295966399,452.8067839843614 +95,0,154,,0,380.18942352839576,300.98098278267315 +96,0,195,,0,293.42809122623567,389.7363519618398 +97,0,253,,0,386.32822689461887,433.5714629623211 +98,0,234,,0,378.9821855122209,82.43705667328128 +99,0,121,,0,281.21680595803787,313.7670944123464 +100,0,11,,0,439.9173468633878,303.83340493581784 +101,0,365,,0,327.3971174914086,440.6240342178062 +102,0,113,,0,185.1690617970002,164.09511816020006 +103,0,249,,0,480.4754237401369,371.839700606847 +105,0,239,,0,163.11330286001714,154.6423290539098 +106,0,136,,0,455.3929879588037,203.97055053675308 +109,0,228,,0,414.10867609705593,106.17193924768036 +110,0,242,,0,105.42386495809394,232.66115960633817 +111,0,19,,0,368.9369166964261,163.3330107035203 +112,0,184,,0,359.11223156350843,461.40979967174286 +114,0,332,,0,456.3023911656518,377.35469727090697 +115,0,296,,0,425.6889185012368,87.51925040255232 +116,0,200,,0,453.599859238842,154.89620748885596 
+117,0,181,,0,103.30561197779924,192.9590402518056 +119,0,120,,0,484.4284950977011,128.40561593766392 +124,0,285,,0,345.7278217133453,398.14359409236096 +127,0,232,,0,426.2099512410161,186.9801528962987 +128,0,181,,0,108.23164811542654,187.4280717078823 +129,0,113,,0,193.43674692791603,159.2712672132954 +130,0,51,,0,399.8211975719054,399.0817183466514 +131,0,132,,0,354.51200766706336,106.49368457488696 +133,0,389,,0,403.3607805853801,151.9502416304573 +134,0,88,,0,487.5595954433536,373.5828448918823 +135,0,73,,0,468.3556233378248,162.92872473739845 +138,0,12,,0,495.0505654505161,138.00410737050814 +141,0,355,,0,519.5639853681644,353.8389670029957 +142,0,136,,0,460.5788782820063,198.1865043868739 +143,0,175,,0,464.9152306866656,186.01029520339088 +144,0,342,,0,428.2276276642137,202.15627332791985 +145,0,150,,0,437.1610131339835,404.3844914483944 +146,0,150,,0,427.1067809575741,450.901940387704 +147,0,154,,0,339.7406207712995,284.3915278437552 +148,0,60,,0,400.2564313144812,345.9600991416755 +149,0,346,,0,150.9376753284776,170.46411295166894 +151,0,207,,0,199.98205172881688,82.94124346006988 +152,0,14,,0,87.22301099503112,238.2193475913096 +153,0,279,,0,448.818587886061,159.22931962546028 +155,0,353,,0,210.0560306725783,267.59780914534224 +156,0,12,,0,492.0820724192613,126.4901549538582 +157,0,159,,0,352.1192266940969,344.97620860735344 +158,0,125,,0,363.026654951499,350.62161467079386 +160,0,166,,0,422.2351486792562,514.406973044994 +161,0,192,,0,427.5832206448943,369.858208621742 +163,0,294,,0,480.73679238621634,146.85117837258932 +164,0,62,,0,140.3007546443187,280.8932786950205 +165,0,121,,0,302.58866986821823,337.24067875579755 +167,0,282,,0,534.366158360098,383.947368678537 +168,0,339,,0,413.620702518353,85.61515674416358 +169,0,175,,0,472.1078595968147,192.516094673388 +170,0,238,,0,445.94861136632926,370.3754684664657 +171,0,107,,0,364.5841843260106,138.78900963943042 +172,0,382,,0,429.0078826432695,129.6096935050652 
+173,0,19,,0,394.9999022602636,178.26268420308867 +174,0,253,,0,369.5740459841992,432.9628484618792 +176,0,355,,0,524.7502866367305,332.11996320524844 +177,0,54,,0,343.0643701533032,422.4889051798568 +178,0,266,,0,129.8090258337528,102.50637441976616 +180,0,14,,0,76.49329640309685,234.92519545726012 +183,0,35,,0,313.98658389638126,437.97426574465067 +185,0,195,,0,279.623998255432,393.68990521123374 +186,0,121,,0,328.1473887964802,316.73118559777515 +187,0,159,,0,329.273396719614,331.0912363603958 +188,0,26,,0,384.0436529033775,122.93871019523638 +189,0,84,,0,377.25449535457,94.87258960360764 +190,0,225,,0,482.7284698972424,172.31556887703692 +191,0,319,,0,150.5086992589832,187.60788906169935 +193,0,154,,0,387.397962965656,306.7163912819822 +194,0,195,,0,289.6550408813298,431.3150961294129 +197,0,107,,0,371.16144045307976,150.79752517634878 +198,0,78,,0,436.7820478349354,172.56692914135766 +199,0,294,,0,470.6403410283015,140.44489811201407 +201,0,225,,0,498.513477511849,164.91970386735252 +205,0,342,,0,439.7864842714855,222.41636258777385 +206,0,207,,0,202.39043438920805,92.6955836955823 +208,0,62,,0,145.92639031627195,252.48208933863884 +209,0,380,,0,445.4265210809703,198.95919268479972 +210,0,238,,0,438.3704039565786,371.4412178801774 +211,0,181,,0,120.56255353264818,190.55505098480535 +212,0,253,,0,372.736947215419,416.921692141038 +213,0,195,,0,273.46993404521106,431.6205843835691 +214,0,179,,0,157.65284382485245,125.7772212021002 +215,0,150,,0,406.94290901861547,417.84198298340186 +216,0,85,,0,236.85770963558028,144.86999922247986 +217,0,267,,0,473.85838838251817,440.25885108346114 +218,0,404,,0,396.811716789951,75.52419093498092 +219,0,162,,0,206.90760402938724,206.53330499815797 +220,0,285,,0,357.6214681895822,390.4090898523758 +221,0,266,,0,113.66253850250918,83.17134645798161 +222,0,279,,0,446.3127771825351,162.90197352483688 +223,0,60,,0,394.9064902844664,359.4600327342916 +224,0,108,,0,464.4136205355094,416.4834560234735 
+227,0,296,,0,423.3158665170892,88.29232867011864 +229,0,230,,0,406.531996036688,358.575909195224 +231,0,92,,0,414.6501367838347,153.64142954144194 +233,0,279,,0,448.5563737845117,166.89497227132944 +236,0,113,,0,191.42099187542675,156.70882036311684 +237,0,396,,0,438.8218079721212,334.0561262412899 +240,0,175,,0,460.0981454746776,193.93022880945097 +241,0,125,,0,355.8295356490492,378.18488620549016 +244,0,118,,0,90.16137328884145,128.92334153131355 +245,0,118,,0,91.393246009725,125.50958272258542 +246,0,266,,0,119.88473518787433,81.8559902575523 +247,0,125,,0,362.4528167116694,355.7886420359198 +248,0,249,,0,471.0918349331844,311.2845483410806 +250,0,1,,0,153.84906611265404,177.28534541323765 +251,0,54,,0,317.90378565173285,399.7706536117664 +252,0,184,,0,368.6423331537606,461.1846354625873 +255,0,349,,0,429.1717781385488,113.30619390888006 +256,0,302,,0,442.112815345234,113.08051885831966 +257,0,265,,0,505.1009229529137,164.07807092385696 +258,0,300,,0,170.97917930680026,239.40222098876964 +259,0,202,,0,526.6009441939992,310.27926299763243 +260,0,282,,0,528.6628166467968,374.2975673782873 +261,0,382,,0,416.4462532905093,147.96287668880555 +262,0,120,,0,489.5218325178779,128.48521490833653 +263,0,232,,0,394.0683877971658,197.19073151575063 +264,0,265,,0,540.6258438304378,162.07404887469437 +269,0,380,,0,436.9068502044666,200.38990851930464 +270,0,108,,0,472.5884957428221,421.85577216096925 +271,0,225,,0,490.3662586803688,192.9438527061447 +272,0,284,,0,311.19238953275743,361.7605192668958 +273,0,88,,0,494.6918604629075,368.8895603801452 +274,0,294,,0,483.2458819600859,138.5624972803692 +276,0,54,,0,314.01080290551755,418.4318755756857 +277,0,302,,0,453.7532868826727,105.61974665615544 +278,0,200,,0,427.16543695896576,142.72192067742614 +280,0,136,,0,455.21724392043006,192.14291967627997 +281,0,88,,0,493.65806060787946,387.3168983105709 +283,0,195,,0,284.18256664898576,430.4410458552411 +286,0,84,,0,391.4443871428842,103.42371784799268 
+287,0,300,,0,159.78139617884085,241.8427679152569 +288,0,355,,0,530.074293660455,359.5626972386202 +289,0,1,,0,164.38993425228472,187.6665338807828 +290,0,355,,0,509.0058362572503,362.8968019883944 +291,0,285,,0,353.7763792150048,396.13317646158623 +292,0,319,,0,138.56425306258438,194.02579207862368 +293,0,73,,0,460.0516220499396,158.91727457151114 +295,0,284,,0,301.93600699148226,358.2604998620988 +299,0,162,,0,229.93674981278636,205.4808890372896 +301,0,349,,0,433.8293830648768,113.83562360695235 +303,0,332,,0,461.0445996424092,371.5065506567357 +304,0,120,,0,503.3285785985965,118.47644247631352 +305,0,294,,0,474.9222277394821,147.81974362104393 +306,0,73,,0,468.6798525458752,151.07629926535714 +307,0,66,,0,475.7896465031374,87.84522669843089 +308,0,334,,0,185.90707242093293,274.07320668414553 +309,0,284,,0,326.76590574218386,373.5901768727766 +310,0,249,,0,490.4534228150477,318.1361837612505 +311,0,296,,0,431.8902801128049,95.54178833414169 +312,0,342,,0,439.5139499068096,213.47036576321565 +313,0,300,,0,170.1319034684904,227.6321103264839 +314,0,267,,0,485.0988432503182,451.2155620374061 +315,0,285,,0,376.5791427033611,398.0048948305208 +316,0,228,,0,411.52586396959913,97.7363893603878 +317,0,332,,0,460.55986035595504,376.433530142359 +318,0,319,,0,156.78663242377888,192.7762092817314 +320,0,267,,0,458.1434553144955,491.6548397397195 +321,0,332,,0,465.82700381005776,367.65024772770414 +322,0,285,,0,351.2092302820438,387.14132621312217 +323,0,12,,0,451.2100296705124,115.49030483275406 +324,0,349,,0,418.7333814771656,118.89364528437952 +325,0,207,,0,189.94500946740263,80.50807303331821 +326,0,296,,0,431.3163928845697,89.39032575345709 +327,0,334,,0,167.3282017507935,290.0132576728731 +328,0,136,,0,458.8240903972122,218.1333260385274 +329,0,78,,0,439.0456212424375,174.40489243744383 +330,0,365,,0,350.47271097474766,444.53685910932 +331,0,238,,0,442.4688627902266,377.25565473274287 +333,0,353,,0,192.207717942689,255.9978341673592 
+336,0,342,,0,413.7178254348391,232.79085427906472 +337,0,284,,0,325.4245805156956,378.90780265396006 +338,0,339,,0,403.9527899348756,82.3201576792233 +341,0,232,,0,406.7710869442661,195.43467719908224 +344,0,319,,0,142.16893841697106,201.06823071678 +345,0,85,,0,227.35907098387088,168.0048725637591 +348,0,66,,0,457.8052337141568,80.22135732814034 +350,0,360,,0,235.78305434630025,373.5686806447727 +351,0,11,,0,403.2741418491526,308.81087256400826 +352,0,353,,0,204.6175181397595,240.83452823764537 +354,0,355,,0,521.1590791699823,334.5746378144926 +356,0,396,,0,420.3674734592487,334.60334535151344 +357,0,175,,0,465.6945600932888,172.184827676236 +358,0,242,,0,122.27358975895432,237.69775689823877 +359,0,360,,0,247.81963416489975,403.989880728725 +362,0,225,,0,509.5073948118843,189.412966382646 +363,0,389,,0,413.09926052306,134.89085696403475 +364,0,365,,0,333.8153890379068,447.5176489244592 +367,0,88,,0,483.6491672009646,359.1370307228643 +368,0,92,,0,407.18399443246415,163.6115582974022 +369,0,85,,0,222.2349229733876,158.6039900764171 +370,0,360,,0,267.92541379507503,362.4562154827748 +371,0,284,,0,327.65296341350216,354.62900433680574 +372,0,159,,0,358.259690476968,325.7833660686555 +373,0,107,,0,371.8644735290759,136.06684447507808 +374,0,182,,0,293.3541789873959,443.107404132948 +375,0,179,,0,134.5912904178421,124.79908037434816 +376,0,118,,0,96.83866404377515,150.37529130719793 +377,0,382,,0,427.8943399100643,134.56942896620865 +378,0,249,,0,497.3213130779319,307.4011671409872 +379,0,342,,0,425.24719526481016,202.633281672981 +381,0,382,,0,431.4454111867569,130.05679143398106 +384,0,107,,0,369.1098862443752,156.48588742474556 +385,0,360,,0,239.5880661383232,349.65545377031765 +386,0,230,,0,409.6393589482627,351.33243279009827 +387,0,113,,0,174.29291155641846,146.5775131487732 +388,0,92,,0,412.8826289439411,175.3214846661254 +390,0,92,,0,419.9380728320948,168.98031298469283 +391,0,267,,0,487.0138145212319,449.13406865462673 
+392,0,332,,0,454.9407141329503,388.9393831465803 +393,0,108,,0,442.9442793890141,434.4985714469241 +394,0,136,,0,466.0268654513272,208.4885368739728 +395,0,339,,0,422.9512434075008,56.76985469879551 +397,0,1,,0,189.298373880358,182.14804777651307 +398,0,282,,0,513.6246625754995,394.30197532407806 +399,0,282,,0,519.6678916059598,390.8431058719641 +400,0,118,,0,115.12996533600892,149.12435045236316 +401,0,14,,0,89.21561076775261,215.9288757903417 +402,0,19,,0,363.8039241536012,186.69315485473777 +403,0,404,,0,394.0726323231333,51.35105316827139 +405,0,406,,0,416.36828816805166,357.08505989754764 diff --git a/demo/data/tree.png b/demo/data/tree.png new file mode 100644 index 0000000..b215b1c Binary files /dev/null and b/demo/data/tree.png differ diff --git a/demo/src/main.rs b/demo/src/main.rs new file mode 100644 index 0000000..5ea8991 --- /dev/null +++ b/demo/src/main.rs @@ -0,0 +1,56 @@ +use rand::{rngs::StdRng, Rng, SeedableRng}; +use rindex::Rindex; +use std::io::{BufRead, Write}; + +fn main() { + // Read the sparse dataset (https://www.kaggle.com/datasets/joonasyoon/clustering-exercises) + let mut data = Vec::new(); + let file = std::fs::File::open("demo/data/sparse.csv").unwrap(); + let mut skip_header = true; + for line in std::io::BufReader::new(file).lines() { + if skip_header { + skip_header = false; + continue; + } + let line = line.unwrap(); + let mut iter = line.split(','); + let x = iter.next().unwrap().parse::().unwrap(); + let y = iter.next().unwrap().parse::().unwrap(); + data.push([x, y]); + } + + // Configure the tree + let mut tree = Rindex::new(5).expect("Invalid fanout"); + let mut point_ids = Vec::new(); + let deletion_probability = 0.25; // 25% + + // Perform random insertions and deletions + let mut rng = StdRng::seed_from_u64(0); + let mut num_insertions = 0; + let mut num_deletions = 0; + while !data.is_empty() { + let random = rng.gen::(); + if random <= deletion_probability && !point_ids.is_empty() { + let random_id = 
rng.gen_range(0..point_ids.len()); + let point_id = point_ids.swap_remove(random_id); + tree.delete(point_id); + num_deletions += 1; + } else { + let point = data.pop().unwrap(); + let point_id = tree.insert(point); + point_ids.push(point_id); + num_insertions += 1; + } + } + + println!("Insertions: {num_insertions}"); + println!("Deletions: {num_deletions}"); + println!("Tree height: {}", tree.height()); + + let csv_rows = tree.nodes_to_string_rows(); + let filename = "demo/data/tree.csv"; + let mut file = std::fs::File::create(filename).unwrap(); + for row in csv_rows { + file.write_all(row.as_bytes()).unwrap(); + } +} diff --git a/lib/Cargo.toml b/lib/Cargo.toml new file mode 100644 index 0000000..62cd663 --- /dev/null +++ b/lib/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "rindex" +version = "0.1.0" +edition = "2021" +rust-version = "1.63" +description = "Rindex: reverse nearest neighbor search index for high-dimensional clustered datasets." +readme = "README.md" +documentation = "https://docs.rs/rindex" +homepage = "https://github.com/azizkayumov/rindex" +repository = "https://github.com/azizkayumov/rindex" +license = "Apache-2.0" +keywords = ["tree", "dynamic-connectivity"] +categories = ["algorithms", "data-structures"] +authors = ["Kayumov A.I. "] +exclude = ["./github"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +conv = "0.3.3" +ordered-float = "4.1.1" + +[dev-dependencies] +rand = "0.8" diff --git a/lib/LICENSE b/lib/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/lib/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/lib/README.md b/lib/README.md new file mode 100644 index 0000000..9b1e217 --- /dev/null +++ b/lib/README.md @@ -0,0 +1,38 @@ +# rindex +Rindex: fully dynamic nearest neighbor search index for high-dimensional clustered datasets. 
+ +## Usage + +The following example shows how to update and query Rindex: +``` +let mut rindex = Rindex::default(); + +// Insert some points +let a = rindex.insert([1.0, 1.0]); +let b = rindex.insert([2.0, 2.0]); +let c = rindex.insert([3.0, 3.0]); +let d = rindex.insert([20.0, 20.0]); + +// Query the tree for 3 nearest neighbors of the query point +let query_point = [0.0, 0.0]; +let result = rindex.query_neighbors(query_point, 3); + +// The result should contain the points a, b, and c +assert_eq!(result.len(), 3); +assert!(result.contains(&a)); +assert!(result.contains(&b)); +assert!(result.contains(&c)); + +// Delete the point c +rindex.delete(c); + +// Query the tree again (c should not be in the result) +let result = rindex.query_neighbors(query_point, 3); +assert_eq!(result.len(), 3); +assert!(result.contains(&a)); +assert!(result.contains(&b)); +assert!(result.contains(&d)); +``` + +## License +This project is licensed under the [Apache License, Version 2.0](LICENSE.md) - See the [LICENSE.md](https://github.com/azizkayumov/rindex/blob/main/LICENSE) file for details. \ No newline at end of file diff --git a/src/distance.rs b/lib/src/distance.rs similarity index 100% rename from src/distance.rs rename to lib/src/distance.rs diff --git a/lib/src/index.rs b/lib/src/index.rs new file mode 100644 index 0000000..2227163 --- /dev/null +++ b/lib/src/index.rs @@ -0,0 +1,36 @@ +pub struct Index { + num_slots: usize, + deleted_slots: Vec, +} + +impl Index { + #[must_use] + pub fn new() -> Index { + Index { + num_slots: 0, + deleted_slots: Vec::new(), + } + } + + // Allocate or reuse a slot of the index. + pub fn insert(&mut self) -> usize { + if self.deleted_slots.is_empty() { + let slot_id = self.num_slots; + self.num_slots += 1; + slot_id + } else { + self.deleted_slots.pop().unwrap() + } + } + + // Delete a slot from the index. 
+ pub fn delete(&mut self, slot_id: usize) { + self.deleted_slots.push(slot_id); + } +} + +impl Default for Index { + fn default() -> Self { + Self::new() + } +} diff --git a/lib/src/lib.rs b/lib/src/lib.rs new file mode 100644 index 0000000..24b4107 --- /dev/null +++ b/lib/src/lib.rs @@ -0,0 +1,9 @@ +mod distance; +mod index; +#[allow(dead_code)] +mod node; +#[allow(dead_code)] +mod rindex; +#[allow(dead_code)] +mod sphere; +pub use rindex::Rindex; diff --git a/lib/src/node.rs b/lib/src/node.rs new file mode 100644 index 0000000..42cfda3 --- /dev/null +++ b/lib/src/node.rs @@ -0,0 +1,90 @@ +use crate::sphere::Sphere; + +pub struct Node { + pub slot_id: usize, + pub height: usize, + pub parent: usize, + pub sphere: Sphere, + pub children: Vec, +} + +impl Node { + #[must_use] + pub fn new( + slot_id: usize, + height: usize, + parent: usize, + sphere: Sphere, + children: Vec, + ) -> Node { + Node { + slot_id, + height, + parent, + sphere, + children, + } + } + + #[must_use] + pub fn point(point: [f64; D]) -> Node { + Self::new(usize::MAX, 0, usize::MAX, Sphere::point(point), Vec::new()) + } + + #[must_use] + pub fn leaf() -> Node { + Self::new(usize::MAX, 1, usize::MAX, Sphere::default(), Vec::new()) + } + + #[must_use] + pub fn internal(children: Vec) -> Node { + Self::new(usize::MAX, 0, usize::MAX, Sphere::default(), children) + } + + #[must_use] + pub fn is_point(&self) -> bool { + self.height == 0 + } + + #[must_use] + pub fn is_leaf(&self) -> bool { + self.height == 1 + } + + #[must_use] + pub fn is_deleted(&self) -> bool { + self.slot_id == usize::MAX + } + + #[allow(clippy::inherent_to_string)] + pub fn to_string(&self) -> String { + let children = self + .children + .iter() + .map(std::string::ToString::to_string) + .collect::>() + .join("+"); + format!( + "{},{},{},{},{},{},{}\n", + self.slot_id, + self.height, + self.parent, + children, + self.sphere.radius, + self.sphere.center[0], + self.sphere.center[1], + ) + } +} + +impl Default for Node { + fn 
default() -> Self { + Node { + slot_id: usize::MAX, + height: 0, + parent: usize::MAX, + sphere: Sphere::default(), + children: Vec::new(), + } + } +} diff --git a/lib/src/rindex.rs b/lib/src/rindex.rs new file mode 100644 index 0000000..e4c4071 --- /dev/null +++ b/lib/src/rindex.rs @@ -0,0 +1,533 @@ +use std::{collections::BinaryHeap, vec}; + +use crate::{distance::euclidean, index::Index, node::Node}; +use ordered_float::OrderedFloat; + +pub struct Rindex { + min_fanout: usize, + max_fanout: usize, + reinsert_fanout: usize, + reinsert_height: usize, + root: usize, + nodes: Vec>, + index: Index, +} + +impl Rindex { + #[must_use] + pub fn new(fanout: usize) -> Option { + if fanout < 4 { + return None; + } + Some(Rindex { + min_fanout: fanout / 2, + max_fanout: fanout, + reinsert_fanout: fanout / 3, + reinsert_height: 1, + root: usize::MAX, + nodes: Vec::new(), + index: Index::new(), + }) + } + + #[must_use] + pub fn insert(&mut self, point: [f64; D]) -> usize { + // Create the root node if it doesn't exist + if self.root == usize::MAX { + let node = Node::leaf(); + self.root = self.add_slot(node); + } + + // Create a point node (reuse a slot in the node vector if possible) + let point = Node::point(point); + let slot_id = self.add_slot(point); + + // Insert the point node into the tree + let mut reinsert_list = vec![slot_id]; + self.reinsert_height = 1; + self.reinsert_nodes(&mut reinsert_list); + + slot_id + } + + pub fn delete(&mut self, point_id: usize) { + let mut reinsert_list = self.delete_entry(point_id); + self.reinsert_height = 1; + self.reinsert_nodes(&mut reinsert_list); + } + + #[must_use] + pub fn query(&self, point: [f64; D], radius: f64) -> Vec { + let mut result = Vec::new(); + let mut queue = vec![self.root]; + while let Some(node_id) = queue.pop() { + let node = &self.nodes[node_id]; + if node.is_point() { + result.push(node_id); + continue; + } + for &child_id in &node.children { + let child = &self.nodes[child_id]; + let distance = + 
(euclidean(&child.sphere.center, &point) - child.sphere.radius).max(0.0); + if distance <= radius { + queue.push(child_id); + } + } + } + result + } + + #[must_use] + pub fn query_neighbors(&self, point: [f64; D], k: usize) -> Vec { + let mut result = BinaryHeap::from(vec![(OrderedFloat(f64::INFINITY), usize::MAX); k]); + let mut queue = BinaryHeap::from(vec![(OrderedFloat(0.0), self.root)]); + + while let Some((distance, node_id)) = queue.pop() { + let node = &self.nodes[node_id]; + let current_kth = result.peek().unwrap_or(&(OrderedFloat(f64::INFINITY), 0)).0; + if distance >= current_kth { + continue; + } + if node.is_point() { + result.push((distance, node_id)); + result.pop(); + continue; + } + for &child_id in &node.children { + let child = &self.nodes[child_id]; + let distance = + (euclidean(&child.sphere.center, &point) - child.sphere.radius).max(0.0); + queue.push((OrderedFloat(distance), child_id)); + } + } + result + .into_sorted_vec() + .into_iter() + .map(|(_, id)| id) + .collect() + } + + #[must_use] + pub fn height(&self) -> usize { + self.nodes.get(self.root).map_or(0, |node| node.height) + } + + fn reinsert_nodes(&mut self, reinsert_list: &mut Vec) { + while let Some(entry_id) = reinsert_list.pop() { + let res = self.insert_entry(self.root, entry_id); + reinsert_list.extend(res); + self.reinsert_height += 1; + } + self.adjust_tree(); + } + + // Insert a node into the tree + // Split the node if it has too many children + fn insert_entry(&mut self, node: usize, entry: usize) -> Vec { + if self.nodes[node].height == self.nodes[entry].height + 1 { + self.nodes[node].children.push(entry); + self.reshape(node); + + let mut to_be_reinserted = Vec::new(); + if self.nodes[node].children.len() > self.max_fanout && node != self.root { + if self.reinsert_height == self.nodes[node].height && self.reinsert_fanout > 0 { + to_be_reinserted.extend(self.pop_farthest_children(node)); + } else { + to_be_reinserted.push(self.split(node)); + } + } + to_be_reinserted + 
} else { + let best_child = self.choose_subtree(node, entry); + let result = self.insert_entry(best_child, entry); + self.reshape(node); + result + } + } + + fn delete_entry(&mut self, entry: usize) -> Vec { + let mut current = self.nodes[entry].parent; + self.nodes[current].children.retain(|&x| x != entry); + self.delete_slot(entry); + + let mut reinsert_list = Vec::new(); + while current != usize::MAX { + self.reshape(current); + let parent = self.nodes[current].parent; + if current != self.root && self.nodes[current].children.len() < self.min_fanout { + self.nodes[parent].children.retain(|&x| x != current); + reinsert_list.extend(self.nodes[current].children.clone()); + self.delete_slot(current); + } + current = parent; + } + reinsert_list + } + + fn adjust_tree(&mut self) { + self.reshape(self.root); + if self.nodes[self.root].children.len() > self.max_fanout { + let sibling = self.split(self.root); + let new_root = Node::internal(vec![self.root, sibling]); + self.root = self.add_slot(new_root); + self.reshape(self.root); + } else if self.nodes[self.root].height > 1 && self.nodes[self.root].children.len() == 1 { + let new_root = self.nodes[self.root].children[0]; + self.delete_slot(self.root); + self.root = new_root; + self.nodes[self.root].parent = usize::MAX; + } else if self.nodes[self.root].children.is_empty() { + self.delete_slot(self.root); + self.root = usize::MAX; + } + } + + fn choose_subtree(&self, node: usize, entry: usize) -> usize { + let mut best_distance = f64::INFINITY; + let mut best_child = usize::MAX; + for &child_id in &self.nodes[node].children { + let child = &self.nodes[child_id]; + let distance = euclidean(&child.sphere.center, &self.nodes[entry].sphere.center); + if distance < best_distance { + best_distance = distance; + best_child = child_id; + } + } + best_child + } + + fn pop_farthest_children(&mut self, node: usize) -> Vec { + let mut children = self.nodes[node].children.clone(); + children.sort_by_key(|child| { + let child_sphere 
= self.nodes[*child].sphere; + let dist = euclidean(&self.nodes[node].sphere.center, &child_sphere.center); + OrderedFloat(dist + child_sphere.radius) + }); + let to_be_reinserted = children.split_off(children.len() - self.reinsert_fanout); + self.nodes[node].children = children; + self.reshape(node); + to_be_reinserted + } + + fn split(&mut self, slot_id: usize) -> usize { + // Find the farthest child from the centroid as the sibling seed + let mut sibling_seed: usize = self.nodes[slot_id].children[0]; + let mut max_dist = 0.0; + for child in &self.nodes[slot_id].children { + let child_sphere = &self.nodes[*child].sphere; + let distance = euclidean(&self.nodes[slot_id].sphere.center, &child_sphere.center) + + child_sphere.radius; + if distance > max_dist { + sibling_seed = *child; + max_dist = distance; + } + } + + // Give minimum fanout children to the sibling + let mut children = self.nodes[slot_id].children.clone(); + children.sort_by_key(|child| { + let child_sphere = self.nodes[*child].sphere; + let distance = euclidean( + &self.nodes[sibling_seed].sphere.center, + &child_sphere.center, + ) + child_sphere.radius; + OrderedFloat(distance) + }); + let mut remaining = children.split_off(self.min_fanout); + + // Both nodes should have at least min_fanout children + let sibling = self.add_slot(Node::internal(children)); + self.reshape(sibling); + self.nodes[slot_id].children = remaining.split_off(remaining.len() - self.min_fanout); + self.reshape(slot_id); + + // Distribute the remaining children to whichever node is closer + for r in remaining { + let dist_sibling = euclidean( + &self.nodes[sibling].sphere.center, + &self.nodes[r].sphere.center, + ); + let dist_node = euclidean( + &self.nodes[slot_id].sphere.center, + &self.nodes[r].sphere.center, + ); + if dist_sibling < dist_node { + self.nodes[sibling].children.push(r); + } else { + self.nodes[slot_id].children.push(r); + } + } + + // Finally, reshape both nodes + self.reshape(sibling); + 
self.reshape(slot_id); + + sibling + } + + #[allow(clippy::similar_names)] + fn reshape(&mut self, slot_id: usize) { + // Calculate the centroid, weight and height of the parent + let mut centroid = [0.0; D]; + let mut weight = 0.0; + let mut height = 0; + for child_id in &self.nodes[slot_id].children { + let child = &self.nodes[*child_id].sphere; + for (i, x) in child.center.iter().enumerate() { + centroid[i] += x * child.weight; + } + weight += child.weight; + height = height.max(self.nodes[*child_id].height); + } + for x in &mut centroid { + *x /= weight; + } + + // Calculate the radius of the new sphere + let mut radius: f64 = 0.0; + for child_id in &self.nodes[slot_id].children { + let child = &self.nodes[*child_id].sphere; + let distance = euclidean(¢roid, &child.center); + radius = radius.max(distance + child.radius); + } + + // Update the sphere & height of the parent + self.nodes[slot_id].sphere.center = centroid; + self.nodes[slot_id].sphere.radius = radius; + self.nodes[slot_id].sphere.weight = weight; + self.nodes[slot_id].height = height + 1; + + // Update parent of the children + for child_id in self.nodes[slot_id].children.clone() { + self.nodes[child_id].parent = slot_id; + } + } + + fn add_slot(&mut self, mut node: Node) -> usize { + let slot_id = self.index.insert(); + node.slot_id = slot_id; + if slot_id == self.nodes.len() { + self.nodes.push(node); + } else { + self.nodes[slot_id] = node; + } + slot_id + } + + fn delete_slot(&mut self, slot_id: usize) { + self.index.delete(slot_id); + self.nodes[slot_id] = Node::default(); + } + + #[must_use] + pub fn nodes_to_string_rows(&self) -> Vec { + let mut rows = Vec::new(); + let height = self.height(); + for h in (0..=height).rev() { + for node in &self.nodes { + if node.height == h { + rows.push(node.to_string()); + } + } + } + rows + } +} + +impl Default for Rindex { + fn default() -> Self { + Rindex::new(10).expect("Invalid fanout") + } +} + +#[cfg(test)] +mod tests { + use rand::{rngs::StdRng, 
Rng, SeedableRng}; + + use super::Rindex; + use crate::{distance::euclidean, node::Node}; + + #[test] + fn reshape() { + let mut rindex = Rindex::default(); + + // Create some point nodes + let node_a = rindex.add_slot(Node::point([0.0, 0.0])); + let node_b = rindex.add_slot(Node::point([0.0, 2.0])); + let node_c = rindex.add_slot(Node::point([2.0, 0.0])); + let node_d = rindex.add_slot(Node::point([2.0, 2.0])); + + // Create a parent node + let mut parent = Node::leaf(); + parent.children = vec![node_a, node_b, node_c, node_d]; + let node_parent = rindex.add_slot(parent); + + // Reshape the parent node + rindex.reshape(node_parent); + + // Check the parent node's sphere + let parent = &rindex.nodes[node_parent]; + assert_eq!(parent.sphere.center, [1., 1.]); + assert_eq!(parent.sphere.radius, 2.0_f64.sqrt()); + assert_eq!(parent.sphere.weight, 4.0); + assert_eq!(parent.height, 1); + + // Check the parent-child relationship + assert_eq!(rindex.nodes[node_a].parent, node_parent); + assert_eq!(rindex.nodes[node_b].parent, node_parent); + assert_eq!(rindex.nodes[node_c].parent, node_parent); + assert_eq!(rindex.nodes[node_d].parent, node_parent); + } + + #[test] + fn split() { + let fanout = 8; + let mut rindex = Rindex::new(fanout).expect("Invalid fanout"); + + // Create 9 point nodes, as the fanout of 8 will trigger a split + let node_a = rindex.add_slot(Node::point([0.0, 0.0])); + let node_b = rindex.add_slot(Node::point([0.0, 2.0])); + let node_c = rindex.add_slot(Node::point([1.0, 1.0])); + let node_d = rindex.add_slot(Node::point([2.0, 0.0])); + let node_e = rindex.add_slot(Node::point([2.0, 2.0])); + let node_w = rindex.add_slot(Node::point([10.0, 10.0])); + let node_x = rindex.add_slot(Node::point([10.0, 12.0])); + let node_y = rindex.add_slot(Node::point([12.0, 10.0])); + let node_z = rindex.add_slot(Node::point([12.0, 12.0])); + let point_nodes = vec![ + node_a, node_b, node_c, node_d, node_e, node_w, node_x, node_y, node_z, + ]; + + // Create a parent node + 
let mut node = Node::default(); + node.children = point_nodes.clone(); + let node = rindex.add_slot(node); + rindex.reshape(node); + + // Split the parent node + let sibling = rindex.split(node); + + // Check the node's sphere + let node = &rindex.nodes[node]; + assert_eq!(node.sphere.center, [1., 1.]); + assert_eq!(node.sphere.radius, 2.0_f64.sqrt()); + assert_eq!(node.sphere.weight, 5.0); + assert_eq!(node.height, 1); + + // Check the sibling's sphere + let sibling = &rindex.nodes[sibling]; + assert_eq!(sibling.sphere.center, [11., 11.]); + assert_eq!(sibling.sphere.radius, 2.0_f64.sqrt()); + assert_eq!(sibling.sphere.weight, 4.0); + assert_eq!(sibling.height, 1); + } + + #[test] + fn update() { + let fanout = 8; + let mut rindex = Rindex::new(fanout).expect("Invalid fanout"); + // The tree should be empty + assert_eq!(rindex.height(), 0); + + // Insert 8 points to fill the root node + let mut point_ids = Vec::new(); + for i in 0..fanout { + let point_id = rindex.insert([i as f64, i as f64]); + point_ids.push(point_id); + } + + // The tree should be of height 1 since + // the number of points is equal to the fanout + assert_eq!(rindex.height(), 1); + + // Insert one more point to trigger a split (so the tree grows in height) + let last_inserted = rindex.insert([fanout as f64, fanout as f64]); + point_ids.push(last_inserted); + assert_eq!(rindex.height(), 2); + + // Delete two points to trigger a merge (so the tree shrinks in height) + rindex.delete(point_ids.pop().unwrap()); + rindex.delete(point_ids.pop().unwrap()); + + // The tree should be of height 1 again + assert_eq!(rindex.height(), 1); + + // Delete all remaining points + for point_id in point_ids { + rindex.delete(point_id); + } + + // The tree should be empty again + assert_eq!(rindex.height(), 0); + } + + #[test] + fn query() { + let mut rindex = Rindex::default(); + + // Insert some points + let mut point_ids = Vec::new(); + for i in 0..100 { + let point_id = rindex.insert([i as f64, i as f64]); + 
point_ids.push(point_id); + } + + // Set the query point to the center of the data layout + let query_point = [50.0, 50.0]; + let query_radius = 5.0; + + // Find the expected points within the radius + let mut expected = Vec::new(); + for p in point_ids { + let point = rindex.nodes[p].sphere.center; + let distance = euclidean(&point, &query_point); + if distance <= query_radius { + expected.push(p); + } + } + + // Query the tree for the points within the radius + let mut range_query_result = rindex.query(query_point, query_radius); + range_query_result.sort(); + assert_eq!(expected, range_query_result); + + // Query the tree for k nearest neighbors of the query point + let mut knn_query_result = rindex.query_neighbors(query_point, range_query_result.len()); + knn_query_result.sort(); + + // The results of the range query and the kNN query should be the same + assert_eq!(expected, knn_query_result); + } + + #[test] + fn verify_fanout_params() { + let mut rindex = Rindex::default(); + let n = 1000; + let mut rng = StdRng::seed_from_u64(0); + let deletion_probability = 0.2; + + // Perform a random sequence of insertions and deletions + let mut point_ids = Vec::new(); + for _ in 0..n { + let should_delete = rng.gen_bool(deletion_probability); + if should_delete && !point_ids.is_empty() { + let random_index = rng.gen_range(0..point_ids.len()); + let point_id = point_ids.swap_remove(random_index); + rindex.delete(point_id); + } else { + let point = [rng.gen_range(0.0..100.0), rng.gen_range(0.0..100.0)]; + let point_id = rindex.insert(point); + point_ids.push(point_id); + } + + // Check the fanout constraints after each operation + for node in &rindex.nodes { + if !node.is_point() && node.slot_id != rindex.root && !node.is_deleted() { + assert!(node.children.len() >= rindex.min_fanout); + assert!(node.children.len() <= rindex.max_fanout); + } + } + } + } +} diff --git a/lib/src/sphere.rs b/lib/src/sphere.rs new file mode 100644 index 0000000..5947cc6 --- /dev/null +++ 
b/lib/src/sphere.rs @@ -0,0 +1,30 @@ +#[derive(Clone, Copy)] +pub struct Sphere { + pub center: [f64; D], + pub radius: f64, + pub weight: f64, +} + +impl Sphere { + pub fn new(center: [f64; D], radius: f64, weight: f64) -> Sphere { + Sphere { + center, + radius, + weight, + } + } + + pub fn point(point: [f64; D]) -> Sphere { + Self::new(point, 0.0, 1.0) + } +} + +impl Default for Sphere { + fn default() -> Self { + Sphere { + center: [0.0; D], + radius: 0.0, + weight: 0.0, + } + } +} diff --git a/lib/tests/random.rs b/lib/tests/random.rs new file mode 100644 index 0000000..a21b4d0 --- /dev/null +++ b/lib/tests/random.rs @@ -0,0 +1,56 @@ +use rand::{rngs::StdRng, Rng, SeedableRng}; +use rindex::Rindex; + +#[test] +fn test_random() { + let mut rindex = Rindex::default(); + + let mut rng = StdRng::seed_from_u64(0); + let deletion_probability = 0.2; + + let num_ops = 1000; + let mut points = Vec::new(); + + for _ in 0..num_ops { + // Randomly insert or delete a point + let should_delete = rng.gen_bool(deletion_probability); + if should_delete && !points.is_empty() { + let idx = rng.gen_range(0..points.len()); + let (point_id, _) = points.swap_remove(idx); + rindex.delete(point_id); + } else { + let x = rng.gen_range(-100.0..100.0); + let y = rng.gen_range(-100.0..100.0); + let point_id = rindex.insert([x, y]); + points.push((point_id, [x, y])); + } + + // Create a random query point and radius + let x = rng.gen_range(-100.0..100.0); + let y = rng.gen_range(-100.0..100.0); + let query_point = [x, y]; + let query_radius = rng.gen_range(5.0..10.0); + + // Compute the expected results + let mut expected = Vec::new(); + for (id, point) in &points { + let dx = point[0] - query_point[0]; + let dy = point[1] - query_point[1]; + let distance = (dx * dx + dy * dy).sqrt(); + if distance <= query_radius { + expected.push(*id); + } + } + expected.sort(); + + // Compute the actual results using the range query + let mut actual = rindex.query(query_point, query_radius); + 
actual.sort(); + assert_eq!(expected, actual); + + // Compute the actual results using the k nearest neighbors query + let mut actual = rindex.query_neighbors(query_point, expected.len()); + actual.sort(); + assert_eq!(expected, actual); + } +} diff --git a/lib/tests/usage.rs b/lib/tests/usage.rs new file mode 100644 index 0000000..4bbc290 --- /dev/null +++ b/lib/tests/usage.rs @@ -0,0 +1,32 @@ +use rindex::Rindex; + +#[test] +fn basic_usage() { + let mut rindex = Rindex::default(); + + // Insert some points + let a = rindex.insert([1.0, 1.0]); + let b = rindex.insert([2.0, 2.0]); + let c = rindex.insert([3.0, 3.0]); + let d = rindex.insert([20.0, 20.0]); + + // Query the tree for nearest neighbors of the query point + let query_point = [0.0, 0.0]; + let result = rindex.query_neighbors(query_point, 3); + + // The result should contain the points a, b, and c + assert_eq!(result.len(), 3); + assert!(result.contains(&a)); + assert!(result.contains(&b)); + assert!(result.contains(&c)); + + // Delete the point c + rindex.delete(c); + + // Query the tree again (c should not be in the result) + let result = rindex.query_neighbors(query_point, 3); + assert_eq!(result.len(), 3); + assert!(result.contains(&a)); + assert!(result.contains(&b)); + assert!(result.contains(&d)); +} diff --git a/src/index.rs b/src/index.rs deleted file mode 100644 index 07016ad..0000000 --- a/src/index.rs +++ /dev/null @@ -1,9 +0,0 @@ -pub trait Index { - fn insert(&mut self, point: [f64; D]); - fn rknn(&self, point: [f64; D]) -> Vec; - fn query_range(&self, point_index: usize, range: f64) -> Vec; - - fn core_distance_of(&self, point_index: usize) -> f64; - fn neighbors_of(&self, point_index: usize) -> Vec; - fn num_points(&self) -> usize; -} diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index 2a47e49..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod distance; -mod index; -#[allow(clippy::module_name_repetitions)] -mod linear; -mod tree; - -pub use index::Index; -pub use 
linear::LinearIndex; -pub use tree::sstree::SSTree; diff --git a/src/linear.rs b/src/linear.rs deleted file mode 100644 index 9c4633d..0000000 --- a/src/linear.rs +++ /dev/null @@ -1,113 +0,0 @@ -use crate::{distance::euclidean, index::Index}; -use ordered_float::OrderedFloat; -use std::collections::BinaryHeap; - -pub struct LinearIndex { - k: usize, - data: Vec<[f64; D]>, - neighbors: Vec, usize)>>, -} - -impl LinearIndex { - #[must_use] - pub fn new(k: usize) -> Self { - Self { - k, - data: Vec::new(), - neighbors: Vec::new(), - } - } - - fn add_neighbor(&mut self, a: usize, b: usize) { - let distance = euclidean(&self.data[a], &self.data[b]); - let core_a = self.core_distance_of(a); - let core_b = self.core_distance_of(b); - if a != b && distance < core_b { - self.neighbors[b].push((OrderedFloat(distance), a)); - if self.neighbors[b].len() > self.k { - self.neighbors[b].pop(); - } - } - - if distance < core_a { - self.neighbors[a].push((OrderedFloat(distance), b)); - if self.neighbors[a].len() > self.k { - self.neighbors[a].pop(); - } - } - } -} - -impl Index for LinearIndex { - fn insert(&mut self, point: [f64; D]) { - // Append the point to the data. - let new_point_index = self.data.len(); - self.data.push(point); - self.neighbors.push(BinaryHeap::new()); - - // Find the reverse k-nearest neighbors of the point and update its core distance. - for neighbor_idx in 0..self.num_points() { - self.add_neighbor(neighbor_idx, new_point_index); - } - } - - fn rknn(&self, point: [f64; D]) -> Vec { - // Find the reverse k-nearest neighbors of the point and update its core distance. 
- let mut rknns = Vec::new(); - for neighbor_idx in 0..self.num_points() { - let dist = euclidean(&self.data[neighbor_idx], &point); - if dist < self.core_distance_of(neighbor_idx) { - rknns.push(neighbor_idx); - } - } - rknns - } - - fn query_range(&self, point_index: usize, range: f64) -> Vec { - let mut result = Vec::new(); - for i in 0..self.num_points() { - if euclidean(&self.data[point_index], &self.data[i]) <= range { - result.push(i); - } - } - result - } - - fn core_distance_of(&self, point_index: usize) -> f64 { - if self.neighbors[point_index].len() != self.k { - return f64::INFINITY; - } - self.neighbors[point_index].peek().unwrap().0.into_inner() - } - - fn neighbors_of(&self, point_index: usize) -> Vec { - self.neighbors[point_index] - .iter() - .map(|(_, neighbor_index)| *neighbor_index) - .collect() - } - - fn num_points(&self) -> usize { - self.data.len() - } -} - -#[cfg(test)] -pub mod tests { - use crate::Index; - - #[test] - pub fn test_linear() { - let mut index = super::LinearIndex::new(3); - index.insert([0.0, 0.0]); - - for i in 1..100 { - let point = [0., i as f64]; - let rknns = index.rknn(point); - // Since points are inserted in order, the reverse k-nearest neighbors should - // be the previously inserted point. 
- assert!(rknns.contains(&(i - 1))); - index.insert(point); - } - } -} diff --git a/src/tree/mod.rs b/src/tree/mod.rs deleted file mode 100644 index 202ddd3..0000000 --- a/src/tree/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -mod node; -mod sphere; -pub mod sstree; diff --git a/src/tree/node.rs b/src/tree/node.rs deleted file mode 100644 index ea3b65c..0000000 --- a/src/tree/node.rs +++ /dev/null @@ -1,35 +0,0 @@ -use super::sphere::Sphere; - -pub struct InsertionEntry { - pub idx: usize, - pub parent_height: usize, - pub sphere: Sphere, -} - -pub struct Node { - pub idx: usize, - pub parent: usize, - pub height: usize, - pub sphere: Sphere, - pub variance: [f64; D], - pub children: Vec, - pub bound: f64, -} - -impl Node { - pub fn new(idx: usize, parent: usize, height: usize, sphere: Sphere) -> Node { - Node { - idx, - parent, - height, - sphere, - variance: [f64::INFINITY; D], - children: Vec::new(), - bound: f64::INFINITY, - } - } - - pub fn is_leaf(&self) -> bool { - self.height == 0 - } -} diff --git a/src/tree/sphere.rs b/src/tree/sphere.rs deleted file mode 100644 index 5751a85..0000000 --- a/src/tree/sphere.rs +++ /dev/null @@ -1,17 +0,0 @@ -use crate::distance::euclidean; - -#[derive(Clone, Copy)] -pub struct Sphere { - pub center: [f64; D], - pub radius: f64, -} - -impl Sphere { - pub fn new(center: [f64; D], radius: f64) -> Sphere { - Sphere { center, radius } - } - - pub fn min_distance(&self, other: &[f64; D]) -> f64 { - (euclidean(&self.center, other) - self.radius).max(0.) 
- } -} diff --git a/src/tree/sstree.rs b/src/tree/sstree.rs deleted file mode 100644 index 077b0af..0000000 --- a/src/tree/sstree.rs +++ /dev/null @@ -1,548 +0,0 @@ -use crate::{distance::euclidean, Index}; -use conv::ValueFrom; -use ordered_float::{Float, OrderedFloat}; -use std::collections::BinaryHeap; - -use super::{ - node::{InsertionEntry, Node}, - sphere::Sphere, -}; - -pub struct SSTree { - k: usize, - data: Vec<[f64; D]>, - neighbors: Vec, usize)>>, - root: usize, - node_max_entries: usize, - nodes: Vec>, -} - -impl Index for SSTree { - fn insert(&mut self, point: [f64; D]) { - // Insert the new point - let new_point_index = self.data.len(); - self.data.push(point); - - // Compute the core distance of the new point - self.neighbors - .push(BinaryHeap::from(vec![(OrderedFloat(0.), new_point_index)])); - let new_point_neighbors = self.query(&point); - for (neighbor_idx, _) in new_point_neighbors { - self.add_core(new_point_index, neighbor_idx); - } - - let mut reinsert_entries = vec![InsertionEntry { - idx: new_point_index, - parent_height: 0, - sphere: Sphere::new(point, 0.0), - }]; - let mut reinsert_height = 0; - while let Some(entry) = reinsert_entries.pop() { - let new_reinsert_entries = self.insert_recursive(entry, self.root, reinsert_height); - reinsert_entries.extend(new_reinsert_entries); - reinsert_height += 1; - } - - if self.nodes[self.root].children.len() > self.node_max_entries { - let old_root_idx = self.root; - let sibling_entry = self.split(self.root); - let new_root_idx = self.nodes.len(); - let mut new_root = Node::new( - new_root_idx, - usize::MAX, - self.nodes[old_root_idx].height + 1, - Sphere::new(self.nodes[sibling_entry.idx].sphere.center, 0.0), - ); - new_root.children = vec![old_root_idx, sibling_entry.idx]; - self.nodes[old_root_idx].parent = new_root_idx; - self.nodes[sibling_entry.idx].parent = new_root_idx; - self.nodes.push(new_root); - self.reshape(new_root_idx); - self.root = new_root_idx; - } - self.update_core(self.root, 
new_point_index); - } - - fn rknn(&self, point: [f64; D]) -> Vec { - let mut result = Vec::new(); - self.rknn_recursive(self.root, &point, &mut result); - result - } - - fn query_range(&self, point_index: usize, range: f64) -> Vec { - let mut neighbors = Vec::new(); - self.query_range_recursive(self.root, point_index, range, &mut neighbors); - neighbors - } - - fn core_distance_of(&self, point_index: usize) -> f64 { - if self.neighbors[point_index].len() != self.k { - return f64::INFINITY; - } - self.neighbors[point_index].peek().unwrap().0.into_inner() - } - - fn neighbors_of(&self, point_index: usize) -> Vec { - self.neighbors[point_index] - .iter() - .map(|(_, neighbor_index)| *neighbor_index) - .collect() - } - - fn num_points(&self) -> usize { - self.data.len() - } -} - -impl SSTree { - #[must_use] - pub fn new(k: usize) -> Self { - let root = Node::new(0, usize::MAX, 0, Sphere::new([f64::INFINITY; D], 0.0)); - Self { - k, - data: Vec::new(), - neighbors: Vec::new(), - root: 0, - node_max_entries: 2 * k + 1, - nodes: vec![root], - } - } - - fn rknn_recursive(&self, node_idx: usize, point: &[f64; D], rneighbors: &mut Vec) { - let distance_to_node = self.nodes[node_idx].sphere.min_distance(point); - if distance_to_node > self.nodes[node_idx].bound { - return; - } - - if self.nodes[node_idx].is_leaf() { - for neighbor_idx in &self.nodes[node_idx].children { - let distance = euclidean(point, &self.data[*neighbor_idx]); - if distance < self.core_distance_of(*neighbor_idx) { - rneighbors.push(*neighbor_idx); - } - } - } else { - for child_idx in &self.nodes[node_idx].children { - self.rknn_recursive(*child_idx, point, rneighbors); - } - } - } - - fn query_range_recursive( - &self, - node_idx: usize, - point_index: usize, - range: f64, - neighbors: &mut Vec, - ) { - let distance_to_node = self.nodes[node_idx] - .sphere - .min_distance(&self.data[point_index]); - if distance_to_node > range { - return; - } - - if self.nodes[node_idx].is_leaf() { - for neighbor_idx in 
&self.nodes[node_idx].children { - let distance_to_neighbor = - euclidean(&self.data[point_index], &self.data[*neighbor_idx]); - if distance_to_neighbor <= range { - neighbors.push(*neighbor_idx); - } - } - } else { - for child_idx in &self.nodes[node_idx].children { - self.query_range_recursive(*child_idx, point_index, range, neighbors); - } - } - } - - fn query(&mut self, point: &[f64; D]) -> Vec<(usize, f64)> { - if self.root == usize::MAX { - return Vec::new(); - } - let mut neighbors = BinaryHeap::new(); - self.query_recursive(point, self.root, &mut neighbors); - neighbors - .into_sorted_vec() - .into_iter() - .map(|(distance, idx)| (idx, distance.into_inner())) - .collect::>() - } - - fn query_recursive( - &mut self, - point: &[f64; D], - node_idx: usize, - neighbors: &mut BinaryHeap<(OrderedFloat, usize)>, - ) { - let distance_to_node = self.nodes[node_idx].sphere.min_distance(point); - let mut kth_distance = f64::INFINITY; - if neighbors.len() == self.k { - kth_distance = *neighbors.peek().unwrap().0; - } - if distance_to_node >= kth_distance { - return; - } - - if self.nodes[node_idx].is_leaf() { - for point_idx in &self.nodes[node_idx].children { - let distance_to_neighbor = euclidean(point, &self.data[*point_idx]); - if neighbors.len() == self.k { - kth_distance = *neighbors.peek().unwrap().0; - } - if neighbors.len() < self.k { - neighbors.push((OrderedFloat(distance_to_neighbor), *point_idx)); - } else if distance_to_neighbor < kth_distance { - neighbors.pop(); - neighbors.push((OrderedFloat(distance_to_neighbor), *point_idx)); - } - } - } else { - let mut to_visit = Vec::new(); - for child_idx in &self.nodes[node_idx].children { - let distance_to_child = self.nodes[*child_idx].sphere.min_distance(point); - if distance_to_child < kth_distance { - to_visit.push((OrderedFloat(distance_to_child), *child_idx)); - } - } - to_visit.sort(); - - for (child_distance, child_index) in to_visit { - if neighbors.len() == self.k { - kth_distance = 
*neighbors.peek().unwrap().0; - } - if child_distance.0 >= kth_distance { - break; - } - self.query_recursive(point, child_index, neighbors); - } - } - } - - fn update_bound(&mut self, node_idx: usize) { - let mut bound: f64 = 0.0; - for child_idx in &self.nodes[node_idx].children { - let child_bound = if self.nodes[node_idx].is_leaf() { - self.core_distance_of(*child_idx) - } else { - self.nodes[*child_idx].bound - }; - bound = bound.max(child_bound); - } - self.nodes[node_idx].bound = bound; - } - - fn add_core(&mut self, point_index: usize, neighbor_index: usize) -> bool { - let cur_core_distance = self.core_distance_of(point_index); - let distance = OrderedFloat(euclidean( - &self.data[point_index], - &self.data[neighbor_index], - )); - if distance.0 >= cur_core_distance { - return false; - } - self.neighbors[point_index].push((distance, neighbor_index)); - if self.neighbors[point_index].len() > self.k { - self.neighbors[point_index].pop(); - } - true - } - - fn update_core(&mut self, node_idx: usize, point_index: usize) -> bool { - let distance_to_node = self.nodes[node_idx] - .sphere - .min_distance(&self.data[point_index]); - if distance_to_node > self.nodes[node_idx].bound { - return false; - } - - let mut updated = false; - let to_visit = self.nodes[node_idx].children.clone(); - if self.nodes[node_idx].is_leaf() { - for neighbor_idx in to_visit { - if neighbor_idx != point_index { - let cur = self.add_core(neighbor_idx, point_index); - updated = updated || cur; - } - } - } else { - for child_idx in to_visit { - let cur = self.update_core(child_idx, point_index); - updated = updated || cur; - } - } - if updated { - self.update_bound(node_idx); - } - - updated - } - - fn insert_recursive( - &mut self, - entry: InsertionEntry, - node_idx: usize, - reinsert_height: usize, - ) -> Vec> { - assert!( - self.nodes[node_idx].height >= entry.parent_height, - "Node height is lower than the insertion height" - ); - if self.nodes[node_idx].height == entry.parent_height 
{ - self.nodes[node_idx].children.push(entry.idx); - self.reshape(node_idx); - if self.nodes[node_idx].children.len() <= self.node_max_entries || node_idx == self.root - { - return Vec::new(); - } - - if self.nodes[node_idx].height == reinsert_height { - // reinsert - self.pop_farthest_children(node_idx, self.k) - } else { - // split - vec![self.split(node_idx)] - } - } else { - let closest_child_idx = self.choose_subtree(node_idx, &entry.sphere.center); - let reinsert_entries = self.insert_recursive(entry, closest_child_idx, reinsert_height); - self.reshape(node_idx); - reinsert_entries - } - } - - fn choose_subtree(&self, node_idx: usize, point: &[f64; D]) -> usize { - let mut closest_child_idx = usize::MAX; - let mut closest_child_distance = OrderedFloat::max_value(); - let mut closest_center_distance = OrderedFloat::max_value(); - for child_idx in &self.nodes[node_idx].children { - let distance = OrderedFloat(self.nodes[*child_idx].sphere.min_distance(point)); - let center_distance = - OrderedFloat(euclidean(&self.nodes[*child_idx].sphere.center, point)); - if distance < closest_child_distance - || distance == closest_child_distance && center_distance < closest_center_distance - { - closest_child_distance = distance; - closest_child_idx = *child_idx; - closest_center_distance = center_distance; - } - } - closest_child_idx - } - - fn pop_farthest_children(&mut self, node_idx: usize, count: usize) -> Vec> { - assert!(2 * count <= self.nodes[node_idx].children.len()); - let parent_centroid = self.nodes[node_idx].sphere.center; - let mut children = self.nodes[node_idx].children.clone(); - children.sort_by_key(|child_idx| { - OrderedFloat( - self.child_sphere(node_idx, *child_idx) - .min_distance(&parent_centroid), - ) - }); - let mut far_children = Vec::new(); - while far_children.len() < count { - far_children.push(children.pop().unwrap()); - } - self.nodes[node_idx].children = children; - self.reshape(node_idx); - - let mut reinsert_entries = Vec::new(); - for 
far_child in far_children { - let sphere = if self.nodes[node_idx].is_leaf() { - Sphere::new(self.data[far_child], 0.0) - } else { - self.nodes[far_child].sphere - }; - let entry = InsertionEntry { - idx: far_child, - sphere, - parent_height: self.nodes[node_idx].height, - }; - reinsert_entries.push(entry); - } - - reinsert_entries - } - - fn split(&mut self, node_idx: usize) -> InsertionEntry { - let parent = self.nodes[node_idx].parent; - - // choose split axis - let split_axis = self.choose_split_axis(node_idx); - - // sort the children by the split axis - let mut children = self.nodes[node_idx].children.clone(); - children.sort_by_key(|child_idx| { - OrderedFloat(self.child_centroid(node_idx, *child_idx)[split_axis]) - }); - self.nodes[node_idx].children = children; - - // choose split index - let mut split_index = self.choose_split_index(node_idx); - - let left_centroid = self.calculate_center(node_idx, 0, split_index); - let right_centroid = - self.calculate_center(node_idx, split_index, self.nodes[node_idx].children.len()); - let left_distance = euclidean(&left_centroid, &self.nodes[node_idx].sphere.center); - let right_distance = euclidean(&right_centroid, &self.nodes[node_idx].sphere.center); - if left_distance > right_distance { - self.nodes[node_idx].children.reverse(); - split_index = self.nodes[node_idx].children.len() - split_index; - } - - // create the sibling node - let mut sibling_children = Vec::new(); - while self.nodes[node_idx].children.len() > split_index { - sibling_children.push(self.nodes[node_idx].children.pop().unwrap()); - } - self.reshape(node_idx); - - let sibling_idx = self.nodes.len(); - let sibling_sphere = Sphere::new(left_centroid, 0.); - let mut sibling = Node::new( - sibling_idx, - parent, - self.nodes[node_idx].height, - sibling_sphere, - ); - sibling.children = sibling_children; - self.nodes.push(sibling); - self.reshape(sibling_idx); - - let sibling_sphere = self.nodes[sibling_idx].sphere; - InsertionEntry { - idx: 
sibling_idx, - sphere: sibling_sphere, - parent_height: self.nodes[node_idx].height + 1, - } - } - - fn choose_split_index(&self, node_idx: usize) -> usize { - assert!(self.nodes[node_idx].children.len() >= self.node_max_entries); - let num_children = self.nodes[node_idx].children.len(); - let mut selected_index = num_children / 2; - let mut min_variance = f64::INFINITY; - - let start = self.k; - let end = num_children - self.k; - for index in start..end { - let left_variance = self.calculate_variance(node_idx, 0, index); - let right_variance = self.calculate_variance(node_idx, index, num_children); - let mut cur_variance = 0.0; - for d in 0..D { - cur_variance += left_variance[d] + right_variance[d]; - } - if cur_variance < min_variance { - min_variance = cur_variance; - selected_index = index; - } - } - selected_index - } - - fn choose_split_axis(&self, node_idx: usize) -> usize { - assert!(self.nodes[node_idx].children.len() >= self.node_max_entries); - let variance = self.calculate_variance(node_idx, 0, self.nodes[node_idx].children.len()); - variance - .iter() - .enumerate() - .max_by_key(|(_, variance)| OrderedFloat(**variance)) - .unwrap() - .0 - } - - fn calculate_center(&self, node_idx: usize, from: usize, to: usize) -> [f64; D] { - let mut center = [0.0; D]; - let mut weight = 0; - for i in from..to { - let child_idx = self.nodes[node_idx].children[i]; - let child_weight = self.child_weight(node_idx, child_idx); - let child_center = self.child_centroid(node_idx, child_idx); - weight += child_weight; - for dim in 0..D { - center[dim] += child_center[dim] * f64::value_from(child_weight).unwrap(); - } - } - for dim in center.iter_mut().take(D) { - *dim /= f64::value_from(weight).unwrap(); - } - center - } - - fn calculate_variance(&self, node_idx: usize, from: usize, to: usize) -> [f64; D] { - let mean = self.calculate_center(node_idx, from, to); - let mut variance = [0.0; D]; - let mut num_entries = 0; - for i in from..to { - let child_idx = 
self.nodes[node_idx].children[i]; - let child_centroid = self.child_centroid(node_idx, child_idx); - let child_num_entries = self.child_weight(node_idx, child_idx); - for axis in 0..D { - variance[axis] += (child_centroid[axis] - mean[axis]).powi(2) - * f64::value_from(child_num_entries).unwrap(); - if !self.nodes[node_idx].is_leaf() { - variance[axis] += self.nodes[child_idx].variance[axis] - * f64::value_from(child_num_entries).unwrap(); - } - } - num_entries += child_num_entries; - } - for var in variance.iter_mut().take(D) { - *var /= f64::value_from(num_entries).unwrap(); - } - variance - } - - fn child_sphere(&self, node_idx: usize, child_idx: usize) -> Sphere { - if self.nodes[node_idx].is_leaf() { - Sphere::new(self.data[child_idx], 0.0) - } else { - self.nodes[child_idx].sphere - } - } - - fn child_centroid(&self, node_idx: usize, child_idx: usize) -> [f64; D] { - if self.nodes[node_idx].is_leaf() { - self.data[child_idx] - } else { - self.nodes[child_idx].sphere.center - } - } - - fn child_radius(&self, node_idx: usize, child_idx: usize) -> f64 { - if self.nodes[node_idx].is_leaf() { - 0.0 - } else { - self.nodes[child_idx].sphere.radius - } - } - - fn child_weight(&self, node_idx: usize, child_idx: usize) -> usize { - if self.nodes[node_idx].is_leaf() { - 1 - } else { - self.nodes[child_idx].children.len() - } - } - - fn reshape(&mut self, node_idx: usize) { - let center = self.calculate_center(node_idx, 0, self.nodes[node_idx].children.len()); - let mut radius: f64 = 0.; - let children = self.nodes[node_idx].children.clone(); - - for child_idx in children { - let child_centroid = self.child_centroid(node_idx, child_idx); - let child_radius = self.child_radius(node_idx, child_idx); - radius = radius.max(euclidean(¢er, &child_centroid) + child_radius); - if !self.nodes[node_idx].is_leaf() { - self.nodes[child_idx].parent = node_idx; - } - } - self.nodes[node_idx].sphere = Sphere::new(center, radius); - self.nodes[node_idx].variance = - 
self.calculate_variance(node_idx, 0, self.nodes[node_idx].children.len()); - self.update_bound(node_idx); - } -} diff --git a/tests/test_sstree.rs b/tests/test_sstree.rs deleted file mode 100644 index eb4a467..0000000 --- a/tests/test_sstree.rs +++ /dev/null @@ -1,30 +0,0 @@ -use rand::{rngs::StdRng, Rng, SeedableRng}; -use rindex::{Index, LinearIndex, SSTree}; - -#[test] -pub fn test_random() { - let min_pts = 3; - let mut tree = SSTree::new(min_pts); - let mut linear = LinearIndex::new(min_pts); - - let mut rng = StdRng::seed_from_u64(0); - - let n = 1000; - for i in 0..n { - let point = [rng.gen(), rng.gen()]; - let mut tree_rknns = tree.rknn(point); - let mut linear_rknns = linear.rknn(point); - - tree_rknns.sort(); - linear_rknns.sort(); - assert_eq!(tree_rknns, linear_rknns); - - tree.insert(point); - linear.insert(point); - for j in 0..=i { - let actual = linear.core_distance_of(j); - let expected = tree.core_distance_of(j); - assert_eq!(actual, expected); - } - } -}