Fix speed of "song to song" sorting method

This commit is contained in:
Polochon-street 2021-08-23 17:33:17 +02:00
parent 833d8b020b
commit dd997510d3
6 changed files with 43 additions and 23 deletions

View File

@ -23,7 +23,7 @@ jobs:
toolchain: nightly-2021-04-01
override: false
- name: Packages
run: sudo apt-get install build-essential yasm libavutil-dev libavcodec-dev libavformat-dev libavfilter-dev libavfilter-dev libavdevice-dev libswresample-dev libfftw3-dev ffmpeg
run: sudo apt-get update && sudo apt-get install build-essential yasm libavutil-dev libavcodec-dev libavformat-dev libavfilter-dev libavfilter-dev libavdevice-dev libswresample-dev libfftw3-dev ffmpeg
- name: Build
run: cargo build --verbose
- name: Run tests

View File

@ -1,5 +1,10 @@
# Changelog
## bliss 0.4.0
* Make the song-to-song custom sorting method faster.
* Rename `to_vec` and `to_arr1` to `as_vec` and `as_arr1`.
* Add a playlist_dedup function.
## bliss 0.3.5
* Add custom sorting methods for playlist-making.

4
Cargo.lock generated
View File

@ -882,9 +882,9 @@ dependencies = [
[[package]]
name = "rand"
version = "0.8.3"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e"
checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8"
dependencies = [
"libc",
"rand_chacha",

View File

@ -12,6 +12,7 @@ use crate::Library;
use crate::Song;
use crate::NUMBER_FEATURES;
use ndarray::{Array, Array1};
use ndarray_stats::QuantileExt;
use noisy_float::prelude::*;
/// Convenience trait for user-defined distance metrics.
@ -39,34 +40,35 @@ pub fn cosine_distance(a: &Array1<f32>, b: &Array1<f32>) -> f32 {
}
/// Sort `songs` in place by putting songs close to `first_song` first
/// using the `distance` metric. Deduplicate identical songs.
/// using the `distance` metric.
pub fn closest_to_first_song(
first_song: &Song,
songs: &mut Vec<Song>,
distance: impl DistanceMetric,
) {
songs.sort_by_cached_key(|song| n32(first_song.custom_distance(song, &distance)));
songs.dedup_by_key(|song| n32(first_song.custom_distance(song, &distance)));
}
/// Sort `songs` in place using the `distance` metric and ordering by
/// the smallest distance between each song. Deduplicate identical songs.
/// the smallest distance between each song.
///
/// If the generated playlist is `[song1, song2, song3, song4]`, it means
/// song2 is closest to song1, song3 is closest to song2, and song4 is closest
/// to song3.
///
/// Note that this has a tendency to go from one style to the other very fast,
/// and it can be slow on big libraries.
pub fn song_to_song(first_song: &Song, songs: &mut Vec<Song>, distance: impl DistanceMetric) {
let mut new_songs = vec![first_song.to_owned()];
let mut new_songs = Vec::with_capacity(songs.len());
let mut song = first_song.to_owned();
loop {
if songs.is_empty() {
break;
}
songs
.retain(|s| n32(song.custom_distance(s, &distance)) != 0.);
songs.sort_by_key(|s| n32(song.custom_distance(s, &distance)));
song = songs.remove(0);
while !songs.is_empty() {
let distances: Array1<f32> =
Array::from_shape_fn(songs.len(), |i| song.custom_distance(&songs[i], &distance));
let idx = distances.argmin().unwrap();
song = songs[idx].to_owned();
new_songs.push(song.to_owned());
songs.retain(|s| s != &song);
}
*songs = new_songs;
}
@ -126,7 +128,13 @@ mod test {
song_to_song(&first_song, &mut songs, euclidean_distance);
assert_eq!(
songs,
vec![first_song, second_song, third_song, fourth_song],
vec![
first_song,
first_song_dupe.to_owned(),
second_song,
third_song,
fourth_song
],
);
}
@ -187,7 +195,14 @@ mod test {
closest_to_first_song(&first_song, &mut songs, euclidean_distance);
assert_eq!(
songs,
vec![first_song, second_song, fourth_song, third_song],
vec![
first_song,
first_song_dupe,
second_song,
fourth_song,
fifth_song,
third_song
],
);
}

View File

@ -5,7 +5,7 @@
//! MPD](https://github.com/Polochon-street/blissify-rs) could also be useful.
#[cfg(doc)]
use crate::distance;
use crate::distance::{closest_to_first_song, DistanceMetric, euclidean_distance};
use crate::distance::{closest_to_first_song, euclidean_distance, DistanceMetric};
use crate::{BlissError, BlissResult, Song};
use log::{debug, error, info};
use std::sync::mpsc;

View File

@ -142,7 +142,7 @@ impl fmt::Debug for Analysis {
debug_struct.field(&format!("{:?}", feature), &self[feature]);
}
debug_struct.finish()?;
f.write_str(&format!(" /* {:?} */", &self.to_vec()))
f.write_str(&format!(" /* {:?} */", &self.as_vec()))
}
}
@ -161,7 +161,7 @@ impl Analysis {
/// Return an ndarray `Array1` representing the analysis' features.
///
/// Particularly useful if you want to make a custom distance metric.
pub fn to_arr1(&self) -> Array1<f32> {
pub fn as_arr1(&self) -> Array1<f32> {
arr1(&self.internal_analysis)
}
@ -169,7 +169,7 @@ impl Analysis {
///
/// Particularly useful if you want to iterate through the values to store
/// them somewhere.
pub fn to_vec(&self) -> Vec<f32> {
pub fn as_vec(&self) -> Vec<f32> {
self.internal_analysis.to_vec()
}
@ -187,7 +187,7 @@ impl Analysis {
/// Note that almost all distance metrics you will find obey these
/// properties, so don't sweat it too much.
pub fn custom_distance(&self, other: &Self, distance: impl DistanceMetric) -> f32 {
distance(&self.to_arr1(), &other.to_arr1())
distance(&self.as_arr1(), &other.as_arr1())
}
}
@ -654,7 +654,7 @@ mod tests {
-0.9820945,
-0.95968974,
];
for (x, y) in song.analysis.to_vec().iter().zip(expected_analysis) {
for (x, y) in song.analysis.as_vec().iter().zip(expected_analysis) {
assert!(0.01 > (x - y).abs());
}
}