From: Damyan Ivanov Date: Mon, 15 Nov 2021 06:12:09 +0000 (+0000) Subject: rework DB update using COPY and a PostgreSQL function for maximum speed X-Git-Url: https://git.ktnx.net/?a=commitdiff_plain;h=77d28a4deff0003254af5bd402cdaa4ad5470b30;p=mpd-feeder.git rework DB update using COPY and a PostgreSQL function for maximum speed COPY is the most effective data loading mechanism, and using a function reduces the number of requests to just one --- diff --git a/lib/App/MPD/Feeder.pm b/lib/App/MPD/Feeder.pm index 3808c91..66f3ce1 100644 --- a/lib/App/MPD/Feeder.pm +++ b/lib/App/MPD/Feeder.pm @@ -132,6 +132,8 @@ use constant DEFAULT_CONFIG_FILE => '/etc/mpd-feeder/mpd-feeder.conf'; my $rows = $mpd->send('listallinfo')->get; + $log->trace('got all songs from MPD'); + $db->start_update; try { my $song_count; @@ -142,14 +144,20 @@ use constant DEFAULT_CONFIG_FILE => '/etc/mpd-feeder/mpd-feeder.conf'; $self->db->store_song( $entry->{file}, $entry->{AlbumArtist} // $entry->{Artist}, $entry->{Album} ); + $song_count++; } - $log->info("Updated data about $song_count songs"); + my ($total_songs, $total_artists, $total_albums, + $new_songs, $new_artists, $new_albums + ) = $self->db->finish_update; - $self->db->remove_stale_entries; + $log->info( + "Updated data about $song_count songs (including $new_songs new), " + . "$total_artists artists (including $new_artists new) " - $self->db->finish_update; + . "and $total_albums albums (including $new_albums new)" + ); $db_needs_update = 0; } diff --git a/lib/App/MPD/Feeder/DB.pm b/lib/App/MPD/Feeder/DB.pm index e38279a..6216436 100644 --- a/lib/App/MPD/Feeder/DB.pm +++ b/lib/App/MPD/Feeder/DB.pm @@ -29,46 +29,61 @@ class App::MPD::Feeder::DB { } method start_update { - $generation++; + $log->trace('starting DB update'); $db->begin_work; + $db->do(<do('COPY tmp_songs(path, artist, album) FROM STDIN'); + $generation++; } method finish_update { + $log->trace('finishing DB update'); + $db->pg_putcopyend; + my $sth = $db->prepare_cached(<<'SQL'); +SELECT total_songs, total_artists, total_albums + , new_songs, new_artists, new_albums +FROM update_song_data($1) +SQL + $sth->execute($generation); + my @update_result = $sth->fetchrow_array(); + $sth->finish; + + $self->remove_stale_entries; + $self->set_option(generation => $generation); $db->commit; + + $log->trace('DB update finished'); + + return @update_result; } method cancel_update { + $db->pg_putcopyend; $generation--; $db->rollback; } method store_song($song, $artist, $album) { - return unless length($song) and length($artist) and length($album); - - utf8::decode($_) for $song, $artist, $album; + return + unless length($song) + and length($artist) + and length($album); + + for ($song, $artist, $album) { + utf8::decode($_); + s/\\/\\\\/g; + s/\t/\\\t/g; + s/\n/\\\n/g; + } - $db->prepare_cached( - <<'SQL')->execute( $song, $artist, $album, $generation ); -INSERT INTO songs(path, artist, album, generation) -VALUES($1, $2, $3, $4) -ON CONFLICT ON CONSTRAINT songs_pkey DO -UPDATE SET artist = $2 - , album = $3 - , generation = $4 -SQL - $db->prepare_cached(<<'SQL')->execute( $artist, $album, $generation ); -INSERT INTO albums(artist, album, generation) -VALUES($1, $2, $3) -ON CONFLICT ON CONSTRAINT albums_pkey DO -UPDATE SET generation = $3 -SQL - $db->prepare_cached(<<'SQL')->execute( $artist, $generation ); -INSERT INTO artists(artist, generation) -VALUES($1, $2) -ON CONFLICT ON CONSTRAINT artists_pkey DO -UPDATE SET generation = $2 -SQL + $db->pg_putcopydata(join("\t", $song, $artist, $album)."\n"); } method remove_stale_entries { diff --git a/sql/pgsql/init.sql b/sql/pgsql/init.sql index 8cdce3d..9f8df5b 100644 --- a/sql/pgsql/init.sql +++ b/sql/pgsql/init.sql @@ -39,4 +39,68 @@ create table options( insert into options(generation) values(0); +CREATE FUNCTION update_song_data( + new_generation bigint + , OUT total_songs bigint + , OUT total_artists bigint + , OUT total_albums bigint + , OUT new_songs bigint + , OUT new_artists bigint + , OUT new_albums bigint) +AS +$$ +DECLARE + song_data RECORD; +BEGIN + total_songs = 0; + new_songs = 0; + new_artists = 0; + new_albums = 0; + + FOR song_data IN + SELECT path, artist, album + FROM tmp_songs + LOOP + total_songs = total_songs + 1; + + UPDATE songs + SET generation = new_generation + WHERE path = song_data.path; + + IF ( NOT FOUND ) THEN + new_songs = new_songs + 1; + INSERT INTO songs(path, artist, album, generation) + values(song_data.path, song_data.artist, + song_data.album, new_generation); + END IF; + ----------------------------- + UPDATE artists + SET generation = new_generation + WHERE artist = song_data.artist; + + IF ( NOT FOUND ) THEN + new_artists = new_artists + 1; + INSERT INTO artists(artist, generation) + VALUES(song_data.artist, new_generation); + END IF; + ----------------------------- + UPDATE albums + SET generation = new_generation + WHERE album = song_data.album + AND artist = song_data.artist; + + IF ( NOT FOUND ) THEN + new_albums = new_albums + 1; + INSERT INTO albums(album, artist, generation) + VALUES(song_data.album, song_data.artist, new_generation); + END IF; + END LOOP; + + SELECT COUNT(*) INTO total_artists FROM artists WHERE generation = new_generation; + SELECT COUNT(*) INTO total_albums FROM albums WHERE generation = new_generation; + + RETURN; +END; +$$ LANGUAGE plpgsql; + commit;