Skip to content

Commit

Permalink
0.0.3 version:
Browse files Browse the repository at this point in the history
- page calculation fixed
- pagination tests added
- general CSV schema defined
- code refactored
  • Loading branch information
unrec committed Mar 1, 2023
1 parent a209239 commit 03d4589
Show file tree
Hide file tree
Showing 9 changed files with 286 additions and 162 deletions.
15 changes: 9 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ java -jar lastfm-tracks-dumper.jar --user %user% --token %token% --strategy defa

Due to scrobbling issues, duplicated tracks can appear in the library 2 or more times. The application determines
duplicates with two rules:
1. Duplicated tracks go in sequential order.
2. Difference in the scrobbled time is less than 5 sec.
1. Duplicated tracks go in **sequential order**.
2. Difference in the scrobbled time is **less than 5 sec**.

![duplicates](docs/duplicates_720x330.png)

Expand All @@ -34,9 +34,12 @@ Depending on the strategy there will be different output result:

### Exported .csv data

Currently only `date`, `artist`, `track` and `album` values are saved to .csv.
The generated .csv uses a tab as the separator because some names can contain the `;` symbol.

Besides there is an [issue](
https://support.last.fm/t/invalid-mbids-in-responses-to-user-gettoptracks-and-user-getrecenttracks/2011) with *track/artist/album* ids and that's why this id data is not valuable right now.
Currently only `date`, `artist`, `track` and `album` values are saved to .csv.

Additional fields `page`, `pageUrl` and `index` were added for easy navigation in the library.

IDs for **track/artist/album** are not saved due to this [Last.fm issue](
https://support.last.fm/t/invalid-mbids-in-responses-to-user-gettoptracks-and-user-getrecenttracks/2011).

For `only-duplicates` strategy 2 more fields added: `page` and `pageLink` for easy navigation in the library.
9 changes: 5 additions & 4 deletions build.gradle.kts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import org.jetbrains.kotlin.gradle.tasks.KotlinCompile

version = "0.0.2"
version = "0.0.3"
group = "com.unrec"
description = "lastfm-tracks-dumper"
java.sourceCompatibility = JavaVersion.VERSION_11
Expand All @@ -22,13 +22,14 @@ dependencies {
implementation(platform("org.jetbrains.kotlin:kotlin-bom"))
implementation("org.jetbrains.kotlin:kotlin-stdlib-jdk8")
testImplementation("org.jetbrains.kotlin:kotlin-test")
testImplementation(group = "io.kotest", name = "kotest-assertions-core-jvm", version = "5.5.1")
testImplementation("org.junit.jupiter", "junit-jupiter-params", "5.9.2")
testImplementation("io.kotest", "kotest-assertions-core-jvm", "5.5.1")

implementation("org.jetbrains.kotlinx", "kotlinx-coroutines-core", Versions.COROUTINES)
implementation("ru.gildor.coroutines:kotlin-coroutines-okhttp:1.0")

implementation("com.squareup.okhttp3:okhttp:4.9.0")
implementation("me.tongfei","progressbar","0.9.4")
implementation("com.squareup.okhttp3", "okhttp", "4.9.0")
implementation("me.tongfei", "progressbar", "0.9.4")

implementation("com.fasterxml.jackson.module", "jackson-module-kotlin", Versions.JACKSON)
implementation("com.fasterxml.jackson.dataformat", "jackson-dataformat-csv", Versions.JACKSON)
Expand Down
220 changes: 127 additions & 93 deletions src/main/kotlin/com/unrec/lastfm/tracks/dumper/App.kt
Original file line number Diff line number Diff line change
@@ -1,38 +1,28 @@
package com.unrec.lastfm.tracks.dumper

import com.fasterxml.jackson.core.JsonGenerator
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.dataformat.csv.CsvGenerator
import com.fasterxml.jackson.dataformat.csv.CsvMapper
import com.fasterxml.jackson.module.kotlin.registerKotlinModule
import com.unrec.lastfm.tracks.dumper.Constants.baseUrl
import com.unrec.lastfm.tracks.dumper.Constants.defaultPageSize
import com.unrec.lastfm.tracks.dumper.Constants.fetchPageSize
import com.unrec.lastfm.tracks.dumper.Constants.strategyKey
import com.unrec.lastfm.tracks.dumper.Constants.tokenKey
import com.unrec.lastfm.tracks.dumper.Constants.userKey
import com.unrec.lastfm.tracks.dumper.CsvSchemas.schemaMap
import com.unrec.lastfm.tracks.dumper.CsvSchemas.defaultSchema
import com.unrec.lastfm.tracks.dumper.UtilObjects.client
import com.unrec.lastfm.tracks.dumper.UtilObjects.csvMapper
import com.unrec.lastfm.tracks.dumper.UtilObjects.mapper
import com.unrec.lastfm.tracks.dumper.model.Track
import com.unrec.lastfm.tracks.dumper.model.UserInfo
import com.unrec.lastfm.tracks.dumper.utils.asConfig
import com.unrec.lastfm.tracks.dumper.utils.countPages
import com.unrec.lastfm.tracks.dumper.utils.Paginator
import com.unrec.lastfm.tracks.dumper.utils.extractTracks
import com.unrec.lastfm.tracks.dumper.utils.extractUser
import com.unrec.lastfm.tracks.dumper.utils.recentTracksGetRequest
import com.unrec.lastfm.tracks.dumper.utils.toSitePage
import com.unrec.lastfm.tracks.dumper.utils.userInfoGetRequest
import com.unrec.lastfm.tracks.dumper.utils.userPageUrl
import kotlinx.coroutines.runBlocking
import me.tongfei.progressbar.ProgressBar
import okhttp3.ConnectionPool
import okhttp3.OkHttpClient
import ru.gildor.coroutines.okhttp.await
import java.io.File
import java.net.SocketTimeoutException
import java.time.LocalDate
import java.time.format.DateTimeFormatter
import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.TimeUnit
import kotlin.system.exitProcess
import kotlin.system.measureTimeMillis
import kotlin.time.DurationUnit
Expand All @@ -42,92 +32,136 @@ fun main(args: Array<String>) {

val measureTimeMillis = measureTimeMillis {

// define the settings
val settings = args.asConfig()
val user = settings[userKey]!!
val token = settings[tokenKey]!!
val filterStrategy = when (val strategy = settings[strategyKey]) {
null -> defaultStrategy
else -> strategiesMap[strategy] ?: throw IllegalArgumentException("Incorrect strategy is provided")
}
defineSettingsFrom(args)
checkIfUserExists()

// check if the user exists
val userInfoRequest = userInfoGetRequest(baseUrl, user, token)
val userResponse = client.newCall(userInfoRequest).execute()
if (userResponse.code == 404) {
println("Failed to get data for the '$user' user")
exitProcess(1)
}
val totalScrobbles = getTotalScrobbles()
paginator = Paginator(totalScrobbles)

// get the user info for a total pages amount
val userInfoResponse = client.newCall(userInfoRequest).execute().body?.string()
val userInfo: UserInfo = mapper.extractUser(userInfoResponse!!)
val totalScrobbles = userInfo.playCount
println("Total scrobbles: $totalScrobbles, last.fm pages: ${countPages(totalScrobbles, defaultPageSize)} ")
val pagesToFetch = countPages(totalScrobbles, fetchPageSize)

// starting to consume tracks
val map = ConcurrentHashMap<Int, List<Track>>()
val progressBar = ProgressBar("Pages processed:", pagesToFetch.toLong())

println("Starting to load Last.fm data for '$user' user. \nTotal pages to fetch: $pagesToFetch")

runBlocking {
for (page in pagesToFetch downTo 1) {
runCatching {
val request = recentTracksGetRequest(baseUrl, user, token, page, fetchPageSize)
val response = client.newCall(request).await()
val tracks = mapper.extractTracks(response.body?.string()!!)
val refinedTracks = tracks.let(filterStrategy)

for ((index, track) in refinedTracks.withIndex()) {
val sitePage = index.toSitePage()
track.page = sitePage
track.pageUrl = userPageUrl(user, sitePage)
}
map[page] = refinedTracks
progressBar.step()
}.onFailure {
when (it) {
is SocketTimeoutException -> {
println("Failed to fetch data from Last.fm due to ${it.javaClass}: ${it.message}")
exitProcess(1)
}

else -> throw it
println("Total scrobbles: $totalScrobbles")
println("Last.fm pages: ${paginator.defaultPages}")
println("Total pages to fetch: ${paginator.fetchPages}")

val tracks = fetchTracks()
writeToCsv(tracks)
println("Tracks were found: ${tracks.size}")
}
println("Total dump time: ${measureTimeMillis.toDuration(DurationUnit.MILLISECONDS)}")
exitProcess(0)
}

// Run-wide state shared by the functions in this file.
// `user`, `token` and `filterStrategy` are assigned in defineSettingsFrom();
// `paginator` is assigned in main() after the total scrobble count has been fetched.
private lateinit var user: String
private lateinit var token: String
private lateinit var filterStrategy: (List<Track>) -> List<Track>
private lateinit var paginator: Paginator

// Built lazily because it reads `user` and `token`, which are only set by defineSettingsFrom().
private val userInfoRequest by lazy { userInfoGetRequest(baseUrl, user, token) }
// Suffix used in the output file name (see writeToCsv()).
// NOTE(review): every non-default strategy is labelled "duplicates" — confirm this is intended
// if more than one filtering strategy can be selected.
private val appender by lazy { if (filterStrategy == defaultStrategy) "full" else "duplicates" }

/**
 * Parses the command-line arguments and stores the result in the file-level
 * [user], [token] and [filterStrategy] properties.
 *
 * @param args raw command-line arguments, parsed with [asConfig]
 * @throws IllegalArgumentException if the user or token value is absent from the parsed
 * settings, or if the supplied strategy name has no entry in `strategiesMap`
 */
private fun defineSettingsFrom(args: Array<String>) {
    val settings = args.asConfig()
    // requireNotNull gives a descriptive failure instead of a bare NullPointerException
    // should the lookup ever come back empty.
    user = requireNotNull(settings[userKey]) { "User is not specified" }
    token = requireNotNull(settings[tokenKey]) { "API token is not provided" }
    filterStrategy = when (val strategy = settings[strategyKey]) {
        // No strategy supplied: fall back to the default one.
        null -> defaultStrategy
        else -> strategiesMap[strategy] ?: throw IllegalArgumentException("Incorrect strategy is provided")
    }
}

/**
 * Converts a flat `key value key value ...` argument array into a settings map.
 *
 * Prints a message and terminates the process with exit code 1 when the number of
 * arguments is odd, or when the user key or the token key is missing. A missing
 * strategy only produces an informational message.
 *
 * @return the parsed settings; each argument at an even position is a key and the
 * argument that follows it is the value
 */
private fun Array<String>.asConfig(): Map<String, String> {
    // Local helper: report the problem and stop the process.
    fun abort(message: String): Nothing {
        println(message)
        exitProcess(1)
    }

    if (size % 2 == 1) abort("Incorrect parameters are provided")

    val settings = asList().chunked(2).associate { (key, value) -> key to value }

    if (userKey !in settings) abort("User is not specified")
    if (tokenKey !in settings) abort("API token is not provided")

    if (settings[strategyKey] == null) {
        println("Strategy is not specified, tracks will not be filtered.")
    }

    return settings
}

/**
 * Executes the user-info request and terminates the process with exit code 1 when
 * the response status is HTTP 404.
 *
 * Any other status code lets the function return normally.
 */
private fun checkIfUserExists() {
    // `use` closes the response on every path instead of relying on a trailing close()
    // that is skipped whenever an earlier statement throws.
    client.newCall(userInfoRequest).execute().use { response ->
        if (response.code == 404) {
            println("Failed to get data for the '$user' user")
            exitProcess(1)
        }
    }
}

/**
 * Executes the user-info request and returns the play count parsed from the response body.
 *
 * @return the `playCount` of the user extracted from the response
 * @throws IllegalStateException if the response carries no body
 */
private fun getTotalScrobbles(): Int {
    // `use` closes the response even when the body is missing or reading it fails;
    // previously such a failure happened before close() was reached.
    val responseBody = client.newCall(userInfoRequest).execute().use { response ->
        checkNotNull(response.body) { "User info response has no body" }.string()
    }
    return mapper.extractUser(responseBody).playCount
}

private fun fetchTracks(): List<Track> {
val map = ConcurrentHashMap<Int, List<Track>>()
val pagesToFetch = paginator.fetchPages
val progressBar = ProgressBar("Pages processed:", pagesToFetch.toLong())

runBlocking {
for (page in pagesToFetch downTo 1) {
runCatching {
val request = recentTracksGetRequest(baseUrl, user, token, page, fetchPageSize)
val response = client.newCall(request).await()
val tracks = mapper.extractTracks(response.body!!.string())

response.close()

tracks.withIndex().forEach { (index, track) ->
track.index = paginator.countNormalizedIndex(index, page)
}

val refinedTracks = tracks.let(filterStrategy)
for (track in refinedTracks) {
val sitePage = paginator.countNormalizedPage(track.index)
track.page = sitePage
track.pageUrl = userPageUrl(user, sitePage)
}
map[page] = refinedTracks
progressBar.step()
}.onFailure {
when (it) {
is SocketTimeoutException -> {
println("Failed to fetch data from Last.fm due to ${it.javaClass}: ${it.message}")
exitProcess(1)
}

else -> throw it
}
}
}
progressBar.close()
}
progressBar.close()

val tracks = mutableListOf<Track>()
for (page in 1..pagesToFetch) {
tracks.addAll(map[page]!!)
}
println("Tracks found = ${tracks.size}")

// save tracks to .csv file
val schema = schemaMap[filterStrategy]
val formatter = DateTimeFormatter.ofPattern("yyyy_MM_dd")
val outputFile = File("${user}_${LocalDate.now().format(formatter)}.csv")
val objectWriter = csvMapper.writerFor(Track::class.java).with(schema)
objectWriter.writeValues(outputFile.bufferedWriter()).writeAll(tracks)
val result = mutableListOf<Track>()
for (page in 1..pagesToFetch) {
result.addAll(map[page]!!)
}
println("Total dump time = ${measureTimeMillis.toDuration(DurationUnit.MILLISECONDS)}")
exitProcess(0)
return result
}

val mapper = ObjectMapper().registerKotlinModule()
/**
 * Builds the Last.fm library URL for the given [page] of [user].
 */
private fun userPageUrl(user: String, page: Int): String {
    return "https://www.last.fm/user/$user/library?page=$page"
}

private val csvMapper: ObjectMapper = CsvMapper()
.configure(CsvGenerator.Feature.ALWAYS_QUOTE_STRINGS, false)
.configure(JsonGenerator.Feature.IGNORE_UNKNOWN, true)

private val client = OkHttpClient.Builder()
.connectionPool(ConnectionPool(20, 5, TimeUnit.MINUTES))
.readTimeout(60, TimeUnit.SECONDS)
.connectTimeout(30, TimeUnit.SECONDS)
.writeTimeout(30, TimeUnit.SECONDS)
.retryOnConnectionFailure(true)
.build()
/**
 * Writes [tracks] to a CSV file using [defaultSchema].
 *
 * The file name is built from the user name, the current date formatted as
 * `yyyy_MM_dd`, and the [appender] suffix; the path is relative.
 *
 * @param tracks the tracks to serialize, one per row
 */
private fun writeToCsv(tracks: List<Track>) {
    val formatter = DateTimeFormatter.ofPattern("yyyy_MM_dd")
    val fileName = "${user}_${LocalDate.now().format(formatter)}_${appender}.csv"
    val objectWriter = csvMapper.writerFor(Track::class.java).with(defaultSchema)
    // Close both the sequence writer and the underlying file writer so buffered rows are
    // flushed and the file handle is released even if serialization fails midway.
    File(fileName).bufferedWriter().use { fileWriter ->
        objectWriter.writeValues(fileWriter).use { sequenceWriter ->
            sequenceWriter.writeAll(tracks)
        }
    }
}
24 changes: 4 additions & 20 deletions src/main/kotlin/com/unrec/lastfm/tracks/dumper/CsvSchemas.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,16 @@ import com.fasterxml.jackson.dataformat.csv.CsvSchema

object CsvSchemas {

private val defaultSchema: CsvSchema = CsvSchema.builder()
.setColumnSeparator(';')
.disableQuoteChar()
.setUseHeader(true)
.addColumn("date")
.addColumn("artist")
.addColumn("track")
.addColumn("album")
.build()

private val schemaWithPages: CsvSchema = CsvSchema.builder()
.setColumnSeparator(';')
val defaultSchema: CsvSchema = CsvSchema.builder()
.setColumnSeparator('\t')
.disableQuoteChar()
.setUseHeader(true)
.addColumn("date")
.addColumn("artist")
.addColumn("track")
.addColumn("album")
.addColumn("page")
.addColumn("pageLink")
.addColumn("pageUrl")
.addColumn("index")
.build()

val schemaMap = mapOf(
defaultStrategy to defaultSchema,
withoutDuplicatesStrategy to defaultSchema,
duplicatesOnlyStrategy to schemaWithPages
)
}

28 changes: 28 additions & 0 deletions src/main/kotlin/com/unrec/lastfm/tracks/dumper/UtilObjects.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package com.unrec.lastfm.tracks.dumper

import com.fasterxml.jackson.core.JsonGenerator
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.dataformat.csv.CsvGenerator
import com.fasterxml.jackson.dataformat.csv.CsvMapper
import com.fasterxml.jackson.module.kotlin.registerKotlinModule
import okhttp3.ConnectionPool
import okhttp3.OkHttpClient
import java.util.concurrent.TimeUnit

/**
 * Holder of the shared helper instances used by the application:
 * the JSON mapper, the CSV mapper and the HTTP client.
 */
object UtilObjects {

    /** Jackson mapper with the Kotlin module registered. */
    val mapper: ObjectMapper = ObjectMapper().registerKotlinModule()

    /** CSV mapper with `ALWAYS_QUOTE_STRINGS` disabled and `IGNORE_UNKNOWN` enabled. */
    val csvMapper: ObjectMapper = CsvMapper().apply {
        configure(CsvGenerator.Feature.ALWAYS_QUOTE_STRINGS, false)
        configure(JsonGenerator.Feature.IGNORE_UNKNOWN, true)
    }

    /** HTTP client with a dedicated connection pool, 120-second timeouts and connection retries. */
    val client: OkHttpClient = OkHttpClient.Builder().run {
        connectionPool(ConnectionPool(20, 5, TimeUnit.MINUTES))
        readTimeout(120, TimeUnit.SECONDS)
        connectTimeout(120, TimeUnit.SECONDS)
        writeTimeout(120, TimeUnit.SECONDS)
        retryOnConnectionFailure(true)
        build()
    }
}
2 changes: 2 additions & 0 deletions src/main/kotlin/com/unrec/lastfm/tracks/dumper/model/Track.kt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ data class Track(
@field:JsonProperty("date")
val textDate: String,

var index: Int = 0,

var page: Int = 0,

var pageUrl: String = ""
Expand Down
Loading

0 comments on commit 03d4589

Please sign in to comment.