/
CoubSkraper.kt
120 lines (108 loc) · 4.66 KB
/
CoubSkraper.kt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/*
* Copyright (c) 2019-present Mikhael Sokolov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ru.sokomishalov.skraper.provider.coub
import com.fasterxml.jackson.databind.JsonNode
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.flow
import org.jsoup.nodes.Document
import ru.sokomishalov.skraper.Skraper
import ru.sokomishalov.skraper.Skrapers
import ru.sokomishalov.skraper.client.*
import ru.sokomishalov.skraper.internal.iterable.emitBatch
import ru.sokomishalov.skraper.internal.jsoup.getFirstElementByClass
import ru.sokomishalov.skraper.internal.jsoup.getFirstElementByTag
import ru.sokomishalov.skraper.internal.jsoup.getMetaPropertyMap
import ru.sokomishalov.skraper.internal.net.host
import ru.sokomishalov.skraper.internal.number.div
import ru.sokomishalov.skraper.internal.serialization.*
import ru.sokomishalov.skraper.model.*
import java.time.Duration
import java.time.Instant
/**
 * [Skraper] implementation for coub.com.
 *
 * Posts are read from the JSON timeline endpoint (`/api/v2/timeline/channel/{username}`),
 * while page info is extracted from the HTML of the requested page.
 *
 * @property client HTTP client used for every request; defaults to [Skrapers.client].
 */
class CoubSkraper(
    override val client: SkraperClient = Skrapers.client
) : Skraper {

    /**
     * Streams the posts of the channel addressed by [path], page by page.
     *
     * The channel name is the first segment of [path] (a leading `/` is ignored).
     * Pages are requested starting from 1 and the flow completes as soon as a response
     * is missing or carries no `coubs` entries.
     *
     * @param path channel path, e.g. `/some.channel`
     * @return a cold flow with one [Post] per element of each page's `coubs` array
     */
    override fun getPosts(path: String): Flow<Post> = flow {
        val username = path.removePrefix("/").substringBefore("/")

        for (page in 1..Int.MAX_VALUE) {
            val coubs = fetchPosts(username, page)?.get("coubs")?.toList().orEmpty()
            // A missing response or an empty "coubs" array is treated as the end of the timeline.
            if (coubs.isEmpty()) break

            emitBatch(coubs) {
                Post(
                    id = getString("id").orEmpty(),
                    text = getString("title"),
                    // An unparsable timestamp is dropped rather than failing the whole post.
                    publishedAt = getString("published_at")?.let { runCatching { Instant.parse(it) }.getOrNull() },
                    statistics = PostStatistics(
                        views = getInt("views_count"),
                        likes = getInt("likes_count"),
                        comments = getInt("comments_count"),
                        reposts = getInt("recoubs_count"),
                    ),
                    media = listOf(
                        Video(
                            // Candidate keys are passed from "higher" down to "low".
                            url = getByPath("file_versions.html5.video")?.getFirstByPath("higher", "high", "med", "low")?.getString("url").orEmpty(),
                            // JsonNode.get returns null for an absent field, so "size" must be accessed
                            // null-safely; a missing or short array yields null operands for `/`.
                            // NOTE(review): presumably "size" is [width, height] - confirm against the API.
                            aspectRatio = get("size")?.toList().orEmpty().let { size ->
                                size.getOrNull(0)?.asInt() / size.getOrNull(1)?.asInt()
                            },
                            duration = getLong("duration")?.let { Duration.ofSeconds(it) },
                            thumbnail = getString("picture")?.toImage()
                        )
                    )
                )
            }
        }
    }

    /**
     * Builds [PageInfo] from the HTML page at [path].
     *
     * Most fields come from the page's meta-property map (`og:url`, `og:title`,
     * `og:description`, `og:image`); the followers count is read from the first `span`
     * inside the first element with class `follows-counter`.
     *
     * @param path page path relative to [BASE_URL]
     * @return the page info, or `null` when no document was fetched
     */
    override suspend fun getPageInfo(path: String): PageInfo? {
        val page = fetchPage(path) ?: return null
        val metadata = page.getMetaPropertyMap()

        return PageInfo(
            // Last path segment of the canonical URL.
            nick = metadata["og:url"]?.removeSuffix("/")?.substringAfterLast("/"),
            name = metadata["og:title"],
            // Only the text after the last "." of og:description is kept; blank results become null.
            description = metadata["og:description"]
                ?.split(".")
                ?.lastOrNull()
                ?.removePrefix(" ")
                ?.takeIf { it.isNotBlank() },
            avatar = metadata["og:image"]?.toImage(),
            statistics = PageStatistics(
                // Text that is not a plain integer yields null.
                followers = page
                    .getFirstElementByClass("follows-counter")
                    ?.getFirstElementByTag("span")
                    ?.text()
                    ?.toIntOrNull(),
            )
        )
    }

    /**
     * Tells whether this skraper handles [url].
     *
     * @return `true` when the host of [url] contains `coub.com`
     */
    override fun supports(url: String): Boolean = "coub.com" in url.host

    /**
     * Resolves [media] to its final form.
     *
     * A [Video] is passed to `client.fetchOpenGraphMedia`; any other media is returned unchanged.
     */
    override suspend fun resolve(media: Media): Media = when (media) {
        is Video -> client.fetchOpenGraphMedia(media)
        else -> media
    }

    /** Fetches the HTML document located at [path] relative to [BASE_URL]. */
    private suspend fun fetchPage(path: String): Document? {
        val url = BASE_URL.buildFullURL(path = path)
        return client.fetchDocument(HttpRequest(url = url))
    }

    /**
     * Fetches one page of the channel timeline as JSON.
     *
     * @param username channel name, used both in the URL path and as the `permalink` query parameter
     * @param page page number sent as the `page` query parameter
     */
    private suspend fun fetchPosts(username: String, page: Int): JsonNode? {
        val url = BASE_URL.buildFullURL(
            path = "/api/v2/timeline/channel/$username",
            queryParams = mapOf(
                "order_by" to "newest",
                "page" to page.toString(),
                "scope" to "all",
                "permalink" to username
            )
        )
        return client.fetchJson(HttpRequest(url = url))
    }

    companion object {
        /** Root URL that every request of this skraper is built from. */
        const val BASE_URL = "https://coub.com"
    }
}