From 40f68ca46bbe0ca9c5e043e633ab5c91db297958 Mon Sep 17 00:00:00 2001 From: Arnas Udovic Date: Fri, 20 Jun 2025 06:31:18 +0300 Subject: [PATCH] collecting prototype --- collector.go | 33 +++++++++++++++++++++++++++++++++ db.go | 26 ++++++++++++++++++++++++++ instances.db | Bin 28672 -> 28672 bytes main.go | 30 +++++++++++++++++++++++++++++- 4 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 collector.go diff --git a/collector.go b/collector.go new file mode 100644 index 0000000..78636b8 --- /dev/null +++ b/collector.go @@ -0,0 +1,33 @@ +// peertube-instance-index-filter +// Copyright (C) 2025 Arns Udovič +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see https://www.gnu.org/licenses/. 
+ +package main + +type Instance struct { + Url string + Data string + CreatedAt string +} + +func getNewHosts(url string, lastFetched string, start int, count int) ([]string, error) { + return []string{}, nil +} + +func fetchInstance(url string, host string) Instance { + return Instance{} +} + + diff --git a/db.go b/db.go index 99d98a9..2eb2b62 100644 --- a/db.go +++ b/db.go @@ -24,6 +24,7 @@ import ( var migrations = []string{ "SELECT 1;", "ALTER TABLE instances ADD COLUMN rejected INTEGER NOT NULL DEFAULT 0;", + "ALTER TABLE instances ADD COLUMN reject_reason TEXT;", } func connectDB() *sql.DB { @@ -109,3 +110,28 @@ func addIndex(db *sql.DB, url string) { func rejectHost(db *sql.DB, host string) { db.Exec("UPDATE instances SET rejected = 1 WHERE url = ?", host) } + +func getIndexUrls(db *sql.DB) map[string]string { + urls := make(map[string]string) + rows, err := db.Query("SELECT url, last_fetched_at FROM index_host") + if err != nil { + panic(err) + } + + for rows.Next() { + var url string + var lastFetchedAt string + rows.Scan(&url, &lastFetchedAt) + urls[url] = lastFetchedAt + } + + return urls +} + +func updateLastFetched(db *sql.DB, url string) { + db.Exec("UPDATE index_host SET last_fetched_at = datetime('now') WHERE url = ?", url) +} + +func addInstance(db *sql.DB, instance Instance) { + db.Exec("INSERT INTO instances (url, data, created_at) VALUES (?, ?, ?);", instance.Url, instance.Data, instance.CreatedAt) +} diff --git a/instances.db b/instances.db index b3f9a84847b40caea7dc4e822330990655c446d8..3c320f18f8af8142b62117b1e47bc614e5ab63f8 100644 GIT binary patch delta 156 zcmZp8z}WDBae}lU4+8@OI}pPF`$QdMejWzBHceiBcLp{dRt6r{jd|0#xf=5s*~R7M z8Jl${2lB6*e33_0L`R`0H7hl_B)%v$u{b|ZA;dKzWb-#30b#bM4E$gCpKcZuxXCZg z%fzh6nVFZ8S`nX-UtGe)$Slp7oL`z(l3KLMfJ*?VXe~dJJLBe^{%!({%!?8P01VM6 AWdHyG delta 165 zcmZp8z}WDBae}m<5(5JRI}pPF+e95>ekBIIHceiBcLp{dNd_Ltjd|0#Cr%LIYK&%N z7nhf3Y-XJt$iHs#ULMuW-*^Os*&Z|Sf98L@Sy13QzbqdUvm#?=UP@|3d`5n82^%*f zvovFJeraAwYLU<;11z(CiXZSzilHvvYb 
HMF|1`GOQ@1 diff --git a/main.go b/main.go index e689f0a..b8ca0d0 100644 --- a/main.go +++ b/main.go @@ -75,7 +75,35 @@ func reject(host string) { } func collect() { - // TODO: implement + db := connectDB() + defer db.Close() + + urls := getIndexUrls(db) + for url, lastFetched := range urls { + fmt.Println(url) + + start := 0 + count := 20 + for { + hosts, err := getNewHosts(url, lastFetched, start, count) + if err != nil { + panic(err) + } + + if len(hosts) == 0 { + break + } + + for _, host := range hosts { + instance := fetchInstance(url, host) + addInstance(db, instance) + } + + start += count + } + + updateLastFetched(db, url) + } } func serve() {