peertube-instance-index-filter/main.go

284 lines
6 KiB
Go
Raw Normal View History

// peertube-instance-index-filter
// Copyright (C) 2025 Arns Udovič <zordsdavini@arns.lt>
2025-06-20 19:04:57 +03:00
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
2025-06-20 19:04:57 +03:00
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
2025-06-20 19:04:57 +03:00
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package main
import (
2025-06-23 08:37:39 +03:00
"encoding/json"
"flag"
"fmt"
2025-06-23 08:11:14 +03:00
"net/http"
"net/url"
2025-06-23 08:11:14 +03:00
"regexp"
"strconv"
"strings"
2025-06-20 19:04:57 +03:00
"time"
"github.com/gin-gonic/gin"
)
2025-06-23 08:11:14 +03:00
// HostColection is one element of a host listing. It carries a single host
// name and is encoded to JSON as an object with a "host" key.
type HostColection struct {
	// Host is written under the JSON key "host".
	Host string `json:"host"`
}
// HostsResponse is the JSON envelope for a list of hosts: a total counter
// alongside the HostColection entries of the returned page.
type HostsResponse struct {
	// Total is written under the JSON key "total".
	Total int `json:"total"`
	// Data is written under the JSON key "data".
	Data []HostColection `json:"data"`
}
2025-06-23 08:37:39 +03:00
// InstancesResponse is the JSON envelope for a list of instances: a total
// counter alongside the raw, already-encoded JSON document of each entry.
type InstancesResponse struct {
	// Total is written under the JSON key "total".
	Total int `json:"total"`
	// Data is written under the JSON key "data". Every element is emitted
	// verbatim, without being decoded and re-encoded.
	Data []json.RawMessage `json:"data"`
}
func main() {
var command string
2025-06-20 19:04:57 +03:00
var url string
var instanceUrl string
2025-06-23 08:11:14 +03:00
var host string
var reason string
2025-06-20 19:04:57 +03:00
flag.StringVar(&command, "command", "", "Command to execute: index, reject, collect, serve")
flag.StringVar(&url, "url", "", "Url to index hosts")
flag.StringVar(&instanceUrl, "instance-url", "", "Url to fetch instance information")
2025-06-23 08:11:14 +03:00
flag.StringVar(&host, "host", "", "Host to reject")
flag.StringVar(&reason, "reject-reason", "", "Reject reason (optional)")
flag.Parse()
fmt.Println(command, host)
switch command {
case "index":
2025-06-20 19:04:57 +03:00
index(url, instanceUrl)
case "reject":
2025-06-23 08:11:14 +03:00
reject(host, reason)
case "collect":
collect()
2025-06-23 08:11:14 +03:00
case "serve":
default:
serve()
}
}
2025-06-20 19:04:57 +03:00
func index(url string, instanceUrl string) {
db := connectDB()
defer db.Close()
exists, err := indexExists(db, url)
if err != nil {
panic(err)
}
if !exists {
2025-06-20 19:04:57 +03:00
indexHost := IndexHost{
Url: url,
InstanceUrl: instanceUrl,
}
addIndex(db, indexHost)
fmt.Println(url, "added to index")
} else {
fmt.Println(url, "already added")
}
}
2025-06-23 08:11:14 +03:00
func reject(host string, reason string) {
host = formatHost(host)
db := connectDB()
defer db.Close()
2025-06-23 08:11:14 +03:00
rejectHost(db, host, reason)
fmt.Println(host, "rejected")
}
func collect() {
2025-06-20 06:31:18 +03:00
db := connectDB()
defer db.Close()
2025-06-20 19:04:57 +03:00
indexHosts := getIndexHosts(db)
for _, indexHost := range indexHosts {
fmt.Println(indexHost.Url)
fmt.Println("==========================================")
2025-06-20 06:31:18 +03:00
start := 0
count := 20
for {
2025-06-20 19:04:57 +03:00
hosts, err := getNewHosts(indexHost.Url, indexHost.LastFetchedAt, start, count)
2025-06-20 06:31:18 +03:00
if err != nil {
panic(err)
}
if len(hosts) == 0 {
break
}
2025-06-20 19:04:57 +03:00
fmt.Println("New hosts:", len(hosts), hosts)
2025-06-20 06:31:18 +03:00
for _, host := range hosts {
2025-06-20 19:04:57 +03:00
fmt.Println(host)
time.Sleep(1 * time.Second)
instance := fetchInstance(indexHost.InstanceUrl, host)
2025-06-20 06:31:18 +03:00
addInstance(db, instance)
}
start += count
}
2025-06-20 19:04:57 +03:00
updateLastFetched(db, indexHost)
2025-06-20 06:31:18 +03:00
}
}
func serve() {
2025-06-23 08:11:14 +03:00
db := connectDB()
defer db.Close()
r := gin.Default()
2025-06-23 09:15:58 +03:00
r.LoadHTMLGlob("templates/*")
2025-06-20 19:04:57 +03:00
r.GET("/", func(c *gin.Context) {
2025-06-23 09:15:58 +03:00
filteredInstancesCount, err := getHostsTotal(db, "", "")
if err != nil {
panic(err)
}
rejectedInstancesCount, err := getRejectedHostsTotal(db)
if err != nil {
panic(err)
}
c.HTML(http.StatusOK, "index.tmpl", gin.H{
"FilteredInstancesCount": filteredInstancesCount,
"RejectedInstancesCount": rejectedInstancesCount,
})
2025-06-20 19:04:57 +03:00
})
r.GET("/instances", func(c *gin.Context) {
2025-06-23 08:37:39 +03:00
start := c.DefaultQuery("start", "0")
count := c.DefaultQuery("count", "20")
since := c.DefaultQuery("since", "")
search := c.DefaultQuery("search", "")
if since != "" {
regex := regexp.MustCompile(`^\d{4}-\d{2}-\d{2}$`)
if !regex.MatchString(since) {
generateErrorResponse(c, "since must be in YYYY-MM-DD format")
return
}
}
starti, err := strconv.Atoi(start)
if err != nil {
generateErrorResponse(c, "start should be an integer")
return
}
counti, err := strconv.Atoi(count)
if err != nil {
generateErrorResponse(c, "count should be an integer")
return
}
hosts, err := getHosts(db, starti, counti, since, search, "data")
if err != nil {
generateErrorResponse(c, "error getting hosts")
return
}
total, err := getHostsTotal(db, since, search)
if err != nil {
generateErrorResponse(c, "error getting hosts")
return
}
response := InstancesResponse{
Total: total,
}
for _, data := range hosts {
response.Data = append(response.Data, []byte(data))
}
c.JSON(http.StatusOK, response)
})
r.GET("/instances/hosts", func(c *gin.Context) {
2025-06-23 08:11:14 +03:00
start := c.DefaultQuery("start", "0")
count := c.DefaultQuery("count", "20")
since := c.DefaultQuery("since", "")
if since != "" {
regex := regexp.MustCompile(`^\d{4}-\d{2}-\d{2}$`)
if !regex.MatchString(since) {
generateErrorResponse(c, "since must be in YYYY-MM-DD format")
return
}
}
starti, err := strconv.Atoi(start)
if err != nil {
generateErrorResponse(c, "start should be an integer")
return
}
counti, err := strconv.Atoi(count)
if err != nil {
generateErrorResponse(c, "count should be an integer")
return
}
2025-06-23 08:37:39 +03:00
hosts, err := getHosts(db, starti, counti, since, "", "url")
2025-06-23 08:11:14 +03:00
if err != nil {
generateErrorResponse(c, "error getting hosts")
return
}
2025-06-23 08:37:39 +03:00
total, err := getHostsTotal(db, since, "")
2025-06-23 08:11:14 +03:00
if err != nil {
generateErrorResponse(c, "error getting hosts")
return
}
response := HostsResponse{
Total: total,
}
for _, host := range hosts {
response.Data = append(response.Data, HostColection{
Host: host,
})
}
c.JSON(http.StatusOK, response)
})
2025-06-20 19:04:57 +03:00
r.Run(":8081")
}
// formatHost reduces user input to a bare host.
//
// Surrounding whitespace is stripped first. When the remainder parses as a URL
// with a host component (for example "https://example.com/path"), only that
// host, including any port, is returned. Otherwise the trimmed input is
// returned unchanged; this covers plain host names such as "example.com" as
// well as input that url.Parse rejects, such as "127.0.0.1:9000".
func formatHost(host string) string {
	host = strings.TrimSpace(host)

	u, err := url.Parse(host)
	// url.Parse returns a nil *URL together with the error, so the error must
	// be checked before u.Host is touched.
	if err != nil || u.Host == "" {
		return host
	}
	return u.Host
}
2025-06-23 08:11:14 +03:00
// generateErrorResponse answers the request with HTTP 400 (Bad Request) and a
// JSON object whose "error" key holds the message err.
func generateErrorResponse(c *gin.Context, err string) {
	payload := gin.H{"error": err}
	c.JSON(http.StatusBadRequest, payload)
}