2025-06-19 07:16:34 +03:00
|
|
|
// peertube-instance-index-filter
|
|
|
|
// Copyright (C) 2025 Arns Udovič <zordsdavini@arns.lt>
|
2025-06-20 19:04:57 +03:00
|
|
|
//
|
2025-06-19 07:16:34 +03:00
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
2025-06-20 19:04:57 +03:00
|
|
|
//
|
2025-06-19 07:16:34 +03:00
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Affero General Public License for more details.
|
2025-06-20 19:04:57 +03:00
|
|
|
//
|
2025-06-19 07:16:34 +03:00
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2025-06-23 08:37:39 +03:00
|
|
|
"encoding/json"
|
2025-06-19 07:16:34 +03:00
|
|
|
"flag"
|
|
|
|
"fmt"
|
2025-06-23 08:11:14 +03:00
|
|
|
"net/http"
|
2025-06-19 07:16:34 +03:00
|
|
|
"net/url"
|
2025-06-23 08:11:14 +03:00
|
|
|
"regexp"
|
|
|
|
"strconv"
|
2025-06-19 07:16:34 +03:00
|
|
|
"strings"
|
2025-06-20 19:04:57 +03:00
|
|
|
"time"
|
2025-06-19 07:16:34 +03:00
|
|
|
|
|
|
|
"github.com/gin-gonic/gin"
|
|
|
|
)
|
|
|
|
|
2025-06-23 08:11:14 +03:00
|
|
|
// HostColection is a single entry of the "data" array returned by the
// /instances/hosts endpoint; it serialises as {"host": "..."}.
type HostColection struct {
	// Host is the instance host name.
	Host string `json:"host"`
}
|
|
|
|
type HostsResponse struct {
|
|
|
|
Total int `json:"total"`
|
|
|
|
Data []HostColection `json:"data"`
|
|
|
|
}
|
|
|
|
|
2025-06-23 08:37:39 +03:00
|
|
|
// InstancesResponse is the JSON body returned by the /instances endpoint.
type InstancesResponse struct {
	// Total is the value reported by getHostsTotal for the request's filter,
	// not the number of entries in Data.
	Total int `json:"total"`
	// Data holds one already-encoded JSON document per instance; RawMessage
	// makes the encoder embed each document verbatim instead of re-encoding it.
	Data []json.RawMessage `json:"data"`
}
|
|
|
|
|
2025-06-19 07:16:34 +03:00
|
|
|
func main() {
|
|
|
|
var command string
|
2025-06-20 19:04:57 +03:00
|
|
|
var url string
|
|
|
|
var instanceUrl string
|
2025-06-23 08:11:14 +03:00
|
|
|
var host string
|
|
|
|
var reason string
|
2025-06-23 12:50:46 +03:00
|
|
|
var filter string
|
2025-06-23 14:06:56 +03:00
|
|
|
var lastId string
|
2025-06-19 07:16:34 +03:00
|
|
|
|
2025-06-23 12:50:46 +03:00
|
|
|
flag.StringVar(&command, "command", "", "Command to execute: index, reject, collect, serve, audit")
|
2025-06-20 19:04:57 +03:00
|
|
|
flag.StringVar(&url, "url", "", "Url to index hosts")
|
|
|
|
flag.StringVar(&instanceUrl, "instance-url", "", "Url to fetch instance information")
|
2025-06-23 08:11:14 +03:00
|
|
|
flag.StringVar(&host, "host", "", "Host to reject")
|
|
|
|
flag.StringVar(&reason, "reject-reason", "", "Reject reason (optional)")
|
2025-06-23 12:50:46 +03:00
|
|
|
flag.StringVar(&filter, "filter", "", "filter for audit")
|
2025-06-23 14:06:56 +03:00
|
|
|
flag.StringVar(&lastId, "last-id", "", "last id")
|
2025-06-19 07:16:34 +03:00
|
|
|
|
|
|
|
flag.Parse()
|
|
|
|
fmt.Println(command, host)
|
2025-06-23 14:06:56 +03:00
|
|
|
|
|
|
|
start, err := strconv.Atoi(lastId)
|
|
|
|
if err != nil {
|
|
|
|
start = 0
|
|
|
|
}
|
2025-06-19 07:16:34 +03:00
|
|
|
|
|
|
|
switch command {
|
|
|
|
case "index":
|
2025-06-20 19:04:57 +03:00
|
|
|
index(url, instanceUrl)
|
2025-06-19 07:16:34 +03:00
|
|
|
case "reject":
|
2025-06-23 08:11:14 +03:00
|
|
|
reject(host, reason)
|
2025-06-19 07:16:34 +03:00
|
|
|
case "collect":
|
|
|
|
collect()
|
2025-06-23 12:50:46 +03:00
|
|
|
case "audit":
|
2025-06-23 14:06:56 +03:00
|
|
|
audit(filter, start)
|
2025-06-23 08:11:14 +03:00
|
|
|
case "serve":
|
2025-06-19 07:16:34 +03:00
|
|
|
default:
|
|
|
|
serve()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2025-06-20 19:04:57 +03:00
|
|
|
func index(url string, instanceUrl string) {
|
2025-06-19 07:16:34 +03:00
|
|
|
db := connectDB()
|
|
|
|
defer db.Close()
|
|
|
|
|
|
|
|
exists, err := indexExists(db, url)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if !exists {
|
2025-06-20 19:04:57 +03:00
|
|
|
indexHost := IndexHost{
|
|
|
|
Url: url,
|
|
|
|
InstanceUrl: instanceUrl,
|
|
|
|
}
|
|
|
|
addIndex(db, indexHost)
|
2025-06-19 07:16:34 +03:00
|
|
|
fmt.Println(url, "added to index")
|
|
|
|
} else {
|
|
|
|
fmt.Println(url, "already added")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2025-06-23 08:11:14 +03:00
|
|
|
func reject(host string, reason string) {
|
2025-06-19 07:16:34 +03:00
|
|
|
host = formatHost(host)
|
|
|
|
|
|
|
|
db := connectDB()
|
|
|
|
defer db.Close()
|
|
|
|
|
2025-06-23 08:11:14 +03:00
|
|
|
rejectHost(db, host, reason)
|
2025-06-19 07:16:34 +03:00
|
|
|
fmt.Println(host, "rejected")
|
|
|
|
}
|
|
|
|
|
|
|
|
func collect() {
|
2025-06-20 06:31:18 +03:00
|
|
|
db := connectDB()
|
|
|
|
defer db.Close()
|
|
|
|
|
2025-06-20 19:04:57 +03:00
|
|
|
indexHosts := getIndexHosts(db)
|
|
|
|
for _, indexHost := range indexHosts {
|
|
|
|
fmt.Println(indexHost.Url)
|
|
|
|
fmt.Println("==========================================")
|
2025-06-20 06:31:18 +03:00
|
|
|
|
|
|
|
start := 0
|
|
|
|
count := 20
|
|
|
|
for {
|
2025-06-20 19:04:57 +03:00
|
|
|
hosts, err := getNewHosts(indexHost.Url, indexHost.LastFetchedAt, start, count)
|
2025-06-20 06:31:18 +03:00
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(hosts) == 0 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
2025-06-20 19:04:57 +03:00
|
|
|
fmt.Println("New hosts:", len(hosts), hosts)
|
2025-06-20 06:31:18 +03:00
|
|
|
for _, host := range hosts {
|
2025-06-20 19:04:57 +03:00
|
|
|
fmt.Println(host)
|
|
|
|
|
|
|
|
time.Sleep(1 * time.Second)
|
|
|
|
instance := fetchInstance(indexHost.InstanceUrl, host)
|
2025-06-20 06:31:18 +03:00
|
|
|
addInstance(db, instance)
|
|
|
|
}
|
|
|
|
|
|
|
|
start += count
|
|
|
|
}
|
|
|
|
|
2025-06-20 19:04:57 +03:00
|
|
|
updateLastFetched(db, indexHost)
|
2025-06-20 06:31:18 +03:00
|
|
|
}
|
2025-06-19 07:16:34 +03:00
|
|
|
}
|
|
|
|
|
2025-06-23 14:06:56 +03:00
|
|
|
func audit(filter string, start int) {
|
2025-06-23 12:50:46 +03:00
|
|
|
db := connectDB()
|
|
|
|
defer db.Close()
|
|
|
|
|
2025-06-23 13:45:56 +03:00
|
|
|
if filter == "duplicates" {
|
|
|
|
removeDuplicates()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2025-06-23 14:06:56 +03:00
|
|
|
hosts, err := getHosts(db, start, 10000, "", "", "url")
|
2025-06-23 12:50:46 +03:00
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, url := range hosts {
|
|
|
|
filtered := doAudit(url, filter)
|
|
|
|
if filtered {
|
|
|
|
reject(url, filter)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2025-06-19 07:16:34 +03:00
|
|
|
func serve() {
|
2025-06-23 08:11:14 +03:00
|
|
|
db := connectDB()
|
|
|
|
defer db.Close()
|
|
|
|
|
2025-06-19 07:16:34 +03:00
|
|
|
r := gin.Default()
|
2025-06-23 09:15:58 +03:00
|
|
|
r.LoadHTMLGlob("templates/*")
|
2025-06-19 07:16:34 +03:00
|
|
|
|
2025-06-20 19:04:57 +03:00
|
|
|
r.GET("/", func(c *gin.Context) {
|
2025-06-23 09:15:58 +03:00
|
|
|
filteredInstancesCount, err := getHostsTotal(db, "", "")
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
rejectedInstancesCount, err := getRejectedHostsTotal(db)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
c.HTML(http.StatusOK, "index.tmpl", gin.H{
|
|
|
|
"FilteredInstancesCount": filteredInstancesCount,
|
|
|
|
"RejectedInstancesCount": rejectedInstancesCount,
|
|
|
|
})
|
2025-06-20 19:04:57 +03:00
|
|
|
})
|
|
|
|
|
2025-06-19 07:16:34 +03:00
|
|
|
r.GET("/instances", func(c *gin.Context) {
|
2025-06-23 08:37:39 +03:00
|
|
|
start := c.DefaultQuery("start", "0")
|
|
|
|
count := c.DefaultQuery("count", "20")
|
|
|
|
since := c.DefaultQuery("since", "")
|
|
|
|
search := c.DefaultQuery("search", "")
|
|
|
|
|
|
|
|
if since != "" {
|
|
|
|
regex := regexp.MustCompile(`^\d{4}-\d{2}-\d{2}$`)
|
|
|
|
if !regex.MatchString(since) {
|
|
|
|
generateErrorResponse(c, "since must be in YYYY-MM-DD format")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
starti, err := strconv.Atoi(start)
|
|
|
|
if err != nil {
|
|
|
|
generateErrorResponse(c, "start should be an integer")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
counti, err := strconv.Atoi(count)
|
|
|
|
if err != nil {
|
|
|
|
generateErrorResponse(c, "count should be an integer")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
hosts, err := getHosts(db, starti, counti, since, search, "data")
|
|
|
|
if err != nil {
|
|
|
|
generateErrorResponse(c, "error getting hosts")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
total, err := getHostsTotal(db, since, search)
|
|
|
|
if err != nil {
|
|
|
|
generateErrorResponse(c, "error getting hosts")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
response := InstancesResponse{
|
|
|
|
Total: total,
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, data := range hosts {
|
|
|
|
response.Data = append(response.Data, []byte(data))
|
|
|
|
}
|
|
|
|
|
|
|
|
c.JSON(http.StatusOK, response)
|
2025-06-19 07:16:34 +03:00
|
|
|
})
|
|
|
|
|
|
|
|
r.GET("/instances/hosts", func(c *gin.Context) {
|
2025-06-23 08:11:14 +03:00
|
|
|
start := c.DefaultQuery("start", "0")
|
|
|
|
count := c.DefaultQuery("count", "20")
|
|
|
|
since := c.DefaultQuery("since", "")
|
|
|
|
|
|
|
|
if since != "" {
|
|
|
|
regex := regexp.MustCompile(`^\d{4}-\d{2}-\d{2}$`)
|
|
|
|
if !regex.MatchString(since) {
|
|
|
|
generateErrorResponse(c, "since must be in YYYY-MM-DD format")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
starti, err := strconv.Atoi(start)
|
|
|
|
if err != nil {
|
|
|
|
generateErrorResponse(c, "start should be an integer")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
counti, err := strconv.Atoi(count)
|
|
|
|
if err != nil {
|
|
|
|
generateErrorResponse(c, "count should be an integer")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2025-06-23 08:37:39 +03:00
|
|
|
hosts, err := getHosts(db, starti, counti, since, "", "url")
|
2025-06-23 08:11:14 +03:00
|
|
|
if err != nil {
|
|
|
|
generateErrorResponse(c, "error getting hosts")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2025-06-23 08:37:39 +03:00
|
|
|
total, err := getHostsTotal(db, since, "")
|
2025-06-23 08:11:14 +03:00
|
|
|
if err != nil {
|
|
|
|
generateErrorResponse(c, "error getting hosts")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
response := HostsResponse{
|
|
|
|
Total: total,
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, host := range hosts {
|
|
|
|
response.Data = append(response.Data, HostColection{
|
|
|
|
Host: host,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
c.JSON(http.StatusOK, response)
|
2025-06-19 07:16:34 +03:00
|
|
|
})
|
|
|
|
|
2025-06-20 19:04:57 +03:00
|
|
|
r.Run(":8081")
|
2025-06-19 07:16:34 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// formatHost reduces a user-supplied host or URL to its host part.
//
// Surrounding whitespace is trimmed first. If the remainder parses as a URL
// with a non-empty host component (for example "https://example.com/videos"),
// that component, including any port, is returned. Otherwise, including when
// the input is not a valid URL at all (for example "127.0.0.1:8080"), the
// trimmed input is returned unchanged.
func formatHost(host string) string {
	host = strings.TrimSpace(host)

	// url.Parse returns a nil *URL together with the error, so the error has
	// to be checked before u is dereferenced.
	u, err := url.Parse(host)
	if err != nil || u.Host == "" {
		return host
	}

	return u.Host
}
|
2025-06-23 08:11:14 +03:00
|
|
|
|
|
|
|
func generateErrorResponse(c *gin.Context, err string) {
|
|
|
|
c.JSON(http.StatusBadRequest, gin.H{"error": err})
|
|
|
|
}
|