audit: remove duplicates

This commit is contained in:
Arnas Udovic 2025-06-23 13:45:56 +03:00
parent 1df66dfd7b
commit eacb662498
4 changed files with 53 additions and 2 deletions

View file

@ -11,7 +11,7 @@ App can be called directly and do management in console.
- `peertube-instance-index-filter -command index -url https://instances.joinpeertube.org/api/v1/instances/hosts -instance-url https://instances.joinpeertube.org/api/v1/instances` - add url to index of hosts. Later it will be used to collect instances
- `peertube-instance-index-filter -command collect` - collect instances from index of hosts urls
- `peertube-instance-index-filter -command reject -host www.example.com` - reject instance from index to exclude it from global index
- `peertube-instance-index-filter -command audit -filter dead` - go through not rejected instances and check by filters: dead
- `peertube-instance-index-filter -command audit -filter dead` - go through the instances that are not rejected and check them with the given filter; supported filters: dead, duplicates
## Links

View file

@ -16,7 +16,10 @@
package main
import "net/http"
import (
"fmt"
"net/http"
)
func doAudit(host string, filter string) bool {
switch filter {
@ -29,3 +32,25 @@ func doAudit(host string, filter string) bool {
return false
}
// removeDuplicates opens its own database connection and keeps deleting the
// instances reported by getDuplicateIds until that lookup comes back empty.
//
// Every pass prints the number of ids and the ids themselves before handing
// each one to removeInstance. A lookup error aborts the program via panic.
func removeDuplicates() {
	conn := connectDB()
	defer conn.Close()

	for {
		dupes, err := getDuplicateIds(conn)
		switch {
		case err != nil:
			panic(err)
		case len(dupes) == 0:
			// Nothing left to delete; the deferred Close runs on return.
			return
		}

		fmt.Println("Removing duplicates:", len(dupes), dupes)
		for i := range dupes {
			removeInstance(conn, dupes[i])
		}
	}
}

21
db.go
View file

@ -210,3 +210,24 @@ func getRejectedHostsTotal(db *sql.DB) (int, error) {
return totali, nil
}
// getDuplicateIds returns the ids produced by grouping the instances table by
// url and keeping only the groups that hold more than one row. The query
// yields one row per duplicated url, so a url stored three or more times is
// still reported with a single id per call.
//
// NOTE(review): id is selected without an aggregate while grouping by url, so
// which row of a group supplies the id is presumably left to the database
// engine — confirm that any row of a duplicate group is acceptable.
//
// On success the returned slice is non-nil (possibly empty). A non-nil error
// is returned when the query fails, when a row cannot be scanned, or when
// iteration stops early; the slice is nil in that case.
func getDuplicateIds(db *sql.DB) ([]string, error) {
	const query = "SELECT id FROM instances GROUP BY url HAVING COUNT(*) > 1"

	rows, err := db.Query(query)
	if err != nil {
		return nil, err
	}
	// Always release the result set, including on the early returns below.
	defer rows.Close()

	ids := []string{}
	for rows.Next() {
		var id string
		if err := rows.Scan(&id); err != nil {
			return nil, err
		}
		ids = append(ids, id)
	}
	// rows.Next also reports false when iteration fails, so the failure has
	// to be picked up explicitly instead of being mistaken for "no more rows".
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return ids, nil
}
// removeInstance deletes the row of the instances table whose id equals id.
//
// A failed DELETE panics. Dropping the error would let removeDuplicates, which
// loops until no duplicate ids are reported, fetch the same ids forever.
func removeInstance(db *sql.DB, id string) {
	if _, err := db.Exec("DELETE FROM instances WHERE id = ?", id); err != nil {
		panic(err)
	}
}

View file

@ -148,6 +148,11 @@ func audit(filter string) {
db := connectDB()
defer db.Close()
if filter == "duplicates" {
removeDuplicates()
return
}
hosts, err := getHosts(db, 0, 10000, "", "", "url")
if err != nil {
panic(err)