diff --git a/README.md b/README.md
index 2b1f499..7f9eee1 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ App can be called directly and do management in console.
 - `peertube-instance-index-filter -command index -url https://instances.joinpeertube.org/api/v1/instances/hosts -instance-url https://instances.joinpeertube.org/api/v1/instances` - add url to index of hosts. Later it will be used to collect instances
 - `peertube-instance-index-filter -command collect` - collect instances from index of hosts urls
 - `peertube-instance-index-filter -command reject -host www.example.com` - reject instance from index to exclude it from global index
-- `peertube-instance-index-filter -command audit -filter dead` - go through not rejected instances and check by filters: dead
+- `peertube-instance-index-filter -command audit -filter dead` - go through not rejected instances and check by filters: dead, duplicates
 
 ## Links
 
diff --git a/audit.go b/audit.go
index 6fee428..7a68a39 100644
--- a/audit.go
+++ b/audit.go
@@ -16,7 +16,10 @@
 
 package main
 
-import "net/http"
+import (
+	"fmt"
+	"net/http"
+)
 
 func doAudit(host string, filter string) bool {
 	switch filter {
@@ -29,3 +32,33 @@ func doAudit(host string, filter string) bool {
 
 	return false
 }
+
+// removeDuplicates deletes instances that share a url with another instance.
+//
+// Each pass asks getDuplicateIds for one id per duplicated url and deletes
+// those rows; passes repeat until no duplicated url is left, so a url stored
+// three or more times is cleaned up as well. Any database error panics,
+// because continuing after a failed delete would repeat the same pass forever.
+func removeDuplicates() {
+	db := connectDB()
+	defer db.Close()
+
+	for {
+		ids, err := getDuplicateIds(db)
+		if err != nil {
+			panic(err)
+		}
+
+		if len(ids) == 0 {
+			break
+		}
+
+		fmt.Println("Removing duplicates:", len(ids), ids)
+
+		for _, id := range ids {
+			if err := removeInstance(db, id); err != nil {
+				panic(err)
+			}
+		}
+	}
+}
diff --git a/db.go b/db.go
index 33ca177..e897088 100644
--- a/db.go
+++ b/db.go
@@ -210,3 +210,43 @@ func getRejectedHostsTotal(db *sql.DB) (int, error) {
 
 	return totali, nil
 }
+
+// getDuplicateIds returns one instance id for every url that occurs more than
+// once in the instances table.
+//
+// MAX(id) names the row explicitly: a bare id column next to GROUP BY url is
+// rejected by databases that enforce strict grouping and picks an unspecified
+// row on the ones that allow it. Only one id per url is returned, so callers
+// call again after deleting until the result is empty.
+func getDuplicateIds(db *sql.DB) ([]string, error) {
+	query := "SELECT MAX(id) FROM instances GROUP BY url HAVING COUNT(*) > 1"
+	rows, err := db.Query(query)
+	if err != nil {
+		return nil, err
+	}
+	// Release the connection held by the result set on every return path.
+	defer rows.Close()
+
+	var ids []string
+	for rows.Next() {
+		var id string
+		if err := rows.Scan(&id); err != nil {
+			return nil, err
+		}
+		ids = append(ids, id)
+	}
+	// Next also returns false when iteration failed; report that instead of
+	// returning a silently truncated list.
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+
+	return ids, nil
+}
+
+// removeInstance deletes the instance row with the given id and returns the
+// error from the DELETE statement, if any.
+func removeInstance(db *sql.DB, id string) error {
+	_, err := db.Exec("DELETE FROM instances WHERE id = ?", id)
+	return err
+}
diff --git a/main.go b/main.go
index 772f43f..01911ff 100644
--- a/main.go
+++ b/main.go
@@ -148,6 +148,11 @@ func audit(filter string) {
 	db := connectDB()
 	defer db.Close()
 
+	if filter == "duplicates" {
+		removeDuplicates()
+		return
+	}
+
 	hosts, err := getHosts(db, 0, 10000, "", "", "url")
 	if err != nil {
 		panic(err)