// peertube-instance-index-filter
// Copyright (C) 2025 Arns Udovič <zordsdavini@arns.lt>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package main
import (
"encoding/json"
"fmt"
"net/http"
"net/url"
"regexp"
"time"
)
// Instance is the record fetchInstance builds for a single host.
type Instance struct {
	// Url holds the host name the record was built for.
	Url string
	// Data is the raw JSON text of the first search result returned for the
	// host, or empty when no lookup was made or nothing was found.
	Data string
	// CreatedAt is a timestamp string: the "createdAt" value found in Data
	// when there is one, otherwise the time the record was built.
	CreatedAt string
	// Rejected and RejectReason are left at their zero values by
	// fetchInstance.
	// NOTE(review): presumably filled in by filtering code elsewhere — confirm.
	Rejected     bool
	RejectReason string
}
// HostRequest is the JSON envelope getNewHosts decodes from the index
// response: a "total" counter plus the list of hosts under "data".
type HostRequest struct {
	Total int    `json:"total"`
	Hosts []Host `json:"data"`
}
// InstanceRequest is the JSON envelope fetchInstance decodes from the
// instance search response. Each element of "data" is kept as raw JSON
// rather than being decoded into a typed struct.
type InstanceRequest struct {
	Total int               `json:"total"`
	Data  []json.RawMessage `json:"data"`
}
// Host is one entry of HostRequest.Hosts; only its "host" key is decoded.
type Host struct {
	Host string `json:"host"`
}
func getNewHosts(indexUrl string, lastFetched string, start int, count int) ([]string, error) {
hosts := []string{}
reqURL, err := url.Parse(indexUrl)
if err != nil {
return nil, fmt.Errorf("error parsing URL: %w", err)
}
fmt.Println(lastFetched)
query := reqURL.Query()
query.Set("start", fmt.Sprintf("%d", start))
query.Set("count", fmt.Sprintf("%d", count))
query.Set("since", lastFetched)
query.Set("sort", "createdAt")
reqURL.RawQuery = query.Encode()
resp, err := http.Get(reqURL.String())
if err != nil {
return nil, fmt.Errorf("error making request: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
fmt.Println(resp)
return nil, fmt.Errorf("HTTP error: %s", resp.Status)
}
var hostsRequest HostRequest
if err := json.NewDecoder(resp.Body).Decode(&hostsRequest); err != nil {
return nil, fmt.Errorf("error decoding JSON: %w", err)
}
for _, host := range hostsRequest.Hosts {
hosts = append(hosts, host.Host)
}
return hosts, nil
2025-06-20 06:31:18 +03:00
}
2025-06-20 19:04:57 +03:00
func fetchInstance(instanceUrl string, host string) Instance {
instance := Instance{
Url: host,
CreatedAt: time.Now().Format("2006-01-02T15:04:05Z"),
Rejected: false,
RejectReason: "",
}
if instanceUrl == "" {
return instance
}
reqURL, err := url.Parse(instanceUrl)
if err != nil {
panic(err)
}
query := reqURL.Query()
query.Set("search", host)
reqURL.RawQuery = query.Encode()
resp, err := http.Get(reqURL.String())
if err != nil {
panic(err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
panic("not OK")
}
var instanceRequest InstanceRequest
if err := json.NewDecoder(resp.Body).Decode(&instanceRequest); err != nil {
panic(err)
}
if len(instanceRequest.Data) == 0 {
return instance
}
instance.Data = string(instanceRequest.Data[0])
re := regexp.MustCompile(`"createdAt":"[\d\-T:\.Z]+"`)
results := re.FindAll(instanceRequest.Data[0], -1)
if len(results) > 0 {
re := regexp.MustCompile(`\d{4}[\d\-T:\.Z]+`)
result := re.Find(results[0])
instance.CreatedAt = string(result)
fmt.Println(instance.CreatedAt)
}
return instance
2025-06-20 06:31:18 +03:00
}