added queue, changed fav api endpoint for e621, fixed db query and added a check if fav already exists
This commit is contained in:
parent
e838b7b724
commit
b49fdea71c
24
docker-compose.dgraph.yml
Normal file
24
docker-compose.dgraph.yml
Normal file
@ -0,0 +1,24 @@
|
||||
# Compose file defining three services: a Dgraph zero node, a Dgraph alpha
# node, and the ratel image.
version: "3.2"
services:
  zero:
    image: dgraph/dgraph:latest
    volumes:
      - /tmp/data:/dgraph
    ports:
      # Quoted so YAML never interprets a host:container pair as a number.
      - "5080:5080"
      - "6080:6080"
    restart: on-failure
    command: dgraph zero --my=zero:5080
  alpha:
    image: dgraph/dgraph:latest
    volumes:
      - /tmp/data:/dgraph
    ports:
      - "8080:8080"
      - "9080:9080"
    restart: on-failure
    # NOTE(review): <IP_ADDRESS> looks like a placeholder; replace it with the
    # real whitelist range before starting the stack.
    command: dgraph alpha --my=alpha:7080 --zero=zero:5080 --security whitelist=<IP_ADDRESS>
  ratel:
    image: dgraph/ratel:latest
    ports:
      - "8000:8000"
|
@ -10,19 +10,20 @@ import (
|
||||
)
|
||||
|
||||
// GetFavorites retrieves all favorites from the e621 API.
|
||||
func (c *Client) GetFavorites(user string) ([]models.Post, error) {
|
||||
time.Sleep(2 * time.Second)
|
||||
func (c *Client) GetFavorites(user models.E621User) ([]models.Post, error) {
|
||||
time.Sleep(1 * time.Second)
|
||||
var lastPostID int64
|
||||
var allFavorites []models.Post
|
||||
var url string
|
||||
|
||||
for {
|
||||
url := fmt.Sprintf("%s/posts.json?tags=fav:%s+status:any&page=b%d", baseURL, user, lastPostID)
|
||||
url = fmt.Sprintf("%s/favorites.json?user_id=%d&page=%d", baseURL, user.ID, lastPostID)
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", "FavGetter (by Selloo)")
|
||||
req.Header.Set("User-Agent", "e621 to GraphDB (by Selloo)")
|
||||
req.Header.Add("Accept", "application/json")
|
||||
req.SetBasicAuth(c.username, c.apiKey)
|
||||
|
||||
@ -30,7 +31,6 @@ func (c *Client) GetFavorites(user string) ([]models.Post, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
@ -54,8 +54,7 @@ func (c *Client) GetFavorites(user string) ([]models.Post, error) {
|
||||
}
|
||||
|
||||
// Update the last post ID for the next page request
|
||||
lastPostID = fetchedFavorites.Posts[len(fetchedFavorites.Posts)-1].ID
|
||||
lastPostID = lastPostID + 1
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -63,9 +63,9 @@ func UserToPostRelationship(ctx context.Context, driver neo4j.DriverWithContext,
|
||||
}
|
||||
|
||||
// CheckUserToPostRelationship reports whether an IS_FAVORITE relationship from the given user to the given post exists.
|
||||
func CheckUserToPostRelationship(ctx context.Context, driver neo4j.DriverWithContext, e621PostID int64, e621UserID int64) (error, bool) {
|
||||
func CheckUserToPostRelationship(ctx context.Context, driver neo4j.DriverWithContext, e621PostID int64, e621UserID int64) (bool, error) {
|
||||
query := `
|
||||
MATCH (user:e621User {e621ID: $e621PostID})-[favorite:IS_FAVORITE]->(post:e621Post {e621PostID: $e621ID})
|
||||
MATCH (user:e621User {e621ID: $e621ID})-[favorite:IS_FAVORITE]->(post:e621Post {e621PostID: $e621PostID})
|
||||
RETURN COUNT(favorite) > 0 AS isFavorite
|
||||
`
|
||||
params := map[string]interface{}{
|
||||
@ -75,12 +75,12 @@ func CheckUserToPostRelationship(ctx context.Context, driver neo4j.DriverWithCon
|
||||
|
||||
result, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)
|
||||
if err != nil {
|
||||
return err, false
|
||||
return false, err
|
||||
}
|
||||
|
||||
exists, _, err := neo4j.GetRecordValue[bool](result.Records[0], "isFavorite")
|
||||
if err != nil {
|
||||
return err, false
|
||||
return false, err
|
||||
}
|
||||
return nil, exists
|
||||
return exists, nil
|
||||
}
|
||||
|
@ -33,7 +33,7 @@ func ScrapeUser(ctx context.Context, driver neo4j.DriverWithContext, e621Client
|
||||
|
||||
log.Printf("Getting favorites for user %s with id %d", e621User.Name, e621User.ID)
|
||||
start := time.Now()
|
||||
userFavorites, err := e621Client.GetFavorites(e621User.Name)
|
||||
userFavorites, err := e621Client.GetFavorites(e621User)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
@ -43,6 +43,15 @@ func ScrapeUser(ctx context.Context, driver neo4j.DriverWithContext, e621Client
|
||||
|
||||
// Uploads all Tags, Posts as Nodes to Neo4j
|
||||
for i, post := range userFavorites {
|
||||
|
||||
if exists, err := neo4jAPI.CheckUserToPostRelationship(ctx, driver, post.ID, e621User.ID); err == nil && exists {
|
||||
log.Printf("No new posts found for user %s with id %d", e621User.Name, e621User.ID)
|
||||
log.Printf("Last Post ID Found: %d", post.ID)
|
||||
break
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
start = time.Now()
|
||||
err = uploadNodes(ctx, driver, post)
|
||||
if err != nil {
|
||||
@ -50,10 +59,6 @@ func ScrapeUser(ctx context.Context, driver neo4j.DriverWithContext, e621Client
|
||||
}
|
||||
log.Printf("Uploading post for user %s with id %d, %d of %d with ID: %d took: %v", e621User.Name, e621User.ID, i, len(userFavorites), post.ID, time.Since(start))
|
||||
|
||||
}
|
||||
|
||||
// Makes relationships between different nodes
|
||||
for i, post := range userFavorites {
|
||||
start := time.Now()
|
||||
err = uploadPostToUserRelationship(ctx, driver, post, e621User)
|
||||
if err != nil {
|
||||
@ -91,15 +96,15 @@ func ScrapeUser(ctx context.Context, driver neo4j.DriverWithContext, e621Client
|
||||
return err
|
||||
}
|
||||
log.Printf("Making relationship for user %s with id %d, %d for Post: %d with ID: %d took: %v", e621User.Name, e621User.ID, i, len(userFavorites), post.ID, time.Since(start))
|
||||
|
||||
}
|
||||
|
||||
log.Printf("Uploading all posts for user %s took: %v", username, time.Since(startUploadPosts))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// uploadNodes uploads the post to the database and creates the nodes
|
||||
func uploadNodes(ctx context.Context, driver neo4j.DriverWithContext, post models.Post) error {
|
||||
|
||||
uniqueGeneralTags := make([]string, 0)
|
||||
uniqueCharacterTags := make([]string, 0)
|
||||
uniqueCopyrightTags := make([]string, 0)
|
||||
@ -120,7 +125,6 @@ func uploadNodes(ctx context.Context, driver neo4j.DriverWithContext, post model
|
||||
uniqueCopyrightTags = utils.UniqueNonEmptyElementsOf(allCopyrightTags)
|
||||
uniqueArtistTags = utils.UniqueNonEmptyElementsOf(allArtistTags)
|
||||
|
||||
// Uploads post to database
|
||||
err := neo4jAPI.CreatePostNode(ctx, driver, post.ID)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -160,6 +164,7 @@ func uploadNodes(ctx context.Context, driver neo4j.DriverWithContext, post model
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -239,3 +244,4 @@ func uploadArtistTagRelationship(ctx context.Context, driver neo4j.DriverWithCon
|
||||
}
|
||||
|
||||
// 11 min for Selloo, run simultaneously with mutt_jake
|
||||
// 1h58m53 for mutt_jake, run together with Selloo
|
||||
|
33
utils/queue.go
Normal file
33
utils/queue.go
Normal file
@ -0,0 +1,33 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
// Sentinel errors returned by Queue operations. Callers can match them with
// errors.Is; the messages are unchanged from the previous inline errors.
var (
	// ErrEmptyQueue is returned by Pop when the queue holds no tasks.
	ErrEmptyQueue = errors.New("try to remove an element of a empty queue")
	// ErrEmptyTask is returned by Push when the task is the zero value.
	ErrEmptyTask = errors.New("try to add task but task is empty")
)

// Task is a single unit of work stored in a Queue.
type Task struct {
	// URL is serialized as "url" and omitted when empty.
	URL string `json:"url,omitempty"`
	// Methode is serialized as "method" and omitted when empty. The field
	// spelling is kept because it is part of the exported API.
	Methode string `json:"method,omitempty"`
	// Channel is excluded from JSON: encoding/json cannot encode channels, so
	// without the "-" tag marshalling any Task would fail.
	// NOTE(review): presumably used to hand the result back to the submitter —
	// confirm against the consumer.
	Channel chan any `json:"-"`
}

// Queue is a first-in, first-out list of tasks. The zero value is an empty
// queue ready to use.
type Queue struct {
	elements []Task
}

// Pop removes and returns the oldest task in the queue.
// It returns the zero Task and ErrEmptyQueue when the queue is empty.
func (q *Queue) Pop() (Task, error) {
	if len(q.elements) == 0 {
		return Task{}, ErrEmptyQueue
	}
	task := q.elements[0]
	// Clear the vacated slot so the backing array no longer keeps the popped
	// task's strings and channel reachable after the reslice below.
	q.elements[0] = Task{}
	q.elements = q.elements[1:]
	return task, nil
}

// Push appends task to the end of the queue.
// It returns ErrEmptyTask, leaving the queue untouched, when task is the zero
// value.
func (q *Queue) Push(task Task) error {
	if task == (Task{}) {
		return ErrEmptyTask
	}
	q.elements = append(q.elements, task)
	return nil
}
|
97
utils/queue_test.go
Normal file
97
utils/queue_test.go
Normal file
@ -0,0 +1,97 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestQueue_Pop(t *testing.T) {
|
||||
type fields struct {
|
||||
elements []Task
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
fields fields
|
||||
want Task
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "Pop element of empty list",
|
||||
fields: fields{},
|
||||
want: Task{},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "Pop element of a filled list with three elements",
|
||||
fields: fields{elements: []Task{
|
||||
{URL: "https://e621.net0....", Methode: "GET", Channel: nil},
|
||||
{URL: "https://e621.net1....", Methode: "GET", Channel: nil},
|
||||
{URL: "https://e621.net2....", Methode: "GET", Channel: nil},
|
||||
}},
|
||||
want: Task{
|
||||
URL: "https://e621.net0....",
|
||||
Methode: "GET",
|
||||
Channel: nil,
|
||||
},
|
||||
wantErr: false,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
queue := &Queue{
|
||||
elements: tt.fields.elements,
|
||||
}
|
||||
got, err := queue.Pop()
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Errorf("Pop() error = %v, wantErr %v", err, tt.wantErr)
|
||||
return
|
||||
}
|
||||
if !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("Pop() got = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestQueue_Push(t *testing.T) {
|
||||
t.Run("Push tasks to empty queue", func(t *testing.T) {
|
||||
queue := Queue{elements: []Task{}}
|
||||
task := Task{
|
||||
URL: "http://e621.net0....",
|
||||
Methode: "GET",
|
||||
Channel: nil,
|
||||
}
|
||||
err := queue.Push(task)
|
||||
if err != nil {
|
||||
t.Errorf("Push() error = %v", err)
|
||||
}
|
||||
if len(queue.elements) != 1 {
|
||||
t.Errorf("Push() error = queue is not one")
|
||||
}
|
||||
if queue.elements[0] != task {
|
||||
t.Errorf("Push() error = wrong queue task in queue")
|
||||
}
|
||||
})
|
||||
t.Run("Push tasks to filled queue", func(t *testing.T) {
|
||||
queue := Queue{elements: []Task{{
|
||||
URL: "http://e621.net0....",
|
||||
Methode: "GET",
|
||||
Channel: nil,
|
||||
}}}
|
||||
task := Task{
|
||||
URL: "http://e621.net1....",
|
||||
Methode: "GET",
|
||||
Channel: nil,
|
||||
}
|
||||
err := queue.Push(task)
|
||||
if err != nil {
|
||||
t.Errorf("Push() error = %v", err)
|
||||
}
|
||||
if len(queue.elements) != 2 {
|
||||
t.Errorf("Push() error = queue is not two")
|
||||
}
|
||||
if queue.elements[1] != task {
|
||||
t.Errorf("Push() error = wrong queue task in queue")
|
||||
}
|
||||
})
|
||||
}
|
Reference in New Issue
Block a user