added queue, changed fav api endpoint for e621, fixed db query and added a check if fav already exists

This commit is contained in:
David Janowski 2023-06-17 19:13:25 +02:00
parent e838b7b724
commit b49fdea71c
7 changed files with 181 additions and 20 deletions

2
README.MD Normal file
View File

@ -0,0 +1,2 @@
## TODO:
- [ ] Queue & Channels

24
docker-compose.dgraph.yml Normal file
View File

@ -0,0 +1,24 @@
# Docker Compose definition with three services: zero, alpha and ratel.
version: "3.2"
services:
# zero: runs `dgraph zero`, advertising itself as zero:5080.
zero:
image: dgraph/dgraph:latest
volumes:
# Host directory /tmp/data is mounted at /dgraph inside the container.
- /tmp/data:/dgraph
ports:
- 5080:5080
- 6080:6080
restart: on-failure
command: dgraph zero --my=zero:5080
# alpha: runs `dgraph alpha` and is pointed at the zero service via --zero=zero:5080.
alpha:
image: dgraph/dgraph:latest
volumes:
# Same host directory as the zero service.
- /tmp/data:/dgraph
ports:
- 8080:8080
- 9080:9080
restart: on-failure
# NOTE(review): <IP_ADDRESS> looks like a placeholder for the whitelist value; confirm and replace before use.
command: dgraph alpha --my=alpha:7080 --zero=zero:5080 --security whitelist=<IP_ADDRESS>
# ratel: runs the dgraph/ratel image and publishes port 8000.
ratel:
image: dgraph/ratel:latest
ports:
- 8000:8000

View File

@ -10,19 +10,20 @@ import (
) )
// GetFavorites retrieves all favorites from the e621 API. // GetFavorites retrieves all favorites from the e621 API.
func (c *Client) GetFavorites(user string) ([]models.Post, error) { func (c *Client) GetFavorites(user models.E621User) ([]models.Post, error) {
time.Sleep(2 * time.Second) time.Sleep(1 * time.Second)
var lastPostID int64 var lastPostID int64
var allFavorites []models.Post var allFavorites []models.Post
var url string
for { for {
url := fmt.Sprintf("%s/posts.json?tags=fav:%s+status:any&page=b%d", baseURL, user, lastPostID) url = fmt.Sprintf("%s/favorites.json?user_id=%d&page=%d", baseURL, user.ID, lastPostID)
req, err := http.NewRequest("GET", url, nil) req, err := http.NewRequest("GET", url, nil)
if err != nil { if err != nil {
return nil, err return nil, err
} }
req.Header.Set("User-Agent", "FavGetter (by Selloo)") req.Header.Set("User-Agent", "e621 to GraphDB (by Selloo)")
req.Header.Add("Accept", "application/json") req.Header.Add("Accept", "application/json")
req.SetBasicAuth(c.username, c.apiKey) req.SetBasicAuth(c.username, c.apiKey)
@ -30,7 +31,6 @@ func (c *Client) GetFavorites(user string) ([]models.Post, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body) body, err := io.ReadAll(resp.Body)
if err != nil { if err != nil {
@ -54,8 +54,7 @@ func (c *Client) GetFavorites(user string) ([]models.Post, error) {
} }
// Update the last post ID for the next page request // Update the last post ID for the next page request
lastPostID = fetchedFavorites.Posts[len(fetchedFavorites.Posts)-1].ID lastPostID = lastPostID + 1
} }
} }

View File

@ -63,9 +63,9 @@ func UserToPostRelationship(ctx context.Context, driver neo4j.DriverWithContext,
} }
// CheckUserToPostRelationship gives back a bool if the connection between the post and the user exists // CheckUserToPostRelationship gives back a bool if the connection between the post and the user exists
func CheckUserToPostRelationship(ctx context.Context, driver neo4j.DriverWithContext, e621PostID int64, e621UserID int64) (error, bool) { func CheckUserToPostRelationship(ctx context.Context, driver neo4j.DriverWithContext, e621PostID int64, e621UserID int64) (bool, error) {
query := ` query := `
MATCH (user:e621User {e621ID: $e621PostID})-[favorite:IS_FAVORITE]->(post:e621Post {e621PostID: $e621ID}) MATCH (user:e621User {e621ID: $e621ID})-[favorite:IS_FAVORITE]->(post:e621Post {e621PostID: $e621PostID})
RETURN COUNT(favorite) > 0 AS isFavorite RETURN COUNT(favorite) > 0 AS isFavorite
` `
params := map[string]interface{}{ params := map[string]interface{}{
@ -75,12 +75,12 @@ func CheckUserToPostRelationship(ctx context.Context, driver neo4j.DriverWithCon
result, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer) result, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)
if err != nil { if err != nil {
return err, false return false, err
} }
exists, _, err := neo4j.GetRecordValue[bool](result.Records[0], "isFavorite") exists, _, err := neo4j.GetRecordValue[bool](result.Records[0], "isFavorite")
if err != nil { if err != nil {
return err, false return false, err
} }
return nil, exists return exists, nil
} }

View File

@ -33,7 +33,7 @@ func ScrapeUser(ctx context.Context, driver neo4j.DriverWithContext, e621Client
log.Printf("Getting favorites for user %s with id %d", e621User.Name, e621User.ID) log.Printf("Getting favorites for user %s with id %d", e621User.Name, e621User.ID)
start := time.Now() start := time.Now()
userFavorites, err := e621Client.GetFavorites(e621User.Name) userFavorites, err := e621Client.GetFavorites(e621User)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
@ -43,6 +43,15 @@ func ScrapeUser(ctx context.Context, driver neo4j.DriverWithContext, e621Client
// Uploads all Tags, Posts as Nodes to Neo4j // Uploads all Tags, Posts as Nodes to Neo4j
for i, post := range userFavorites { for i, post := range userFavorites {
if exists, err := neo4jAPI.CheckUserToPostRelationship(ctx, driver, post.ID, e621User.ID); err == nil && exists {
log.Printf("No new posts found for user %s with id %d", e621User.Name, e621User.ID)
log.Printf("Last Post ID Found: %d", post.ID)
break
} else if err != nil {
return err
}
start = time.Now() start = time.Now()
err = uploadNodes(ctx, driver, post) err = uploadNodes(ctx, driver, post)
if err != nil { if err != nil {
@ -50,10 +59,6 @@ func ScrapeUser(ctx context.Context, driver neo4j.DriverWithContext, e621Client
} }
log.Printf("Uploading post for user %s with id %d, %d of %d with ID: %d took: %v", e621User.Name, e621User.ID, i, len(userFavorites), post.ID, time.Since(start)) log.Printf("Uploading post for user %s with id %d, %d of %d with ID: %d took: %v", e621User.Name, e621User.ID, i, len(userFavorites), post.ID, time.Since(start))
}
// Makes relationships between different nodes
for i, post := range userFavorites {
start := time.Now() start := time.Now()
err = uploadPostToUserRelationship(ctx, driver, post, e621User) err = uploadPostToUserRelationship(ctx, driver, post, e621User)
if err != nil { if err != nil {
@ -91,15 +96,15 @@ func ScrapeUser(ctx context.Context, driver neo4j.DriverWithContext, e621Client
return err return err
} }
log.Printf("Making relationship for user %s with id %d, %d for Post: %d with ID: %d took: %v", e621User.Name, e621User.ID, i, len(userFavorites), post.ID, time.Since(start)) log.Printf("Making relationship for user %s with id %d, %d for Post: %d with ID: %d took: %v", e621User.Name, e621User.ID, i, len(userFavorites), post.ID, time.Since(start))
} }
log.Printf("Uploading all posts for user %s took: %v", username, time.Since(startUploadPosts)) log.Printf("Uploading all posts for user %s took: %v", username, time.Since(startUploadPosts))
return nil return nil
} }
// uploadNodes uploads the post to the database and creates the nodes // uploadNodes uploads the post to the database and creates the nodes
func uploadNodes(ctx context.Context, driver neo4j.DriverWithContext, post models.Post) error { func uploadNodes(ctx context.Context, driver neo4j.DriverWithContext, post models.Post) error {
uniqueGeneralTags := make([]string, 0) uniqueGeneralTags := make([]string, 0)
uniqueCharacterTags := make([]string, 0) uniqueCharacterTags := make([]string, 0)
uniqueCopyrightTags := make([]string, 0) uniqueCopyrightTags := make([]string, 0)
@ -120,7 +125,6 @@ func uploadNodes(ctx context.Context, driver neo4j.DriverWithContext, post model
uniqueCopyrightTags = utils.UniqueNonEmptyElementsOf(allCopyrightTags) uniqueCopyrightTags = utils.UniqueNonEmptyElementsOf(allCopyrightTags)
uniqueArtistTags = utils.UniqueNonEmptyElementsOf(allArtistTags) uniqueArtistTags = utils.UniqueNonEmptyElementsOf(allArtistTags)
// Uploads post to database
err := neo4jAPI.CreatePostNode(ctx, driver, post.ID) err := neo4jAPI.CreatePostNode(ctx, driver, post.ID)
if err != nil { if err != nil {
return err return err
@ -160,6 +164,7 @@ func uploadNodes(ctx context.Context, driver neo4j.DriverWithContext, post model
return err return err
} }
} }
return nil return nil
} }
@ -239,3 +244,4 @@ func uploadArtistTagRelationship(ctx context.Context, driver neo4j.DriverWithCon
} }
//11min für Selloo, simultan mit mutt_jake //11min für Selloo, simultan mit mutt_jake
//1h58m53 für mutt_jake, mit Selloo

33
utils/queue.go Normal file
View File

@ -0,0 +1,33 @@
package utils
import (
"errors"
)
// Sentinel errors returned by Queue operations. The messages are unchanged
// from the previous inline errors; callers can now match them with errors.Is.
var (
	// ErrEmptyQueue is returned by Pop when the queue holds no tasks.
	ErrEmptyQueue = errors.New("try to remove an element of a empty queue")
	// ErrEmptyTask is returned by Push when the given task is the zero value.
	ErrEmptyTask = errors.New("try to add task but task is empty")
)

// Task is a single unit of work stored in a Queue.
//
// NOTE(review): presumably URL and Methode are the request URL and HTTP
// method, and Channel carries the result back to the producer; confirm
// against the code that consumes tasks.
type Task struct {
	URL     string `json:"url,omitempty"`
	Methode string `json:"method,omitempty"`
	// Channel is excluded from JSON: encoding/json cannot encode channel
	// values, so without `json:"-"` marshalling any Task would fail.
	Channel chan any `json:"-"`
}

// Queue is a first-in, first-out list of tasks. The zero value is an empty
// queue that is ready to use.
type Queue struct {
	elements []Task
}

// Pop removes and returns the task at the front of the queue.
// It returns ErrEmptyQueue and a zero Task when the queue is empty.
func (q *Queue) Pop() (Task, error) {
	if len(q.elements) == 0 {
		return Task{}, ErrEmptyQueue
	}
	task := q.elements[0]
	// Clear the slot before reslicing so the backing array no longer keeps
	// the popped task's channel and strings reachable.
	q.elements[0] = Task{}
	q.elements = q.elements[1:]
	return task, nil
}

// Push appends task to the back of the queue.
// It returns ErrEmptyTask and leaves the queue untouched when task is the
// zero value.
func (q *Queue) Push(task Task) error {
	if task == (Task{}) {
		return ErrEmptyTask
	}
	q.elements = append(q.elements, task)
	return nil
}

97
utils/queue_test.go Normal file
View File

@ -0,0 +1,97 @@
package utils
import (
"reflect"
"testing"
)
// TestQueue_Pop checks that Pop fails on an empty queue and that, on a filled
// queue, it returns the first element and leaves the rest in their original
// order.
func TestQueue_Pop(t *testing.T) {
	type fields struct {
		elements []Task
	}
	tests := []struct {
		name          string
		fields        fields
		want          Task
		wantRemaining []Task // expected queue content after Pop
		wantErr       bool
	}{
		{
			name:          "Pop element of empty list",
			fields:        fields{},
			want:          Task{},
			wantRemaining: nil,
			wantErr:       true,
		},
		{
			name: "Pop element of a filled list with three elements",
			fields: fields{elements: []Task{
				{URL: "https://e621.net0....", Methode: "GET", Channel: nil},
				{URL: "https://e621.net1....", Methode: "GET", Channel: nil},
				{URL: "https://e621.net2....", Methode: "GET", Channel: nil},
			}},
			want: Task{
				URL:     "https://e621.net0....",
				Methode: "GET",
				Channel: nil,
			},
			wantRemaining: []Task{
				{URL: "https://e621.net1....", Methode: "GET", Channel: nil},
				{URL: "https://e621.net2....", Methode: "GET", Channel: nil},
			},
			wantErr: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			queue := &Queue{
				elements: tt.fields.elements,
			}
			got, err := queue.Pop()
			if (err != nil) != tt.wantErr {
				t.Errorf("Pop() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if !reflect.DeepEqual(got, tt.want) {
				t.Errorf("Pop() got = %v, want %v", got, tt.want)
			}
			// Compare by length first so a nil and an empty slice are treated
			// alike and the element loop cannot index out of range.
			if len(queue.elements) != len(tt.wantRemaining) {
				t.Errorf("Pop() left %d elements, want %d", len(queue.elements), len(tt.wantRemaining))
				return
			}
			for i := range tt.wantRemaining {
				if queue.elements[i] != tt.wantRemaining[i] {
					t.Errorf("Pop() remaining[%d] = %v, want %v", i, queue.elements[i], tt.wantRemaining[i])
				}
			}
		})
	}
}
// TestQueue_Push checks that Push appends a task to an empty and to a filled
// queue, and that it rejects the zero-value Task without modifying the queue.
func TestQueue_Push(t *testing.T) {
	t.Run("Push tasks to empty queue", func(t *testing.T) {
		queue := Queue{elements: []Task{}}
		task := Task{
			URL:     "http://e621.net0....",
			Methode: "GET",
			Channel: nil,
		}
		err := queue.Push(task)
		if err != nil {
			t.Errorf("Push() error = %v", err)
		}
		// Fatal here: indexing below would panic if the length is wrong.
		if len(queue.elements) != 1 {
			t.Fatalf("Push() error = queue is not one")
		}
		if queue.elements[0] != task {
			t.Errorf("Push() error = wrong queue task in queue")
		}
	})
	t.Run("Push tasks to filled queue", func(t *testing.T) {
		queue := Queue{elements: []Task{{
			URL:     "http://e621.net0....",
			Methode: "GET",
			Channel: nil,
		}}}
		task := Task{
			URL:     "http://e621.net1....",
			Methode: "GET",
			Channel: nil,
		}
		err := queue.Push(task)
		if err != nil {
			t.Errorf("Push() error = %v", err)
		}
		// Fatal here: indexing below would panic if the length is wrong.
		if len(queue.elements) != 2 {
			t.Fatalf("Push() error = queue is not two")
		}
		if queue.elements[1] != task {
			t.Errorf("Push() error = wrong queue task in queue")
		}
	})
	t.Run("Push empty task is rejected", func(t *testing.T) {
		queue := Queue{elements: []Task{}}
		err := queue.Push(Task{})
		if err == nil {
			t.Errorf("Push() error = nil, want error for empty task")
		}
		if len(queue.elements) != 0 {
			t.Errorf("Push() error = rejected task was added to queue")
		}
	})
}