Added a queue, changed the favorites API endpoint for e621, fixed the DB query, and added a check for whether a favorite already exists
This commit is contained in:
parent
e838b7b724
commit
b49fdea71c
24
docker-compose.dgraph.yml
Normal file
24
docker-compose.dgraph.yml
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
version: "3.2"
services:
  zero:
    image: dgraph/dgraph:latest
    volumes:
      - /tmp/data:/dgraph
    ports:
      - 5080:5080
      - 6080:6080
    restart: on-failure
    command: dgraph zero --my=zero:5080
  alpha:
    image: dgraph/dgraph:latest
    volumes:
      - /tmp/data:/dgraph
    ports:
      - 8080:8080
      - 9080:9080
    restart: on-failure
    command: dgraph alpha --my=alpha:7080 --zero=zero:5080 --security whitelist=<IP_ADDRESS>
  ratel:
    image: dgraph/ratel:latest
    ports:
      - 8000:8000
|
@ -10,19 +10,20 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// GetFavorites retrieves all favorites from the e621 API.
|
// GetFavorites retrieves all favorites from the e621 API.
|
||||||
func (c *Client) GetFavorites(user string) ([]models.Post, error) {
|
func (c *Client) GetFavorites(user models.E621User) ([]models.Post, error) {
|
||||||
time.Sleep(2 * time.Second)
|
time.Sleep(1 * time.Second)
|
||||||
var lastPostID int64
|
var lastPostID int64
|
||||||
var allFavorites []models.Post
|
var allFavorites []models.Post
|
||||||
|
var url string
|
||||||
|
|
||||||
for {
|
for {
|
||||||
url := fmt.Sprintf("%s/posts.json?tags=fav:%s+status:any&page=b%d", baseURL, user, lastPostID)
|
url = fmt.Sprintf("%s/favorites.json?user_id=%d&page=%d", baseURL, user.ID, lastPostID)
|
||||||
req, err := http.NewRequest("GET", url, nil)
|
req, err := http.NewRequest("GET", url, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
req.Header.Set("User-Agent", "FavGetter (by Selloo)")
|
req.Header.Set("User-Agent", "e621 to GraphDB (by Selloo)")
|
||||||
req.Header.Add("Accept", "application/json")
|
req.Header.Add("Accept", "application/json")
|
||||||
req.SetBasicAuth(c.username, c.apiKey)
|
req.SetBasicAuth(c.username, c.apiKey)
|
||||||
|
|
||||||
@ -30,7 +31,6 @@ func (c *Client) GetFavorites(user string) ([]models.Post, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
|
||||||
|
|
||||||
body, err := io.ReadAll(resp.Body)
|
body, err := io.ReadAll(resp.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -54,8 +54,7 @@ func (c *Client) GetFavorites(user string) ([]models.Post, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Update the last post ID for the next page request
|
// Update the last post ID for the next page request
|
||||||
lastPostID = fetchedFavorites.Posts[len(fetchedFavorites.Posts)-1].ID
|
lastPostID = lastPostID + 1
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -63,9 +63,9 @@ func UserToPostRelationship(ctx context.Context, driver neo4j.DriverWithContext,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// CheckUserToPostRelationship gives back a bool if the connection between the post and the user exists
|
// CheckUserToPostRelationship gives back a bool if the connection between the post and the user exists
|
||||||
func CheckUserToPostRelationship(ctx context.Context, driver neo4j.DriverWithContext, e621PostID int64, e621UserID int64) (error, bool) {
|
func CheckUserToPostRelationship(ctx context.Context, driver neo4j.DriverWithContext, e621PostID int64, e621UserID int64) (bool, error) {
|
||||||
query := `
|
query := `
|
||||||
MATCH (user:e621User {e621ID: $e621PostID})-[favorite:IS_FAVORITE]->(post:e621Post {e621PostID: $e621ID})
|
MATCH (user:e621User {e621ID: $e621ID})-[favorite:IS_FAVORITE]->(post:e621Post {e621PostID: $e621PostID})
|
||||||
RETURN COUNT(favorite) > 0 AS isFavorite
|
RETURN COUNT(favorite) > 0 AS isFavorite
|
||||||
`
|
`
|
||||||
params := map[string]interface{}{
|
params := map[string]interface{}{
|
||||||
@ -75,12 +75,12 @@ func CheckUserToPostRelationship(ctx context.Context, driver neo4j.DriverWithCon
|
|||||||
|
|
||||||
result, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)
|
result, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err, false
|
return false, err
|
||||||
}
|
}
|
||||||
|
|
||||||
exists, _, err := neo4j.GetRecordValue[bool](result.Records[0], "isFavorite")
|
exists, _, err := neo4j.GetRecordValue[bool](result.Records[0], "isFavorite")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err, false
|
return false, err
|
||||||
}
|
}
|
||||||
return nil, exists
|
return exists, nil
|
||||||
}
|
}
|
||||||
|
@ -33,7 +33,7 @@ func ScrapeUser(ctx context.Context, driver neo4j.DriverWithContext, e621Client
|
|||||||
|
|
||||||
log.Printf("Getting favorites for user %s with id %d", e621User.Name, e621User.ID)
|
log.Printf("Getting favorites for user %s with id %d", e621User.Name, e621User.ID)
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
userFavorites, err := e621Client.GetFavorites(e621User.Name)
|
userFavorites, err := e621Client.GetFavorites(e621User)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
@ -43,6 +43,15 @@ func ScrapeUser(ctx context.Context, driver neo4j.DriverWithContext, e621Client
|
|||||||
|
|
||||||
// Uploads all Tags, Posts as Nodes to Neo4j
|
// Uploads all Tags, Posts as Nodes to Neo4j
|
||||||
for i, post := range userFavorites {
|
for i, post := range userFavorites {
|
||||||
|
|
||||||
|
if exists, err := neo4jAPI.CheckUserToPostRelationship(ctx, driver, post.ID, e621User.ID); err == nil && exists {
|
||||||
|
log.Printf("No new posts found for user %s with id %d", e621User.Name, e621User.ID)
|
||||||
|
log.Printf("Last Post ID Found: %d", post.ID)
|
||||||
|
break
|
||||||
|
} else if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
start = time.Now()
|
start = time.Now()
|
||||||
err = uploadNodes(ctx, driver, post)
|
err = uploadNodes(ctx, driver, post)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -50,10 +59,6 @@ func ScrapeUser(ctx context.Context, driver neo4j.DriverWithContext, e621Client
|
|||||||
}
|
}
|
||||||
log.Printf("Uploading post for user %s with id %d, %d of %d with ID: %d took: %v", e621User.Name, e621User.ID, i, len(userFavorites), post.ID, time.Since(start))
|
log.Printf("Uploading post for user %s with id %d, %d of %d with ID: %d took: %v", e621User.Name, e621User.ID, i, len(userFavorites), post.ID, time.Since(start))
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// Makes relationships between different nodes
|
|
||||||
for i, post := range userFavorites {
|
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
err = uploadPostToUserRelationship(ctx, driver, post, e621User)
|
err = uploadPostToUserRelationship(ctx, driver, post, e621User)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -91,15 +96,15 @@ func ScrapeUser(ctx context.Context, driver neo4j.DriverWithContext, e621Client
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
log.Printf("Making relationship for user %s with id %d, %d for Post: %d with ID: %d took: %v", e621User.Name, e621User.ID, i, len(userFavorites), post.ID, time.Since(start))
|
log.Printf("Making relationship for user %s with id %d, %d for Post: %d with ID: %d took: %v", e621User.Name, e621User.ID, i, len(userFavorites), post.ID, time.Since(start))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("Uploading all posts for user %s took: %v", username, time.Since(startUploadPosts))
|
log.Printf("Uploading all posts for user %s took: %v", username, time.Since(startUploadPosts))
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// uploadNodes uploads the post to the database and creates the nodes
|
// uploadNodes uploads the post to the database and creates the nodes
|
||||||
func uploadNodes(ctx context.Context, driver neo4j.DriverWithContext, post models.Post) error {
|
func uploadNodes(ctx context.Context, driver neo4j.DriverWithContext, post models.Post) error {
|
||||||
|
|
||||||
uniqueGeneralTags := make([]string, 0)
|
uniqueGeneralTags := make([]string, 0)
|
||||||
uniqueCharacterTags := make([]string, 0)
|
uniqueCharacterTags := make([]string, 0)
|
||||||
uniqueCopyrightTags := make([]string, 0)
|
uniqueCopyrightTags := make([]string, 0)
|
||||||
@ -120,7 +125,6 @@ func uploadNodes(ctx context.Context, driver neo4j.DriverWithContext, post model
|
|||||||
uniqueCopyrightTags = utils.UniqueNonEmptyElementsOf(allCopyrightTags)
|
uniqueCopyrightTags = utils.UniqueNonEmptyElementsOf(allCopyrightTags)
|
||||||
uniqueArtistTags = utils.UniqueNonEmptyElementsOf(allArtistTags)
|
uniqueArtistTags = utils.UniqueNonEmptyElementsOf(allArtistTags)
|
||||||
|
|
||||||
// Uploads post to database
|
|
||||||
err := neo4jAPI.CreatePostNode(ctx, driver, post.ID)
|
err := neo4jAPI.CreatePostNode(ctx, driver, post.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@ -160,6 +164,7 @@ func uploadNodes(ctx context.Context, driver neo4j.DriverWithContext, post model
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -239,3 +244,4 @@ func uploadArtistTagRelationship(ctx context.Context, driver neo4j.DriverWithCon
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 11min for Selloo, running simultaneously with mutt_jake
|
// 11min for Selloo, running simultaneously with mutt_jake
|
||||||
|
// 1h58m53 for mutt_jake, together with Selloo
|
||||||
|
33
utils/queue.go
Normal file
33
utils/queue.go
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
package utils
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Task is a single unit of work stored in a Queue.
type Task struct {
	// URL is encoded as "url" in JSON and omitted when empty.
	URL string `json:"url,omitempty"`
	// Methode is encoded as "method" in JSON and omitted when empty.
	// NOTE(review): presumably the HTTP method (the tests use "GET") — confirm with callers.
	Methode string `json:"method,omitempty"`
	// Channel is excluded from JSON: encoding/json cannot encode channel
	// values, so without the "-" tag marshaling any Task would fail.
	Channel chan any `json:"-"`
}

// Queue is a first-in, first-out collection of Tasks backed by a slice.
type Queue struct {
	elements []Task
}

// Pop removes the Task at the front of the queue and returns it.
// It returns a zero Task and an error when the queue holds no elements.
func (queue *Queue) Pop() (Task, error) {
	if len(queue.elements) == 0 {
		return Task{}, errors.New("try to remove an element of a empty queue")
	}
	task := queue.elements[0]
	// Clear the vacated slot before reslicing; otherwise the backing array
	// keeps the popped Task (and its channel) reachable.
	queue.elements[0] = Task{}
	queue.elements = queue.elements[1:]
	return task, nil
}

// Push appends task to the back of the queue.
// It returns an error, and leaves the queue untouched, when task equals the
// zero Task.
func (queue *Queue) Push(task Task) error {
	if task == (Task{}) {
		return errors.New("try to add task but task is empty")
	}
	queue.elements = append(queue.elements, task)
	return nil
}
|
97
utils/queue_test.go
Normal file
97
utils/queue_test.go
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
package utils
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestQueue_Pop(t *testing.T) {
|
||||||
|
type fields struct {
|
||||||
|
elements []Task
|
||||||
|
}
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
fields fields
|
||||||
|
want Task
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Pop element of empty list",
|
||||||
|
fields: fields{},
|
||||||
|
want: Task{},
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Pop element of a filled list with three elements",
|
||||||
|
fields: fields{elements: []Task{
|
||||||
|
{URL: "https://e621.net0....", Methode: "GET", Channel: nil},
|
||||||
|
{URL: "https://e621.net1....", Methode: "GET", Channel: nil},
|
||||||
|
{URL: "https://e621.net2....", Methode: "GET", Channel: nil},
|
||||||
|
}},
|
||||||
|
want: Task{
|
||||||
|
URL: "https://e621.net0....",
|
||||||
|
Methode: "GET",
|
||||||
|
Channel: nil,
|
||||||
|
},
|
||||||
|
wantErr: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
queue := &Queue{
|
||||||
|
elements: tt.fields.elements,
|
||||||
|
}
|
||||||
|
got, err := queue.Pop()
|
||||||
|
if (err != nil) != tt.wantErr {
|
||||||
|
t.Errorf("Pop() error = %v, wantErr %v", err, tt.wantErr)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(got, tt.want) {
|
||||||
|
t.Errorf("Pop() got = %v, want %v", got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestQueue_Push(t *testing.T) {
|
||||||
|
t.Run("Push tasks to empty queue", func(t *testing.T) {
|
||||||
|
queue := Queue{elements: []Task{}}
|
||||||
|
task := Task{
|
||||||
|
URL: "http://e621.net0....",
|
||||||
|
Methode: "GET",
|
||||||
|
Channel: nil,
|
||||||
|
}
|
||||||
|
err := queue.Push(task)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Push() error = %v", err)
|
||||||
|
}
|
||||||
|
if len(queue.elements) != 1 {
|
||||||
|
t.Errorf("Push() error = queue is not one")
|
||||||
|
}
|
||||||
|
if queue.elements[0] != task {
|
||||||
|
t.Errorf("Push() error = wrong queue task in queue")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
t.Run("Push tasks to filled queue", func(t *testing.T) {
|
||||||
|
queue := Queue{elements: []Task{{
|
||||||
|
URL: "http://e621.net0....",
|
||||||
|
Methode: "GET",
|
||||||
|
Channel: nil,
|
||||||
|
}}}
|
||||||
|
task := Task{
|
||||||
|
URL: "http://e621.net1....",
|
||||||
|
Methode: "GET",
|
||||||
|
Channel: nil,
|
||||||
|
}
|
||||||
|
err := queue.Push(task)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Push() error = %v", err)
|
||||||
|
}
|
||||||
|
if len(queue.elements) != 2 {
|
||||||
|
t.Errorf("Push() error = queue is not two")
|
||||||
|
}
|
||||||
|
if queue.elements[1] != task {
|
||||||
|
t.Errorf("Push() error = wrong queue task in queue")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
Reference in New Issue
Block a user