diff --git a/README.MD b/README.MD new file mode 100644 index 0000000..b1c15df --- /dev/null +++ b/README.MD @@ -0,0 +1,2 @@ +## TODO: +-[ ] Que & Channels diff --git a/docker-compose.dgraph.yml b/docker-compose.dgraph.yml new file mode 100644 index 0000000..6b1b586 --- /dev/null +++ b/docker-compose.dgraph.yml @@ -0,0 +1,24 @@ +version: "3.2" +services: + zero: + image: dgraph/dgraph:latest + volumes: + - /tmp/data:/dgraph + ports: + - 5080:5080 + - 6080:6080 + restart: on-failure + command: dgraph zero --my=zero:5080 + alpha: + image: dgraph/dgraph:latest + volumes: + - /tmp/data:/dgraph + ports: + - 8080:8080 + - 9080:9080 + restart: on-failure + command: dgraph alpha --my=alpha:7080 --zero=zero:5080 --security whitelist= + ratel: + image: dgraph/ratel:latest + ports: + - 8000:8000 \ No newline at end of file diff --git a/e621/favorite.go b/e621/favorite.go index 607d002..2d73a81 100644 --- a/e621/favorite.go +++ b/e621/favorite.go @@ -10,19 +10,20 @@ import ( ) // GetFavorites retrieves all favorites from the e621 API. -func (c *Client) GetFavorites(user string) ([]models.Post, error) { - time.Sleep(2 * time.Second) +func (c *Client) GetFavorites(user models.E621User) ([]models.Post, error) { + time.Sleep(1 * time.Second) var lastPostID int64 var allFavorites []models.Post + var url string for { - url := fmt.Sprintf("%s/posts.json?tags=fav:%s+status:any&page=b%d", baseURL, user, lastPostID) + url = fmt.Sprintf("%s/favorites.json?user_id=%d&page=%d", baseURL, user.ID, lastPostID) req, err := http.NewRequest("GET", url, nil) if err != nil { return nil, err } - req.Header.Set("User-Agent", "FavGetter (by Selloo)") + req.Header.Set("User-Agent", "e621 to GraphDB (by Selloo)") req.Header.Add("Accept", "application/json") req.SetBasicAuth(c.username, c.apiKey) @@ -30,7 +31,6 @@ func (c *Client) GetFavorites(user string) ([]models.Post, error) { if err != nil { return nil, err } - defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { @@ -54,8 +54,7 @@ func (c *Client) GetFavorites(user string) ([]models.Post, error) { } // Update the last post ID for the next page request - lastPostID = fetchedFavorites.Posts[len(fetchedFavorites.Posts)-1].ID + lastPostID = lastPostID + 1 } - } diff --git a/neo4jAPI/relationship.go b/neo4jAPI/relationship.go index 4dbc2bf..5b3ee1c 100644 --- a/neo4jAPI/relationship.go +++ b/neo4jAPI/relationship.go @@ -63,9 +63,9 @@ func UserToPostRelationship(ctx context.Context, driver neo4j.DriverWithContext, } // CheckUserToPostRelationship gives back a bool if the connection between the post and the user exists -func CheckUserToPostRelationship(ctx context.Context, driver neo4j.DriverWithContext, e621PostID int64, e621UserID int64) (error, bool) { +func CheckUserToPostRelationship(ctx context.Context, driver neo4j.DriverWithContext, e621PostID int64, e621UserID int64) (bool, error) { query := ` - MATCH (user:e621User {e621ID: $e621PostID})-[favorite:IS_FAVORITE]->(post:e621Post {e621PostID: $e621ID}) + MATCH (user:e621User {e621ID: $e621ID})-[favorite:IS_FAVORITE]->(post:e621Post {e621PostID: $e621PostID}) RETURN COUNT(favorite) > 0 AS isFavorite ` params := map[string]interface{}{ @@ -75,12 +75,12 @@ func CheckUserToPostRelationship(ctx context.Context, driver neo4j.DriverWithCon result, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer) if err != nil { - return err, false + return false, err } exists, _, err := neo4j.GetRecordValue[bool](result.Records[0], "isFavorite") if err != nil { - return err, false + return false, err } - return nil, exists + return exists, nil } diff --git a/services/user.go b/services/user.go index 620f2e3..078e634 100644 --- a/services/user.go +++ b/services/user.go @@ -33,7 +33,7 @@ func ScrapeUser(ctx context.Context, driver neo4j.DriverWithContext, e621Client log.Printf("Getting favorites for user %s with id %d", e621User.Name, e621User.ID) start := time.Now() - userFavorites, err := e621Client.GetFavorites(e621User.Name) + userFavorites, err := e621Client.GetFavorites(e621User) if err != nil { log.Fatal(err) } @@ -43,6 +43,15 @@ func ScrapeUser(ctx context.Context, driver neo4j.DriverWithContext, e621Client // Uploads all Tags, Posts as Nodes to Neo4j for i, post := range userFavorites { + + if exists, err := neo4jAPI.CheckUserToPostRelationship(ctx, driver, post.ID, e621User.ID); err == nil && exists { + log.Printf("No new posts found for user %s with id %d", e621User.Name, e621User.ID) + log.Printf("Last Post ID Found: %d", post.ID) + break + } else if err != nil { + return err + } + start = time.Now() err = uploadNodes(ctx, driver, post) if err != nil { @@ -50,10 +59,6 @@ func ScrapeUser(ctx context.Context, driver neo4j.DriverWithContext, e621Client } log.Printf("Uploading post for user %s with id %d, %d of %d with ID: %d took: %v", e621User.Name, e621User.ID, i, len(userFavorites), post.ID, time.Since(start)) - } - - // Makes relationships between different nodes - for i, post := range userFavorites { start := time.Now() err = uploadPostToUserRelationship(ctx, driver, post, e621User) if err != nil { @@ -91,15 +96,15 @@ func ScrapeUser(ctx context.Context, driver neo4j.DriverWithContext, e621Client return err } log.Printf("Making relationship for user %s with id %d, %d for Post: %d with ID: %d took: %v", e621User.Name, e621User.ID, i, len(userFavorites), post.ID, time.Since(start)) - } - log.Printf("Uploading all posts for user %s took: %v", username, time.Since(startUploadPosts)) + return nil } // uploadNodes uploads the post to the database and creates the nodes func uploadNodes(ctx context.Context, driver neo4j.DriverWithContext, post models.Post) error { + uniqueGeneralTags := make([]string, 0) uniqueCharacterTags := make([]string, 0) uniqueCopyrightTags := make([]string, 0) @@ -120,7 +125,6 @@ func uploadNodes(ctx context.Context, driver neo4j.DriverWithContext, post model uniqueCopyrightTags = utils.UniqueNonEmptyElementsOf(allCopyrightTags) uniqueArtistTags = utils.UniqueNonEmptyElementsOf(allArtistTags) - // Uploads post to database err := neo4jAPI.CreatePostNode(ctx, driver, post.ID) if err != nil { return err @@ -160,6 +164,7 @@ func uploadNodes(ctx context.Context, driver neo4j.DriverWithContext, post model return err } } + return nil } @@ -239,3 +244,4 @@ func uploadArtistTagRelationship(ctx context.Context, driver neo4j.DriverWithCon } //11min für Selloo, simultan mit mutt_jake +//1h58m53 für mutt_jake, mit Selloo diff --git a/utils/queue.go b/utils/queue.go new file mode 100644 index 0000000..2cb865e --- /dev/null +++ b/utils/queue.go @@ -0,0 +1,33 @@ +package utils + +import ( + "errors" +) + +type Task struct { + URL string `json:"url,omitempty" :"url"` + Methode string `json:"method,omitempty" :"method"` + Channel chan any `:"channel"` +} + +type Queue struct { + elements []Task +} + +func (queue *Queue) Pop() (Task, error) { + if len(queue.elements) == 0 { + return Task{}, errors.New("try to remove an element of a empty queue") + } + task := queue.elements[0] + queue.elements = queue.elements[1:] + return task, nil +} + +func (queue *Queue) Push(task Task) error { + empty := Task{} + if task == empty { + return errors.New("try to add task but task is empty") + } + queue.elements = append(queue.elements, task) + return nil +} diff --git a/utils/queue_test.go b/utils/queue_test.go new file mode 100644 index 0000000..aac19b9 --- /dev/null +++ b/utils/queue_test.go @@ -0,0 +1,97 @@ +package utils + +import ( + "reflect" + "testing" +) + +func TestQueue_Pop(t *testing.T) { + type fields struct { + elements []Task + } + tests := []struct { + name string + fields fields + want Task + wantErr bool + }{ + { + name: "Pop element of empty list", + fields: fields{}, + want: Task{}, + wantErr: true, + }, + { + name: "Pop element of a filled list with three elements", + fields: fields{elements: []Task{ + {URL: "https://e621.net0....", Methode: "GET", Channel: nil}, + {URL: "https://e621.net1....", Methode: "GET", Channel: nil}, + {URL: "https://e621.net2....", Methode: "GET", Channel: nil}, + }}, + want: Task{ + URL: "https://e621.net0....", + Methode: "GET", + Channel: nil, + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + queue := &Queue{ + elements: tt.fields.elements, + } + got, err := queue.Pop() + if (err != nil) != tt.wantErr { + t.Errorf("Pop() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("Pop() got = %v, want %v", got, tt.want) + } + }) + } +} + +func TestQueue_Push(t *testing.T) { + t.Run("Push tasks to empty queue", func(t *testing.T) { + queue := Queue{elements: []Task{}} + task := Task{ + URL: "http://e621.net0....", + Methode: "GET", + Channel: nil, + } + err := queue.Push(task) + if err != nil { + t.Errorf("Push() error = %v", err) + } + if len(queue.elements) != 1 { + t.Errorf("Push() error = queue is not one") + } + if queue.elements[0] != task { + t.Errorf("Push() error = wrong queue task in queue") + } + }) + t.Run("Push tasks to filled queue", func(t *testing.T) { + queue := Queue{elements: []Task{{ + URL: "http://e621.net0....", + Methode: "GET", + Channel: nil, + }}} + task := Task{ + URL: "http://e621.net1....", + Methode: "GET", + Channel: nil, + } + err := queue.Push(task) + if err != nil { + t.Errorf("Push() error = %v", err) + } + if len(queue.elements) != 2 { + t.Errorf("Push() error = queue is not two") + } + if queue.elements[1] != task { + t.Errorf("Push() error = wrong queue task in queue") + } + }) +}