Compare commits

..

No commits in common. "3be16a9277b8dccc5838023ed0ae5b6dc1867bd5" and "010397ef6345a0830a7b8a1889c1f94883652ec7" have entirely different histories.

39 changed files with 747 additions and 676 deletions

View File

@ -1,29 +0,0 @@
name: Gitea Build Check
run-name: ${{ gitea.actor }} is testing the build
on:
pull_request:
branches: [ "master" ]
jobs:
Build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Setup Go environment
uses: https://github.com/actions/setup-go@v3
with:
# The Go version to download (if necessary) and use. Supports semver spec and ranges.
go-version: 1.21.3 # optional
# Path to the go.mod file.
go-version-file: ./go.mod # optional
# Set this option to true if you want the action to always check for the latest available version that satisfies the version spec
check-latest: true # optional
# Used to specify whether caching is needed. Set to true, if you'd like to enable caching.
cache: true # optional
- name: Build
run: go build -v ./...
- name: Test
run: go test -v ./... -json -coverprofile=coverage.out > test-report.out

View File

@ -1,6 +1,6 @@
FROM golang:alpine as builder FROM golang:alpine as builder
WORKDIR /go/src/git.dragse.it/fenpaws/e621-to-graph WORKDIR /go/src/git.dragse.it/fenpaws/e621-to-neo4j
RUN apk add -U --no-cache ca-certificates && update-ca-certificates RUN apk add -U --no-cache ca-certificates && update-ca-certificates
@ -8,8 +8,8 @@ COPY go.mod go.sum ./
RUN go mod download RUN go mod download
COPY . ./ COPY . ./
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -installsuffix cgo -ldflags "-w -s" -o /app
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -installsuffix cgo -ldflags "-w -s" -o /app ./cmd/scraper/
FROM scratch FROM scratch

View File

@ -35,7 +35,6 @@ To install the project, follow these steps:
## Setting Up Environment Variables ## Setting Up Environment Variables
The program requires certain environment variables to be set. The program requires certain environment variables to be set.
```plaintext ```plaintext
E621_API_KEY= E621_API_KEY=
E621_USERNAME= E621_USERNAME=
@ -43,12 +42,6 @@ DB_TYPE=neo4j
DB_URL= DB_URL=
DB_PASSWORD= DB_PASSWORD=
DB_USERNAME= DB_USERNAME=
# Allowed values are FATAL, ERROR, WARN, INFO, DEBUG, TRACE (default: INFO)
LOG_LEVEL=
# Allowed values are PLAIN, JSON (default: PLAIN)
LOG_FORMAT=
# Allowed values are TRUE, FALSE (default: FALSE)
NEO4J_DEBUG=
``` ```
## Running the Program ## Running the Program
@ -73,20 +66,14 @@ The program can also be run using Docker. To use Docker, follow these steps:
1. Create a .env file in the project's root directory with the following content: 1. Create a .env file in the project's root directory with the following content:
```plaintext ````plaintext
E621_API_KEY= E621_API_KEY=
E621_USERNAME= E621_USERNAME=
DB_TYPE=neo4j DB_TYPE=neo4j
DB_URL= DB_URL=
DB_PASSWORD= DB_PASSWORD=
DB_USERNAME= DB_USERNAME=
# Allowed values are FATAL, ERROR, WARN, INFO, DEBUG, TRACE (default: INFO) ````
LOG_LEVEL=
# Allowed values are PLAIN, JSON (default: PLAIN)
LOG_FORMAT=
# Allowed values are TRUE, FALSE (default: FALSE)
NEO4J_DEBUG=
```
2. Build the Docker image: 2. Build the Docker image:

36
api/user.go Normal file
View File

@ -0,0 +1,36 @@
package api
import (
"context"
"e621_to_neo4j/database"
"e621_to_neo4j/e621"
"e621_to_neo4j/services"
"fmt"
"net/http"
)
// UserHandler is the handler for the user API
func UserHandler(ctx context.Context, graphConnection database.GraphConnection, e621Client *e621.Client) func(response http.ResponseWriter, request *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPost {
w.WriteHeader(http.StatusMethodNotAllowed)
fmt.Fprintf(w, "Only POST requests are allowed")
return
}
username := r.FormValue("username")
if username == "" {
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, "Username is required")
return
}
// Perform further processing with the username
go services.ScrapeUser(ctx, graphConnection, *e621Client, username)
// Send a response
w.WriteHeader(http.StatusOK)
fmt.Fprintf(w, "Username %s processed successfully", username)
}
}

View File

@ -1,97 +0,0 @@
package main
import (
"context"
"git.dragse.it/anthrove/e621-sdk-go/pkg/e621"
"git.dragse.it/anthrove/e621-to-graph/internal/api"
"git.dragse.it/anthrove/e621-to-graph/internal/config"
"git.dragse.it/anthrove/e621-to-graph/internal/database/neo4j"
"git.dragse.it/anthrove/e621-to-graph/pkg/logic"
log "github.com/sirupsen/logrus"
"net/http"
"strings"
"time"
)
func main() {
// Loads Config
appConfig, err := config.LoadConfig()
if err != nil {
log.Fatal(err)
}
log.Debug("main: config loaded")
loggingSetup(appConfig)
var graphConnection logic.GraphConnection
ctx := context.Background()
switch strings.ToLower(appConfig.DBType) {
case "neo4j":
graphConnection = neo4j.NewNeo4JConnection(appConfig.Neo4jDebug)
err = graphConnection.Connect(ctx, appConfig.DBEndpoint, appConfig.DBUsername, appConfig.DBPassword)
if err != nil {
log.Panicf("main: %s", err)
}
default:
log.Panic("main: no database was selected!")
}
log.WithFields(log.Fields{
"database_type": strings.ToLower(appConfig.DBType),
}).Info("main: database connection successful")
// Initialize the e621API
client := e621.NewClient(appConfig.E621Username, appConfig.E621APIKey)
// Register the ScapeUserFavourites with the "/user" route
http.HandleFunc("/user", api.ScapeUserFavourites(ctx, graphConnection, &client))
// Start the HTTP server
err = http.ListenAndServe(":8080", nil)
if err != nil {
log.Panicf("main: %s", err)
}
}
func loggingSetup(appConfig *config.Config) {
switch strings.ToUpper(appConfig.LogLevel) {
case "FATAL":
log.SetLevel(log.FatalLevel)
case "ERROR":
log.SetLevel(log.ErrorLevel)
case "WARN":
log.SetLevel(log.WarnLevel)
case "INFO":
log.SetLevel(log.InfoLevel)
case "DEBUG":
log.SetLevel(log.DebugLevel)
case "TRACE":
log.SetLevel(log.TraceLevel)
default:
log.Panic("main: no log level was set")
}
switch strings.ToUpper(appConfig.LogFormat) {
case "PLAIN":
log.SetFormatter(&log.TextFormatter{
ForceColors: true,
ForceQuote: true,
DisableLevelTruncation: true,
PadLevelText: true,
FullTimestamp: true,
TimestampFormat: time.DateTime, // 2006-01-02 15:04:05
})
case "JSON":
log.SetFormatter(&log.JSONFormatter{
TimestampFormat: time.DateTime, // 2006-01-02 15:04:05,
})
default:
log.Panic("main: no formatter was set")
}
log.WithFields(log.Fields{
"log_level": log.GetLevel(),
"formatter": appConfig.LogFormat,
}).Info("main: setting logging info")
}

27
database/logic.go Normal file
View File

@ -0,0 +1,27 @@
package database
import (
"context"
"e621_to_neo4j/e621/models"
)
// [X:Neo4J] Connection
// [X:Neo4J] Upload User
// [X:Neo4J] Upload Source
// [X:Neo4J] Upload Post
// [X:Neo4J] Upload Tag
// [X:Neo4J] Relationship Post->Tag
// [X:Neo4J] Relationship Post->Source
// [X:Neo4J] Relationship User->Post
type GraphConnection interface {
Connect(ctx context.Context, endpoint string, username string, password string) error
UploadUser(ctx context.Context, user models.E621User) error
UploadSource(ctx context.Context, SourceURL string) error
UploadPost(ctx context.Context, e621ID int64) error
UploadTag(ctx context.Context, name string, tagType string) error
EstablishPostToTagLink(ctx context.Context, e621PostID int64, e621Tag string) error
EstablishPostToSourceLink(ctx context.Context, e621PostID int64, sourceURL string) error
EstablishUserToPostLink(ctx context.Context, e621PostID int64, e621UserID int64) error
CheckUserToPostLink(ctx context.Context, e621PostID int64, e621UserID int64) (bool, error)
}

View File

@ -2,41 +2,29 @@ package neo4j
import ( import (
"context" "context"
"git.dragse.it/anthrove/e621-sdk-go/pkg/e621/model" "e621_to_neo4j/database"
"git.dragse.it/anthrove/e621-to-graph/pkg/logic" "e621_to_neo4j/e621/models"
"github.com/neo4j/neo4j-go-driver/v5/neo4j" "github.com/neo4j/neo4j-go-driver/v5/neo4j"
"github.com/neo4j/neo4j-go-driver/v5/neo4j/config" "github.com/neo4j/neo4j-go-driver/v5/neo4j/config"
) )
type neo4jConnection struct { type neo4jConnection struct {
driver neo4j.DriverWithContext driver neo4j.DriverWithContext
neo4jDebug bool
} }
func NewNeo4JConnection(neo4jDebug bool) logic.GraphConnection { func (c *neo4jConnection) CheckUserToPostLink(ctx context.Context, e621PostID int64, e621UserID int64) (bool, error) {
return &neo4jConnection{ return RelationshipCheckUserToPost(ctx, c.driver, e621PostID, e621UserID)
driver: nil,
neo4jDebug: neo4jDebug,
}
} }
func (c *neo4jConnection) GetUserFavoriteCount(ctx context.Context, userID model.UserID) (int64, error) { func (c *neo4jConnection) EstablishPostToTagLink(ctx context.Context, e621PostID int64, e621Tag string) error {
return GetUserFavoritesCount(ctx, c.driver, userID)
}
func (c *neo4jConnection) CheckUserToPostLink(ctx context.Context, e621PostID model.PostID, e621UserID model.UserID) (bool, error) {
return CheckUserToPostLink(ctx, c.driver, e621PostID, e621UserID)
}
func (c *neo4jConnection) EstablishPostToTagLink(ctx context.Context, e621PostID model.PostID, e621Tag string) error {
return EstablishPostTagLink(ctx, c.driver, e621PostID, e621Tag) return EstablishPostTagLink(ctx, c.driver, e621PostID, e621Tag)
} }
func (c *neo4jConnection) EstablishPostToSourceLink(ctx context.Context, e621PostID model.PostID, sourceURL string) error { func (c *neo4jConnection) EstablishPostToSourceLink(ctx context.Context, e621PostID int64, sourceURL string) error {
return EstablishPostToSourceLink(ctx, c.driver, e621PostID, sourceURL) return EstablishPostToSourceLink(ctx, c.driver, e621PostID, sourceURL)
} }
func (c *neo4jConnection) EstablishUserToPostLink(ctx context.Context, e621PostID model.PostID, e621UserID model.UserID) error { func (c *neo4jConnection) EstablishUserToPostLink(ctx context.Context, e621PostID int64, e621UserID int64) error {
return EstablishUserToPostLink(ctx, c.driver, e621PostID, e621UserID) return EstablishUserToPostLink(ctx, c.driver, e621PostID, e621UserID)
} }
@ -44,26 +32,24 @@ func (c *neo4jConnection) UploadTag(ctx context.Context, name string, tagType st
return CreateTagNode(ctx, c.driver, name, tagType) return CreateTagNode(ctx, c.driver, name, tagType)
} }
func (c *neo4jConnection) UploadPost(ctx context.Context, postID model.PostID) error { func (c *neo4jConnection) UploadPost(ctx context.Context, e621ID int64) error {
return CreatePostNode(ctx, c.driver, postID) return CreatePostNode(ctx, c.driver, e621ID)
} }
func (c *neo4jConnection) UploadSource(ctx context.Context, SourceURL string) error { func (c *neo4jConnection) UploadSource(ctx context.Context, SourceURL string) error {
return CreateSourceNode(ctx, c.driver, SourceURL) return CreateSourceNode(ctx, c.driver, SourceURL)
} }
func (c *neo4jConnection) UploadUser(ctx context.Context, user model.User) error { func (c *neo4jConnection) UploadUser(ctx context.Context, user models.E621User) error {
return CreateUserNode(ctx, c.driver, user) return CreateUserNode(ctx, c.driver, user)
} }
func (c *neo4jConnection) Connect(ctx context.Context, endpoint string, username string, password string) error { func (c *neo4jConnection) Connect(ctx context.Context, endpoint string, username string, password string) error {
driver, err := neo4j.NewDriverWithContext(endpoint, neo4j.BasicAuth(username, password, ""), driver, err := neo4j.NewDriverWithContext(endpoint, neo4j.BasicAuth(username, password, ""),
logger(c.neo4jDebug)) useConsoleLogger(neo4j.INFO))
if err != nil { if err != nil {
return err return err
} }
err = driver.VerifyAuthentication(ctx, nil) err = driver.VerifyAuthentication(context.Background(), nil)
if err != nil { if err != nil {
return err return err
} }
@ -71,8 +57,12 @@ func (c *neo4jConnection) Connect(ctx context.Context, endpoint string, username
return nil return nil
} }
func logger(neo4jDebug bool) func(config *config.Config) { func NewNeo4JConnection() database.GraphConnection {
return &neo4jConnection{}
}
func useConsoleLogger(level neo4j.LogLevel) func(config *config.Config) {
return func(config *config.Config) { return func(config *config.Config) {
config.Log = NewNeo4jLogger(neo4jDebug) config.Log = neo4j.ConsoleLogger(level)
} }
} }

View File

@ -1,4 +1,4 @@
package model package models
type DBTag struct { type DBTag struct {
Tag string Tag string

View File

@ -2,16 +2,16 @@ package neo4j
import ( import (
"context" "context"
"git.dragse.it/anthrove/e621-sdk-go/pkg/e621/model"
"github.com/neo4j/neo4j-go-driver/v5/neo4j" "github.com/neo4j/neo4j-go-driver/v5/neo4j"
) )
func CreatePostNode(ctx context.Context, driver neo4j.DriverWithContext, postID model.PostID) error { func CreatePostNode(ctx context.Context, driver neo4j.DriverWithContext, e621ID int64) error {
query := ` query := `
MERGE (u:e621Post {e621PostID: $postID}); MERGE (u:e621Post {e621PostID: $e621ID})
RETURN u
` `
params := map[string]any{ params := map[string]any{
"postID": postID, "e621ID": e621ID,
} }
_, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer) _, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)

View File

@ -2,26 +2,20 @@ package neo4j
import ( import (
"context" "context"
"git.dragse.it/anthrove/e621-sdk-go/pkg/e621/model"
"github.com/neo4j/neo4j-go-driver/v5/neo4j" "github.com/neo4j/neo4j-go-driver/v5/neo4j"
log "github.com/sirupsen/logrus"
) )
func EstablishPostTagLink(ctx context.Context, driver neo4j.DriverWithContext, e621PostID model.PostID, e621Tag string) error { func EstablishPostTagLink(ctx context.Context, driver neo4j.DriverWithContext, e621PostID int64, e621Tag string) error {
query := ` query := `
MATCH (p:e621Post {e621PostID: $e621PostID}) MATCH (p:e621Post {e621PostID: $e621PostID})
MATCH (t:e621Tag {e621Tag: $e621Tag}) MATCH (t:e621Tag {e621Tag: $e621Tag})
MERGE (p)-[:HAS_TAG]->(t); MERGE (p)-[:HAS_TAG]->(t)
` `
params := map[string]interface{}{ params := map[string]interface{}{
"e621PostID": e621PostID, "e621PostID": e621PostID,
"e621Tag": e621Tag, "e621Tag": e621Tag,
} }
log.WithFields(log.Fields{
"e621_post_id": e621PostID,
"e621_tag": e621Tag,
}).Trace("neo4j: creating post to e621Tag link")
_, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer) _, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)
if err != nil { if err != nil {
return err return err
@ -30,7 +24,7 @@ func EstablishPostTagLink(ctx context.Context, driver neo4j.DriverWithContext, e
return nil return nil
} }
func EstablishPostToSourceLink(ctx context.Context, driver neo4j.DriverWithContext, e621PostID model.PostID, sourceURL string) error { func EstablishPostToSourceLink(ctx context.Context, driver neo4j.DriverWithContext, e621PostID int64, sourceURL string) error {
query := ` query := `
MATCH (p:e621Post {e621PostID: $e621PostID}) MATCH (p:e621Post {e621PostID: $e621PostID})
MATCH (s:Source {URL: $sourceURL}) MATCH (s:Source {URL: $sourceURL})
@ -41,10 +35,6 @@ func EstablishPostToSourceLink(ctx context.Context, driver neo4j.DriverWithConte
"sourceURL": sourceURL, "sourceURL": sourceURL,
} }
log.WithFields(log.Fields{
"e621_post_id": e621PostID,
"source_url": sourceURL,
}).Trace("neo4j: creating post to source link")
_, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer) _, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)
if err != nil { if err != nil {
return err return err
@ -53,7 +43,7 @@ func EstablishPostToSourceLink(ctx context.Context, driver neo4j.DriverWithConte
return nil return nil
} }
func EstablishUserToPostLink(ctx context.Context, driver neo4j.DriverWithContext, e621PostID model.PostID, e621UserID model.UserID) error { func EstablishUserToPostLink(ctx context.Context, driver neo4j.DriverWithContext, e621PostID int64, e621UserID int64) error {
query := ` query := `
MATCH (p:e621Post {e621PostID: $e621PostID}) MATCH (p:e621Post {e621PostID: $e621PostID})
MATCH (u:e621User {e621ID: $e621ID}) MATCH (u:e621User {e621ID: $e621ID})
@ -63,10 +53,7 @@ func EstablishUserToPostLink(ctx context.Context, driver neo4j.DriverWithContext
"e621PostID": e621PostID, "e621PostID": e621PostID,
"e621ID": e621UserID, "e621ID": e621UserID,
} }
log.WithFields(log.Fields{
"e621_post_id": e621PostID,
"e621_user_id": e621UserID,
}).Trace("neo4j: creating user to post link")
_, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer) _, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)
if err != nil { if err != nil {
return err return err
@ -75,8 +62,8 @@ func EstablishUserToPostLink(ctx context.Context, driver neo4j.DriverWithContext
return nil return nil
} }
// CheckUserToPostLink gives back a bool if the connection between the post and the user exists // CheckUserToPostRelationship gives back a bool if the connection between the post and the user exists
func CheckUserToPostLink(ctx context.Context, driver neo4j.DriverWithContext, e621PostID model.PostID, e621UserID model.UserID) (bool, error) { func RelationshipCheckUserToPost(ctx context.Context, driver neo4j.DriverWithContext, e621PostID int64, e621UserID int64) (bool, error) {
query := ` query := `
MATCH (user:e621User {e621ID: $e621ID})-[favorite:IS_FAVORITE]->(post:e621Post {e621PostID: $e621PostID}) MATCH (user:e621User {e621ID: $e621ID})-[favorite:IS_FAVORITE]->(post:e621Post {e621PostID: $e621PostID})
RETURN COUNT(favorite) > 0 AS isFavorite RETURN COUNT(favorite) > 0 AS isFavorite
@ -86,10 +73,6 @@ func CheckUserToPostLink(ctx context.Context, driver neo4j.DriverWithContext, e6
"e621ID": e621UserID, "e621ID": e621UserID,
} }
log.WithFields(log.Fields{
"e621_post_id": e621PostID,
"e621_user_id": e621UserID,
}).Trace("neo4j: check user post relationship")
result, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer) result, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)
if err != nil { if err != nil {
return false, err return false, err

View File

@ -7,7 +7,8 @@ import (
func CreateSourceNode(ctx context.Context, driver neo4j.DriverWithContext, URL string) error { func CreateSourceNode(ctx context.Context, driver neo4j.DriverWithContext, URL string) error {
query := ` query := `
MERGE (u:Source {URL: $url}); MERGE (u:Source {URL: $url})
RETURN u
` `
params := map[string]any{ params := map[string]any{
"url": URL, "url": URL,

View File

@ -2,13 +2,14 @@ package neo4j
import ( import (
"context" "context"
"git.dragse.it/anthrove/e621-to-graph/internal/database/neo4j/model" "e621_to_neo4j/database/neo4j/models"
"github.com/neo4j/neo4j-go-driver/v5/neo4j" "github.com/neo4j/neo4j-go-driver/v5/neo4j"
) )
func CreateTagNode(ctx context.Context, driver neo4j.DriverWithContext, name string, tagType string) error { func CreateTagNode(ctx context.Context, driver neo4j.DriverWithContext, name string, tagType string) error {
query := ` query := `
MERGE (u:e621Tag {e621Tag: $name, e621TagType: $tagType}); MERGE (u:e621Tag {e621Tag: $name, e621TagType: $tagType})
RETURN u
` `
params := map[string]interface{}{ params := map[string]interface{}{
"name": name, "name": name,
@ -23,13 +24,13 @@ func CreateTagNode(ctx context.Context, driver neo4j.DriverWithContext, name str
return nil return nil
} }
func GetTagNodeByName(ctx context.Context, driver neo4j.DriverWithContext, name string) (model.DBTag, bool, error) { func GetTagNode(ctx context.Context, driver neo4j.DriverWithContext, name string) (models.DBTag, bool, error) {
var tag model.DBTag var tag models.DBTag
query := ` query := `
MATCH (u:e621Tag {e621Tag: $name}) MATCH (u:e621Tag {e621Tag: $name})
RETURN u.e621Tag AS e621Tag, u.e621TagType AS e621TagType; RETURN u.e621Tag as e621Tag, u.e621TagType as e621TagType
` `
params := map[string]interface{}{ params := map[string]interface{}{
@ -47,7 +48,7 @@ func GetTagNodeByName(ctx context.Context, driver neo4j.DriverWithContext, name
e621Tag, _, _ := neo4j.GetRecordValue[string](record, "e621Tag") e621Tag, _, _ := neo4j.GetRecordValue[string](record, "e621Tag")
e621TagType, _, _ := neo4j.GetRecordValue[string](record, "e621TagType") e621TagType, _, _ := neo4j.GetRecordValue[string](record, "e621TagType")
tag = model.DBTag{ tag = models.DBTag{
Tag: e621Tag, Tag: e621Tag,
TagType: e621TagType, TagType: e621TagType,
} }

24
database/neo4j/user.go Normal file
View File

@ -0,0 +1,24 @@
package neo4j
import (
"context"
"e621_to_neo4j/e621/models"
"github.com/neo4j/neo4j-go-driver/v5/neo4j"
)
func CreateUserNode(ctx context.Context, driver neo4j.DriverWithContext, user models.E621User) error {
query := `
MERGE (u:e621User {e621ID: $id, e621Username: $name})
RETURN u
`
params := map[string]interface{}{
"id": user.ID,
"name": user.Name,
}
_, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)
if err != nil {
return err
}
return nil
}

View File

@ -1,11 +0,0 @@
services:
app:
restart: unless-stopped
image: anthrove/e621-to-graph:latest
build:
context: ../.
dockerfile: build/package/Dockerfile
ports:
- 8080:8080
env_file: .env

8
docker-compose.yaml Normal file
View File

@ -0,0 +1,8 @@
services:
app:
restart: unless-stopped
build: .
ports:
- 8080:8080
env_file: .env

31
e621/client.go Normal file
View File

@ -0,0 +1,31 @@
package e621
import (
"e621_to_neo4j/utils"
"golang.org/x/time/rate"
"net/http"
)
const (
baseURL = "https://e621.net"
)
// Client represents the e621 API client.
type Client struct {
apiKey string
username string
client *http.Client
limiter *rate.Limiter
queue *utils.Queue
}
// NewClient creates a new e621 API client.
func NewClient(apiKey string, username string) *Client {
return &Client{
apiKey: apiKey,
username: username,
client: &http.Client{},
limiter: rate.NewLimiter(1, 2),
queue: &utils.Queue{},
}
}

39
e621/favorite.go Normal file
View File

@ -0,0 +1,39 @@
package e621
import (
"e621_to_neo4j/e621/models"
"fmt"
log "github.com/sirupsen/logrus"
)
// GetFavorites retrieves all favorites from the e621 API.
func (c *Client) GetFavorites(user models.E621User) ([]models.Post, error) {
page := 1
var allFavorites []models.Post
var URIPath string
for {
URIPath = fmt.Sprintf("favorites.json?user_id=%d&limit=%d&page=%d", user.ID, page, 320)
log.WithFields(log.Fields{
"id": user.ID,
"fav_page": page,
"uri": URIPath,
}).Debug("Requesting API for favorites")
favorite, err := ExecuteGetAPIRequest[models.PostResponseWrapper](c, URIPath)
if err != nil {
log.Printf(err.Error())
}
// Append the fetched posts to the result slice
allFavorites = append(allFavorites, favorite.Posts...)
// If no more posts are returned, return the accumulated favorites
if len(favorite.Posts) == 0 {
return allFavorites, nil
}
// Update the last post ID for the next page request
page += 1
}
}

102
e621/models/post.go Normal file
View File

@ -0,0 +1,102 @@
package models
import "encoding/json"
func UnmarshalE621Post(data []byte) (PostResponseWrapper, error) {
var r PostResponseWrapper
err := json.Unmarshal(data, &r)
return r, err
}
func (r *PostResponseWrapper) Marshal() ([]byte, error) {
return json.Marshal(r)
}
type PostResponseWrapper struct {
Posts []Post `json:"posts"`
}
type Post struct {
ID int64 `json:"id"`
CreatedAt string `json:"created_at"`
UpdatedAt string `json:"updated_at"`
File File `json:"file"`
Preview Preview `json:"preview"`
Sample Sample `json:"sample"`
Score Score `json:"score"`
Tags PostTags `json:"tags"`
LockedTags []interface{} `json:"locked_tags"`
ChangeSeq int64 `json:"change_seq"`
Flags Flags `json:"flags"`
Rating string `json:"rating"`
FavCount int64 `json:"fav_count"`
Sources []string `json:"sources"`
Pools []interface{} `json:"pools"`
Relationships Relationships `json:"relationships"`
ApproverID *int64 `json:"approver_id"`
UploaderID int64 `json:"uploader_id"`
Description string `json:"description"`
CommentCount int64 `json:"comment_count"`
IsFavorited bool `json:"is_favorited"`
HasNotes bool `json:"has_notes"`
Duration interface{} `json:"duration"`
}
type File struct {
Width int64 `json:"width"`
Height int64 `json:"height"`
EXT string `json:"ext"`
Size int64 `json:"size"`
Md5 string `json:"md5"`
URL string `json:"url"`
}
type Flags struct {
Pending bool `json:"pending"`
Flagged bool `json:"flagged"`
NoteLocked bool `json:"note_locked"`
StatusLocked bool `json:"status_locked"`
RatingLocked bool `json:"rating_locked"`
Deleted bool `json:"deleted"`
}
type Preview struct {
Width int64 `json:"width"`
Height int64 `json:"height"`
URL string `json:"url"`
}
type Relationships struct {
ParentID interface{} `json:"parent_id"`
HasChildren bool `json:"has_children"`
HasActiveChildren bool `json:"has_active_children"`
Children []interface{} `json:"children"`
}
type Sample struct {
Has bool `json:"has"`
Height int64 `json:"height"`
Width int64 `json:"width"`
URL string `json:"url"`
Alternates Alternates `json:"alternates"`
}
type Alternates struct {
}
type Score struct {
Up int64 `json:"up"`
Down int64 `json:"down"`
Total int64 `json:"total"`
}
type PostTags struct {
General []string `json:"general"`
Species []string `json:"species"`
Character []string `json:"character"`
Copyright []string `json:"copyright"`
Artist []string `json:"artist"`
Invalid []interface{} `json:"invalid"`
Lore []interface{} `json:"lore"`
Meta []string `json:"meta"`
}

83
e621/models/user.go Normal file
View File

@ -0,0 +1,83 @@
package models
import "encoding/json"
func UnmarshalE621User(data []byte) (E621User, error) {
var r E621User
err := json.Unmarshal(data, &r)
return r, err
}
func (r *E621User) Marshal() ([]byte, error) {
return json.Marshal(r)
}
type E621User struct {
WikiPageVersionCount int64 `json:"wiki_page_version_count"`
ArtistVersionCount int64 `json:"artist_version_count"`
PoolVersionCount int64 `json:"pool_version_count"`
ForumPostCount int64 `json:"forum_post_count"`
CommentCount int64 `json:"comment_count"`
FlagCount int64 `json:"flag_count"`
FavoriteCount int64 `json:"favorite_count"`
PositiveFeedbackCount int64 `json:"positive_feedback_count"`
NeutralFeedbackCount int64 `json:"neutral_feedback_count"`
NegativeFeedbackCount int64 `json:"negative_feedback_count"`
UploadLimit int64 `json:"upload_limit"`
ID int64 `json:"id"`
CreatedAt string `json:"created_at"`
Name string `json:"name"`
Level int64 `json:"level"`
BaseUploadLimit int64 `json:"base_upload_limit"`
PostUploadCount int64 `json:"post_upload_count"`
PostUpdateCount int64 `json:"post_update_count"`
NoteUpdateCount int64 `json:"note_update_count"`
IsBanned bool `json:"is_banned"`
CanApprovePosts bool `json:"can_approve_posts"`
CanUploadFree bool `json:"can_upload_free"`
LevelString string `json:"level_string"`
AvatarID int64 `json:"avatar_id"`
ShowAvatars bool `json:"show_avatars"`
BlacklistAvatars bool `json:"blacklist_avatars"`
BlacklistUsers bool `json:"blacklist_users"`
DescriptionCollapsedInitially bool `json:"description_collapsed_initially"`
HideComments bool `json:"hide_comments"`
ShowHiddenComments bool `json:"show_hidden_comments"`
ShowPostStatistics bool `json:"show_post_statistics"`
HasMail bool `json:"has_mail"`
ReceiveEmailNotifications bool `json:"receive_email_notifications"`
EnableKeyboardNavigation bool `json:"enable_keyboard_navigation"`
EnablePrivacyMode bool `json:"enable_privacy_mode"`
StyleUsernames bool `json:"style_usernames"`
EnableAutoComplete bool `json:"enable_auto_complete"`
HasSavedSearches bool `json:"has_saved_searches"`
DisableCroppedThumbnails bool `json:"disable_cropped_thumbnails"`
DisableMobileGestures bool `json:"disable_mobile_gestures"`
EnableSafeMode bool `json:"enable_safe_mode"`
DisableResponsiveMode bool `json:"disable_responsive_mode"`
DisablePostTooltips bool `json:"disable_post_tooltips"`
NoFlagging bool `json:"no_flagging"`
NoFeedback bool `json:"no_feedback"`
DisableUserDmails bool `json:"disable_user_dmails"`
EnableCompactUploader bool `json:"enable_compact_uploader"`
ReplacementsBeta bool `json:"replacements_beta"`
IsBdStaff bool `json:"is_bd_staff"`
UpdatedAt string `json:"updated_at"`
Email string `json:"email"`
LastLoggedInAt string `json:"last_logged_in_at"`
LastForumReadAt string `json:"last_forum_read_at"`
RecentTags string `json:"recent_tags"`
CommentThreshold int64 `json:"comment_threshold"`
DefaultImageSize string `json:"default_image_size"`
FavoriteTags string `json:"favorite_tags"`
BlacklistedTags string `json:"blacklisted_tags"`
TimeZone string `json:"time_zone"`
PerPage int64 `json:"per_page"`
CustomStyle string `json:"custom_style"`
APIRegenMultiplier int64 `json:"api_regen_multiplier"`
APIBurstLimit int64 `json:"api_burst_limit"`
RemainingAPILimit int64 `json:"remaining_api_limit"`
StatementTimeout int64 `json:"statement_timeout"`
FavoriteLimit int64 `json:"favorite_limit"`
TagQueryLimit int64 `json:"tag_query_limit"`
}

45
e621/request.go Normal file
View File

@ -0,0 +1,45 @@
package e621
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
)
func ExecuteGetAPIRequest[dataType any](c *Client, URIPath string) (*dataType, error) {
var err error
c.limiter.Wait(context.Background())
url := fmt.Sprintf("%s/%s", baseURL, URIPath)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, err
}
req.Header.Set("User-Agent", "FavGetter (by Selloo)")
req.Header.Add("Accept", "application/json")
req.SetBasicAuth(c.username, c.apiKey)
resp, err := c.client.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, err
}
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("failed to retrieve posts: %s", resp.Status)
}
var r dataType
err = json.Unmarshal(body, &r)
if err != nil {
return nil, err
}
return &r, nil
}

21
e621/users.go Normal file
View File

@ -0,0 +1,21 @@
package e621
import (
"e621_to_neo4j/e621/models"
"fmt"
log "github.com/sirupsen/logrus"
)
// GetUserInfo retrieves the users information from e621 API.
func (c *Client) GetUserInfo(username string) (models.E621User, error) {
URIPath := fmt.Sprintf("users/%s.json", username)
log.WithFields(log.Fields{
"username": username,
"uri": URIPath,
}).Debug("Requesting API for user details")
user, err := ExecuteGetAPIRequest[models.E621User](c, URIPath)
if err != nil {
return models.E621User{}, err
}
return *user, nil
}

13
go.mod
View File

@ -1,17 +1,12 @@
module git.dragse.it/anthrove/e621-to-graph module e621_to_neo4j
go 1.21.3 go 1.20
require ( require (
git.dragse.it/anthrove/e621-sdk-go v1.3.0
github.com/caarlos0/env v3.5.0+incompatible github.com/caarlos0/env v3.5.0+incompatible
github.com/neo4j/neo4j-go-driver/v5 v5.8.1 github.com/neo4j/neo4j-go-driver/v5 v5.8.1
github.com/sirupsen/logrus v1.9.3 github.com/sirupsen/logrus v1.9.3
golang.org/x/time v0.3.0
) )
require ( require golang.org/x/sys v0.9.0 // indirect
github.com/joho/godotenv v1.5.1 // indirect
golang.org/x/sys v0.9.0 // indirect
golang.org/x/time v0.3.0 // indirect
)

6
go.sum
View File

@ -1,14 +1,8 @@
git.dragse.it/anthrove/e621-sdk-go v1.3.0 h1:sbjrOps4WxXf42kyNb8ZVxC6dCiFrCkqv4C8BgKkIGA=
git.dragse.it/anthrove/e621-sdk-go v1.3.0/go.mod h1:urFl0jjx2UK8Vz1tMI253CvWK8fZqd/a9+xdE8ZBwq4=
github.com/caarlos0/env v3.5.0+incompatible h1:Yy0UN8o9Wtr/jGHZDpCBLpNrzcFLLM2yixi/rBrKyJs= github.com/caarlos0/env v3.5.0+incompatible h1:Yy0UN8o9Wtr/jGHZDpCBLpNrzcFLLM2yixi/rBrKyJs=
github.com/caarlos0/env v3.5.0+incompatible/go.mod h1:tdCsowwCzMLdkqRYDlHpZCp2UooDD3MspDBjZ2AD02Y= github.com/caarlos0/env v3.5.0+incompatible/go.mod h1:tdCsowwCzMLdkqRYDlHpZCp2UooDD3MspDBjZ2AD02Y=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/jarcoal/httpmock v1.3.1 h1:iUx3whfZWVf3jT01hQTO/Eo5sAYtB2/rqaUuOtpInww=
github.com/jarcoal/httpmock v1.3.1/go.mod h1:3yb8rc4BI7TCBhFY8ng0gjuLKJNquuDNiPaZjnENuYg=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/neo4j/neo4j-go-driver/v5 v5.8.1 h1:IysKg6KJIUgyItmnHRRrt2N8srbd6znMslRW3qQErTQ= github.com/neo4j/neo4j-go-driver/v5 v5.8.1 h1:IysKg6KJIUgyItmnHRRrt2N8srbd6znMslRW3qQErTQ=
github.com/neo4j/neo4j-go-driver/v5 v5.8.1/go.mod h1:Vff8OwT7QpLm7L2yYr85XNWe9Rbqlbeb9asNXJTHO4k= github.com/neo4j/neo4j-go-driver/v5 v5.8.1/go.mod h1:Vff8OwT7QpLm7L2yYr85XNWe9Rbqlbeb9asNXJTHO4k=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=

View File

@ -1,39 +0,0 @@
package api
import (
"context"
"fmt"
"git.dragse.it/anthrove/e621-sdk-go/pkg/e621"
"git.dragse.it/anthrove/e621-to-graph/internal/service"
"git.dragse.it/anthrove/e621-to-graph/pkg/logic"
log "github.com/sirupsen/logrus"
"net/http"
)
// ScapeUserFavourites is the handler for the user API
func ScapeUserFavourites(ctx context.Context, graphConnection logic.GraphConnection, client *e621.Client) func(response http.ResponseWriter, request *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPost {
w.WriteHeader(http.StatusMethodNotAllowed)
fmt.Fprintf(w, "only POST requests are allowed")
return
}
username := r.FormValue("username")
if username == "" {
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, "username is required")
return
}
// Perform further processing with the username
go service.ScrapeUser(ctx, graphConnection, client, username)
// Send a response
w.WriteHeader(http.StatusOK)
log.WithFields(log.Fields{
"requested_user": username,
}).Info("api: processing user")
}
}

View File

@ -1,40 +0,0 @@
package config
import (
"fmt"
"github.com/caarlos0/env"
"strings"
)
type Config struct {
E621APIKey string `env:"E621_API_KEY,required"`
E621Username string `env:"E621_USERNAME,required"`
DBType string `env:"DB_TYPE,required"`
DBEndpoint string `env:"DB_URL,required"`
DBUsername string `env:"DB_USERNAME,required"`
DBPassword string `env:"DB_PASSWORD,required"`
LogLevel string `env:"LOG_LEVEL" envDefault:"INFO"`
LogFormat string `env:"LOG_FORMAT" envDefault:"PLAIN"`
Neo4jDebug bool `env:"NEO4J_DEBUG" envDefault:"FALSE"`
}
// LoadConfig loads the configuration from environment variables
func LoadConfig() (*Config, error) {
config := &Config{}
if err := env.Parse(config); err != nil {
return nil, fmt.Errorf("config: error parsing configuration: %w", err)
}
logLevel := strings.ToUpper(config.LogLevel)
logFormat := strings.ToUpper(config.LogFormat)
if logLevel != "FATAL" && logLevel != "ERROR" && logLevel != "WARN" && logLevel != "INFO" && logLevel != "DEBUG" && logLevel != "TRACE" {
return nil, fmt.Errorf("config: valid levels for erros are: FATAL, ERROR, WARN, INFO, DEBUG, TRACE")
}
if logFormat != "PLAIN" && logFormat != "JSON" {
return nil, fmt.Errorf("config: valit formatters are: PLAIN, JSON")
}
return config, nil
}

View File

@ -1,45 +0,0 @@
package neo4j
import (
"fmt"
neo4jLog "github.com/neo4j/neo4j-go-driver/v5/neo4j/log"
log "github.com/sirupsen/logrus"
)
type neo4jLogger struct {
neo4jDebug bool
}
func NewNeo4jLogger(neo4jDebug bool) neo4jLog.Logger {
return &neo4jLogger{neo4jDebug: neo4jDebug}
}
func (n neo4jLogger) Error(name string, id string, err error) {
log.WithFields(log.Fields{
"name": name,
"id": id,
}).Errorf("neo4j: %s", err)
}
func (n neo4jLogger) Warnf(name string, id string, msg string, args ...any) {
log.WithFields(log.Fields{
"name": name,
"id": id,
}).Warnf("neo4j: %v", fmt.Sprintf(msg, args...))
}
func (n neo4jLogger) Infof(name string, id string, msg string, args ...any) {
log.WithFields(log.Fields{
"name": name,
"id": id,
}).Infof("neo4j: %v", fmt.Sprintf(msg, args...))
}
func (n neo4jLogger) Debugf(name string, id string, msg string, args ...any) {
if n.neo4jDebug {
log.WithFields(log.Fields{
"name": name,
"id": id,
}).Debugf("neo4j: %v", fmt.Sprintf(msg, args...))
}
}

View File

@ -1,54 +0,0 @@
package neo4j
import (
"context"
"git.dragse.it/anthrove/e621-sdk-go/pkg/e621/model"
"github.com/neo4j/neo4j-go-driver/v5/neo4j"
)
func CreateUserNode(ctx context.Context, driver neo4j.DriverWithContext, user model.User) error {
query := `
MERGE (u:e621User {e621ID: $id, e621Username: $name});
`
params := map[string]interface{}{
"id": user.ID,
"name": user.Name,
}
_, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)
if err != nil {
return err
}
return nil
}
func GetUserFavoritesCount(ctx context.Context, driver neo4j.DriverWithContext, userID model.UserID) (int64, error) {
var userFavoriteCount int64
query := `
MATCH (:e621User {e621ID: $userID})-[:IS_FAVORITE]->(:e621Post)
RETURN count(*) AS numberOfFavoritedPosts;
`
params := map[string]interface{}{
"userID": userID,
}
result, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)
if err != nil {
return 0, err
}
if len(result.Records) == 0 {
// no matches -> user does not exist, return count 0
return userFavoriteCount, err
}
record := result.Records[0]
userFavoriteCount, _, err = neo4j.GetRecordValue[int64](record, "numberOfFavoritedPosts")
if err != nil {
return userFavoriteCount, err
}
return userFavoriteCount, nil
}

69
main.go Normal file
View File

@ -0,0 +1,69 @@
package main
import (
"context"
"e621_to_neo4j/api"
"e621_to_neo4j/database"
neo4j "e621_to_neo4j/database/neo4j"
"e621_to_neo4j/e621"
"e621_to_neo4j/utils"
log "github.com/sirupsen/logrus"
"net/http"
"os"
"strings"
)
func init() {
// Log as JSON instead of the default ASCII formatter.
//log.SetFormatter(&log.JSONFormatter{})
// Output to stdout instead of the default stderr
// Can be any io.Writer, see below for File example
log.SetOutput(os.Stdout)
// Only log the warning severity or above.
log.SetLevel(log.DebugLevel)
// Logging Method Name
//log.SetReportCaller(true)
}
func main() {
var graphConnection database.GraphConnection
ctx := context.Background()
// Loads Config
config, err := utils.LoadConfig()
if err != nil {
log.Println(err)
}
switch strings.ToLower(config.DBType) {
case "neo4j":
log.Println("Setup Neo4J Connection")
graphConnection = neo4j.NewNeo4JConnection()
err = graphConnection.Connect(ctx, config.DBEndpoint, config.DBUsername, config.DBPassword)
if err != nil {
panic(err)
}
log.Println("Connection successful")
default:
panic("No Database was selected!")
}
// Initialize the e621API
e621Client := e621.NewClient(config.E621APIKey, config.E621Username)
log.Printf("Im ready!")
// Register the UserHandler with the "/user" route
http.HandleFunc("/user", api.UserHandler(ctx, graphConnection, e621Client))
// Start the HTTP server
err = http.ListenAndServe(":8080", nil)
if err != nil {
return
}
}

View File

@ -1,19 +0,0 @@
package logic
import (
"context"
"git.dragse.it/anthrove/e621-sdk-go/pkg/e621/model"
)
type GraphConnection interface {
Connect(ctx context.Context, endpoint string, username string, password string) error
UploadUser(ctx context.Context, user model.User) error
UploadSource(ctx context.Context, SourceURL string) error
UploadPost(ctx context.Context, e621ID model.PostID) error
UploadTag(ctx context.Context, name string, tagType string) error
EstablishPostToTagLink(ctx context.Context, e621PostID model.PostID, e621Tag string) error
EstablishPostToSourceLink(ctx context.Context, e621PostID model.PostID, sourceURL string) error
EstablishUserToPostLink(ctx context.Context, e621PostID model.PostID, e621UserID model.UserID) error
CheckUserToPostLink(ctx context.Context, e621PostID model.PostID, e621UserID model.UserID) (bool, error)
GetUserFavoriteCount(ctx context.Context, userID model.UserID) (int64, error)
}

View File

@ -1,44 +0,0 @@
package queue
import (
"errors"
log "github.com/sirupsen/logrus"
)
type Queue struct {
tasks []SchedulerTask
notifyChannel chan bool
}
func NewQueue() Queue {
return Queue{
notifyChannel: make(chan bool),
}
}
// WaitForElement need to be called everytime before Popping an Element!
// Also, it is required to have this function called in a separate go-routine because Push use the NotifyChannel in the
// routine. So the thread waits, till WaitForElement pulls the Item from the channel
// FIXME this should be fixed in future. require more discussion what is the best way.
func (queue *Queue) WaitForElement() {
log.Debug("queue: waiting for element")
_ = <-queue.notifyChannel
}
func (queue *Queue) Pop() (SchedulerTask, error) {
if len(queue.tasks) == 0 {
return nil, errors.New("try to remove an element of a empty queue")
}
task := queue.tasks[0]
queue.tasks = queue.tasks[1:]
return task, nil
}
func (queue *Queue) Push(task SchedulerTask) error {
if task == nil {
return errors.New("try to add task but task is empty")
}
queue.tasks = append(queue.tasks, task)
queue.notifyChannel <- true
return nil
}

View File

@ -1,13 +0,0 @@
package queue
import (
"net/http"
)
type SchedulerTask interface {
UriPath() string
SendStatusCode(statusCode int)
SendError(err error)
SendResponse(response *http.Response)
BasicAuth() (string, string)
}

View File

@ -1 +0,0 @@
docker run -it -p 7687:7687 -p 7444:7444 -p 3000:3000 -v mg_lib:/var/lib/memgraph memgraph/memgraph-platform

View File

@ -1,164 +1,143 @@
package service package services
import ( import (
"context" "context"
"git.dragse.it/anthrove/e621-sdk-go/pkg/e621" "e621_to_neo4j/database"
"git.dragse.it/anthrove/e621-sdk-go/pkg/e621/model" "e621_to_neo4j/e621"
"git.dragse.it/anthrove/e621-to-graph/pkg/logic" "e621_to_neo4j/e621/models"
"git.dragse.it/anthrove/e621-to-graph/pkg/util" "e621_to_neo4j/utils"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"time" "time"
) )
func ScrapeUser(ctx context.Context, graphConnection logic.GraphConnection, client *e621.Client, username string) error { func ScrapeUser(ctx context.Context, graphConnection database.GraphConnection, e621Client e621.Client, username string) error {
var err error var err error
scrapeTime := time.Now() e621User, err := e621Client.GetUserInfo(username)
e621User, err := client.GetUserByName(username).Execute()
if err != nil { if err != nil {
return err return err
} }
if e621User.IsBanned { if e621User.IsBanned {
log.WithFields(log.Fields{ log.WithFields(log.Fields{
"e621_username": e621User.Name, "user": e621User.Name,
"e621_user_id": e621User.ID, "id": e621User.ID,
"e621_user_bann": e621User.IsBanned, "bann": e621User.IsBanned,
}).Info("service: user is Banned") }).Info("User is Banned")
return nil return nil
} }
log.WithFields(log.Fields{
"user": e621User.Name,
"id": e621User.ID,
}).Info("Processing user")
err = graphConnection.UploadUser(ctx, e621User) err = graphConnection.UploadUser(ctx, e621User)
if err != nil { if err != nil {
return err log.Fatal(err)
}
currentDBFavCount, err := graphConnection.GetUserFavoriteCount(ctx, e621User.ID)
if err != nil {
return err
}
favoriteBuilder, err := client.GetFavoritesForUser(e621User.Name)
if err != nil {
return err
}
if currentDBFavCount > e621User.FavoriteCount {
//TODO: IMPLEMENT USER MARKED FOR DELETED FAVS
log.WithFields(log.Fields{
"e621_username": e621User.Name,
"e621_user_id": e621User.ID,
"e621_current_db_favorite_count": currentDBFavCount,
"e621_user_favorite_count": e621User.FavoriteCount,
}).Debug("service: user has favorites deleted")
}
var pageIndex = 1
for currentDBFavCount < e621User.FavoriteCount {
favorites, err := favoriteBuilder.Page(pageIndex).Execute()
if err != nil {
return err
}
if len(favorites) <= 0 {
return nil
}
for _, favorite := range favorites {
if currentDBFavCount == e621User.FavoriteCount {
break
}
isFaved, err := graphConnection.CheckUserToPostLink(ctx, favorite.ID, e621User.ID)
if err != nil {
return err
}
if !isFaved {
err = uploadDataToDB(ctx, graphConnection, favorite, e621User)
if err != nil {
return err
}
currentDBFavCount++
}
}
pageIndex++
} }
log.WithFields(log.Fields{ log.WithFields(log.Fields{
"e621_username": e621User.Name, "user": e621User.Name,
"e621_user_id": e621User.ID, "id": e621User.ID,
"post_amount": e621User.FavoriteCount, }).Info("Getting favorites for user")
"scrape_time": time.Since(scrapeTime),
}).Info("service: finished processing favorites")
return nil
}
func uploadDataToDB(ctx context.Context, graphConnection logic.GraphConnection, favorite model.Post, e621User model.User) error {
start := time.Now() start := time.Now()
err := uploadNodes(ctx, graphConnection, favorite) userFavorites, err := e621Client.GetFavorites(e621User)
if err != nil { if err != nil {
return err log.Fatal(err)
} }
log.WithFields(log.Fields{ log.WithFields(log.Fields{
"e621_username": e621User.Name, "user": e621User.Name,
"e621_user_id": e621User.ID, "id": e621User.ID,
"post_id": favorite.ID, "post_amount": len(userFavorites),
"upload_time": time.Since(start), "scrape_time": time.Since(start),
}).Debug("service: uploaded post") }).Info("Getting favorites for user")
startUploadPosts := time.Now()
// Uploads all Tags, Posts as Nodes to Neo4j
for i, post := range userFavorites {
if exists, err := graphConnection.CheckUserToPostLink(ctx, post.ID, e621User.ID); err == nil && exists {
log.WithFields(log.Fields{
"user": e621User.Name,
"id": e621User.ID,
"last_post_id": post.ID,
}).Info("No new favorites found")
break
} else if err != nil {
return err
}
start = time.Now() start = time.Now()
err = uploadPostToUserRelationship(ctx, graphConnection, favorite, e621User) err = uploadNodes(ctx, graphConnection, post)
if err != nil {
return err
}
log.WithFields(log.Fields{
"user": e621User.Name,
"id": e621User.ID,
"post_number": i,
"post_amount": len(userFavorites),
"post_id": post.ID,
"upload_time": time.Since(start),
}).Debug("Uploading post")
start := time.Now()
err = uploadPostToUserRelationship(ctx, graphConnection, post, e621User)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
return err return err
} }
err = uploadSourceTagRelationship(ctx, graphConnection, favorite) err = uploadSourceTagRelationship(ctx, graphConnection, post)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
return err return err
} }
err = uploadGeneralTagRelationship(ctx, graphConnection, favorite) err = uploadGeneralTagRelationship(ctx, graphConnection, post)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
return err return err
} }
err = uploadCharacterTagtRelationship(ctx, graphConnection, favorite) err = uploadCharacterTagtRelationship(ctx, graphConnection, post)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
return err return err
} }
err = uploadCopyrightTagRelationship(ctx, graphConnection, favorite) err = uploadCopyrightTagRelationship(ctx, graphConnection, post)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
return err return err
} }
err = uploadArtistTagRelationship(ctx, graphConnection, favorite) err = uploadArtistTagRelationship(ctx, graphConnection, post)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
return err return err
} }
log.WithFields(log.Fields{ log.WithFields(log.Fields{
"e621_username": e621User.Name, "user": e621User.Name,
"e621_user_id": e621User.ID, "id": e621User.ID,
"post_id": favorite.ID, "post_number": i,
"post_amount": len(userFavorites),
"post_id": post.ID,
"upload_time": time.Since(start), "upload_time": time.Since(start),
}).Debug("service: made relationship") }).Debug("Making relationship")
}
log.WithFields(log.Fields{
"user": e621User.Name,
"id": e621User.ID,
"upload_time": time.Since(startUploadPosts),
}).Info("Upload to Database finished")
return nil return nil
} }
// uploadNodes uploads the post to the database and creates the nodes // uploadNodes uploads the post to the database and creates the nodes
func uploadNodes(ctx context.Context, graphConnection logic.GraphConnection, post model.Post) error { func uploadNodes(ctx context.Context, graphConnection database.GraphConnection, post models.Post) error {
uniqueGeneralTags := make([]string, 0) uniqueGeneralTags := make([]string, 0)
uniqueCharacterTags := make([]string, 0) uniqueCharacterTags := make([]string, 0)
@ -175,10 +154,10 @@ func uploadNodes(ctx context.Context, graphConnection logic.GraphConnection, pos
allCopyrightTags = append(allCopyrightTags, post.Tags.Copyright...) allCopyrightTags = append(allCopyrightTags, post.Tags.Copyright...)
allArtistTags = append(allArtistTags, post.Tags.Artist...) allArtistTags = append(allArtistTags, post.Tags.Artist...)
uniqueGeneralTags = util.UniqueNonEmptyElementsOf(allGeneralTags) uniqueGeneralTags = utils.UniqueNonEmptyElementsOf(allGeneralTags)
uniqueCharacterTags = util.UniqueNonEmptyElementsOf(allCharacterTags) uniqueCharacterTags = utils.UniqueNonEmptyElementsOf(allCharacterTags)
uniqueCopyrightTags = util.UniqueNonEmptyElementsOf(allCopyrightTags) uniqueCopyrightTags = utils.UniqueNonEmptyElementsOf(allCopyrightTags)
uniqueArtistTags = util.UniqueNonEmptyElementsOf(allArtistTags) uniqueArtistTags = utils.UniqueNonEmptyElementsOf(allArtistTags)
err := graphConnection.UploadPost(ctx, post.ID) err := graphConnection.UploadPost(ctx, post.ID)
if err != nil { if err != nil {
@ -224,7 +203,7 @@ func uploadNodes(ctx context.Context, graphConnection logic.GraphConnection, pos
} }
// uploadPostToUserRelationship creates a relationship between the user and the post // uploadPostToUserRelationship creates a relationship between the user and the post
func uploadPostToUserRelationship(ctx context.Context, graphConnection logic.GraphConnection, post model.Post, e621User model.User) error { func uploadPostToUserRelationship(ctx context.Context, graphConnection database.GraphConnection, post models.Post, e621User models.E621User) error {
err := graphConnection.EstablishUserToPostLink(ctx, post.ID, e621User.ID) err := graphConnection.EstablishUserToPostLink(ctx, post.ID, e621User.ID)
if err != nil { if err != nil {
return err return err
@ -234,7 +213,7 @@ func uploadPostToUserRelationship(ctx context.Context, graphConnection logic.Gra
} }
// uploadSourceTagRelationship creates a relationship between the post and the source // uploadSourceTagRelationship creates a relationship between the post and the source
func uploadSourceTagRelationship(ctx context.Context, graphConnection logic.GraphConnection, post model.Post) error { func uploadSourceTagRelationship(ctx context.Context, graphConnection database.GraphConnection, post models.Post) error {
for _, source := range post.Sources { for _, source := range post.Sources {
err := graphConnection.EstablishPostToSourceLink(ctx, post.ID, source) err := graphConnection.EstablishPostToSourceLink(ctx, post.ID, source)
if err != nil { if err != nil {
@ -247,7 +226,7 @@ func uploadSourceTagRelationship(ctx context.Context, graphConnection logic.Grap
} }
// uploadGeneralTagRelationship creates a relationship between the post and the general tag // uploadGeneralTagRelationship creates a relationship between the post and the general tag
func uploadGeneralTagRelationship(ctx context.Context, graphConnection logic.GraphConnection, post model.Post) error { func uploadGeneralTagRelationship(ctx context.Context, graphConnection database.GraphConnection, post models.Post) error {
for _, generalTag := range post.Tags.General { for _, generalTag := range post.Tags.General {
err := graphConnection.EstablishPostToTagLink(ctx, post.ID, generalTag) err := graphConnection.EstablishPostToTagLink(ctx, post.ID, generalTag)
if err != nil { if err != nil {
@ -260,7 +239,7 @@ func uploadGeneralTagRelationship(ctx context.Context, graphConnection logic.Gra
} }
// uploadCharacterTagtRelationship creates a relationship between the post and the character tag // uploadCharacterTagtRelationship creates a relationship between the post and the character tag
func uploadCharacterTagtRelationship(ctx context.Context, graphConnection logic.GraphConnection, post model.Post) error { func uploadCharacterTagtRelationship(ctx context.Context, graphConnection database.GraphConnection, post models.Post) error {
for _, characterTag := range post.Tags.Character { for _, characterTag := range post.Tags.Character {
err := graphConnection.EstablishPostToTagLink(ctx, post.ID, characterTag) err := graphConnection.EstablishPostToTagLink(ctx, post.ID, characterTag)
if err != nil { if err != nil {
@ -273,20 +252,20 @@ func uploadCharacterTagtRelationship(ctx context.Context, graphConnection logic.
} }
// uploadCopyrightTagRelationship creates a relationship between the post and the copyright tag // uploadCopyrightTagRelationship creates a relationship between the post and the copyright tag
func uploadCopyrightTagRelationship(ctx context.Context, graphConnection logic.GraphConnection, post model.Post) error { func uploadCopyrightTagRelationship(ctx context.Context, graphConnection database.GraphConnection, post models.Post) error {
for _, copyrightTag := range post.Tags.Copyright { for _, copyrightTag := range post.Tags.Copyright {
err := graphConnection.EstablishPostToTagLink(ctx, post.ID, copyrightTag) err := graphConnection.EstablishPostToTagLink(ctx, post.ID, copyrightTag)
if err != nil { if err != nil {
return err return err
} }
// log.Printf("Created PostToTagRelationship for post: %d to copyright tag: %s", post.ID, copyrightTag) // log.Printf("Created PostToTagRelationship for post: %d to copyrigh tag: %s", post.ID, copyrightTag)
} }
return nil return nil
} }
// uploadArtistTagRelationship creates a relationship between the post and the artist tag // uploadArtistTagRelationship creates a relationship between the post and the artist tag
func uploadArtistTagRelationship(ctx context.Context, graphConnection logic.GraphConnection, post model.Post) error { func uploadArtistTagRelationship(ctx context.Context, graphConnection database.GraphConnection, post models.Post) error {
for _, artistTag := range post.Tags.Artist { for _, artistTag := range post.Tags.Artist {
err := graphConnection.EstablishPostToTagLink(ctx, post.ID, artistTag) err := graphConnection.EstablishPostToTagLink(ctx, post.ID, artistTag)
if err != nil { if err != nil {

25
utils/config.go Normal file
View File

@ -0,0 +1,25 @@
package utils
import (
"fmt"
"github.com/caarlos0/env"
)
type Config struct {
E621APIKey string `env:"E621_API_KEY,required"`
E621Username string `env:"E621_USERNAME,required"`
DBType string `env:"DB_TYPE,required"`
DBEndpoint string `env:"DB_URL,required"`
DBUsername string `env:"DB_USERNAME,required"`
DBPassword string `env:"DB_PASSWORD,required"`
}
// LoadConfig loads the configuration from environment variables
func LoadConfig() (*Config, error) {
config := &Config{}
if err := env.Parse(config); err != nil {
return nil, fmt.Errorf("error parsing configuration: %w", err)
}
return config, nil
}

View File

@ -1,4 +1,4 @@
package util package utils
// UniqueNonEmptyElementsOf returns a new slice containing unique non-empty elements from the input slice. // UniqueNonEmptyElementsOf returns a new slice containing unique non-empty elements from the input slice.
// It removes duplicate elements and empty strings while preserving the order of appearance. // It removes duplicate elements and empty strings while preserving the order of appearance.

33
utils/queue.go Normal file
View File

@ -0,0 +1,33 @@
package utils
import (
"errors"
)
type Task struct {
URIPath string `json:"url,omitempty" :"url"`
Methode string `json:"method,omitempty" :"method"`
Channel chan any `:"channel"`
}
type Queue struct {
elements []Task
}
func (queue *Queue) Pop() (Task, error) {
if len(queue.elements) == 0 {
return Task{}, errors.New("try to remove an element of a empty queue")
}
task := queue.elements[0]
queue.elements = queue.elements[1:]
return task, nil
}
func (queue *Queue) Push(task Task) error {
empty := Task{}
if task == empty {
return errors.New("try to add task but task is empty")
}
queue.elements = append(queue.elements, task)
return nil
}

View File

@ -1,65 +1,45 @@
package queue package utils
import ( import (
"net/http"
"reflect" "reflect"
"testing" "testing"
) )
type schedulerTaskImplDummy struct {
}
func (s schedulerTaskImplDummy) BasicAuth() (string, string) {
return "", ""
}
func (s schedulerTaskImplDummy) UriPath() string {
return ""
}
func (s schedulerTaskImplDummy) SendError(_ error) {
return
}
func (s schedulerTaskImplDummy) SendStatusCode(_ int) {
return
}
func (s schedulerTaskImplDummy) SendResponse(_ *http.Response) {
return
}
func TestQueue_Pop(t *testing.T) { func TestQueue_Pop(t *testing.T) {
type fields struct { type fields struct {
elements []SchedulerTask elements []Task
} }
tests := []struct { tests := []struct {
name string name string
fields fields fields fields
want SchedulerTask want Task
wantErr bool wantErr bool
}{ }{
{ {
name: "Pop element of empty list", name: "Pop element of empty list",
fields: fields{}, fields: fields{},
want: nil, want: Task{},
wantErr: true, wantErr: true,
}, },
{ {
name: "Pop element of a filled list with three elements", name: "Pop element of a filled list with three elements",
fields: fields{elements: []SchedulerTask{ fields: fields{elements: []Task{
schedulerTaskImplDummy{}, {URIPath: "https://e621.net0....", Methode: "GET", Channel: nil},
schedulerTaskImplDummy{}, {URIPath: "https://e621.net1....", Methode: "GET", Channel: nil},
schedulerTaskImplDummy{}, {URIPath: "https://e621.net2....", Methode: "GET", Channel: nil},
}}, }},
want: schedulerTaskImplDummy{}, want: Task{
URIPath: "https://e621.net0....",
Methode: "GET",
Channel: nil,
},
wantErr: false, wantErr: false,
}, },
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
queue := &Queue{ queue := &Queue{
tasks: tt.fields.elements, elements: tt.fields.elements,
} }
got, err := queue.Pop() got, err := queue.Pop()
if (err != nil) != tt.wantErr { if (err != nil) != tt.wantErr {
@ -75,32 +55,42 @@ func TestQueue_Pop(t *testing.T) {
func TestQueue_Push(t *testing.T) { func TestQueue_Push(t *testing.T) {
t.Run("Push tasks to empty queue", func(t *testing.T) { t.Run("Push tasks to empty queue", func(t *testing.T) {
queue := Queue{tasks: []SchedulerTask{}, notifyChannel: make(chan bool)} queue := Queue{elements: []Task{}}
go queue.WaitForElement() task := Task{
task := schedulerTaskImplDummy{} URIPath: "http://e621.net0....",
Methode: "GET",
Channel: nil,
}
err := queue.Push(task) err := queue.Push(task)
if err != nil { if err != nil {
t.Errorf("Push() error = %v", err) t.Errorf("Push() error = %v", err)
} }
if len(queue.tasks) != 1 { if len(queue.elements) != 1 {
t.Errorf("Push() error = queue is not one") t.Errorf("Push() error = queue is not one")
} }
if queue.tasks[0] != task { if queue.elements[0] != task {
t.Errorf("Push() error = wrong queue task in queue") t.Errorf("Push() error = wrong queue task in queue")
} }
}) })
t.Run("Push tasks to filled queue", func(t *testing.T) { t.Run("Push tasks to filled queue", func(t *testing.T) {
queue := Queue{tasks: []SchedulerTask{schedulerTaskImplDummy{}}, notifyChannel: make(chan bool)} queue := Queue{elements: []Task{{
go queue.WaitForElement() URIPath: "http://e621.net0....",
task := schedulerTaskImplDummy{} Methode: "GET",
Channel: nil,
}}}
task := Task{
URIPath: "http://e621.net1....",
Methode: "GET",
Channel: nil,
}
err := queue.Push(task) err := queue.Push(task)
if err != nil { if err != nil {
t.Errorf("Push() error = %v", err) t.Errorf("Push() error = %v", err)
} }
if len(queue.tasks) != 2 { if len(queue.elements) != 2 {
t.Errorf("Push() error = queue is not two") t.Errorf("Push() error = queue is not two")
} }
if queue.tasks[1] != task { if queue.elements[1] != task {
t.Errorf("Push() error = wrong queue task in queue") t.Errorf("Push() error = wrong queue task in queue")
} }
}) })