new_scrape_algorithm_#5 (#20)
First implementation of the new scraping algorithm that was proposed in issue #5. Reviewed-on: anthrove/e621-to-graph#20; Reviewed-by: Lennard Brinkhaus <lennard.brinkhaus@noreply.localhost>; Reviewed-by: daskadse <daskadse@noreply.localhost>; Co-authored-by: SoXX <soxx@fenpa.ws>; Co-committed-by: SoXX <soxx@fenpa.ws>
This commit is contained in:
parent
60b3502ee3
commit
3be16a9277
@ -20,6 +20,10 @@ func NewNeo4JConnection(neo4jDebug bool) logic.GraphConnection {
|
||||
}
|
||||
}
|
||||
|
||||
func (c *neo4jConnection) GetUserFavoriteCount(ctx context.Context, userID model.UserID) (int64, error) {
|
||||
return GetUserFavoritesCount(ctx, c.driver, userID)
|
||||
}
|
||||
|
||||
func (c *neo4jConnection) CheckUserToPostLink(ctx context.Context, e621PostID model.PostID, e621UserID model.UserID) (bool, error) {
|
||||
return CheckUserToPostLink(ctx, c.driver, e621PostID, e621UserID)
|
||||
}
|
||||
|
@ -8,9 +8,8 @@ import (
|
||||
|
||||
func CreatePostNode(ctx context.Context, driver neo4j.DriverWithContext, postID model.PostID) error {
|
||||
query := `
|
||||
MERGE (u:e621Post {e621PostID: $postID})
|
||||
RETURN u
|
||||
`
|
||||
MERGE (u:e621Post {e621PostID: $postID});
|
||||
`
|
||||
params := map[string]any{
|
||||
"postID": postID,
|
||||
}
|
||||
|
@ -11,7 +11,7 @@ func EstablishPostTagLink(ctx context.Context, driver neo4j.DriverWithContext, e
|
||||
query := `
|
||||
MATCH (p:e621Post {e621PostID: $e621PostID})
|
||||
MATCH (t:e621Tag {e621Tag: $e621Tag})
|
||||
MERGE (p)-[:HAS_TAG]->(t)
|
||||
MERGE (p)-[:HAS_TAG]->(t);
|
||||
`
|
||||
params := map[string]interface{}{
|
||||
"e621PostID": e621PostID,
|
||||
|
@ -7,9 +7,8 @@ import (
|
||||
|
||||
func CreateSourceNode(ctx context.Context, driver neo4j.DriverWithContext, URL string) error {
|
||||
query := `
|
||||
MERGE (u:Source {URL: $url})
|
||||
RETURN u
|
||||
`
|
||||
MERGE (u:Source {URL: $url});
|
||||
`
|
||||
params := map[string]any{
|
||||
"url": URL,
|
||||
}
|
||||
|
@ -8,9 +8,8 @@ import (
|
||||
|
||||
func CreateTagNode(ctx context.Context, driver neo4j.DriverWithContext, name string, tagType string) error {
|
||||
query := `
|
||||
MERGE (u:e621Tag {e621Tag: $name, e621TagType: $tagType})
|
||||
RETURN u
|
||||
`
|
||||
MERGE (u:e621Tag {e621Tag: $name, e621TagType: $tagType});
|
||||
`
|
||||
params := map[string]interface{}{
|
||||
"name": name,
|
||||
"tagType": tagType,
|
||||
@ -30,7 +29,7 @@ func GetTagNodeByName(ctx context.Context, driver neo4j.DriverWithContext, name
|
||||
|
||||
query := `
|
||||
MATCH (u:e621Tag {e621Tag: $name})
|
||||
RETURN u.e621Tag as e621Tag, u.e621TagType as e621TagType
|
||||
RETURN u.e621Tag AS e621Tag, u.e621TagType AS e621TagType;
|
||||
`
|
||||
|
||||
params := map[string]interface{}{
|
||||
|
@ -8,8 +8,7 @@ import (
|
||||
|
||||
func CreateUserNode(ctx context.Context, driver neo4j.DriverWithContext, user model.User) error {
|
||||
query := `
|
||||
MERGE (u:e621User {e621ID: $id, e621Username: $name})
|
||||
RETURN u
|
||||
MERGE (u:e621User {e621ID: $id, e621Username: $name});
|
||||
`
|
||||
params := map[string]interface{}{
|
||||
"id": user.ID,
|
||||
@ -22,3 +21,34 @@ func CreateUserNode(ctx context.Context, driver neo4j.DriverWithContext, user mo
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetUserFavoritesCount(ctx context.Context, driver neo4j.DriverWithContext, userID model.UserID) (int64, error) {
|
||||
var userFavoriteCount int64
|
||||
|
||||
query := `
|
||||
MATCH (:e621User {e621ID: $userID})-[:IS_FAVORITE]->(:e621Post)
|
||||
RETURN count(*) AS numberOfFavoritedPosts;
|
||||
`
|
||||
params := map[string]interface{}{
|
||||
"userID": userID,
|
||||
}
|
||||
|
||||
result, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if len(result.Records) == 0 {
|
||||
// no matches -> user does not exist, return count 0
|
||||
return userFavoriteCount, err
|
||||
}
|
||||
|
||||
record := result.Records[0]
|
||||
|
||||
userFavoriteCount, _, err = neo4j.GetRecordValue[int64](record, "numberOfFavoritedPosts")
|
||||
if err != nil {
|
||||
return userFavoriteCount, err
|
||||
}
|
||||
|
||||
return userFavoriteCount, nil
|
||||
}
|
||||
|
@ -13,9 +13,10 @@ import (
|
||||
func ScrapeUser(ctx context.Context, graphConnection logic.GraphConnection, client *e621.Client, username string) error {
|
||||
var err error
|
||||
|
||||
scrapeTime := time.Now()
|
||||
|
||||
e621User, err := client.GetUserByName(username).Execute()
|
||||
if err != nil {
|
||||
log.Info(err)
|
||||
return err
|
||||
}
|
||||
|
||||
@ -28,107 +29,134 @@ func ScrapeUser(ctx context.Context, graphConnection logic.GraphConnection, clie
|
||||
return nil
|
||||
}
|
||||
|
||||
log.WithFields(log.Fields{
|
||||
"e621_username": e621User.Name,
|
||||
"e621_user_id": e621User.ID,
|
||||
}).Info("service: processing user")
|
||||
|
||||
err = graphConnection.UploadUser(ctx, e621User)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return err
|
||||
}
|
||||
|
||||
currentDBFavCount, err := graphConnection.GetUserFavoriteCount(ctx, e621User.ID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
favoriteBuilder, err := client.GetFavoritesForUser(e621User.Name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if currentDBFavCount > e621User.FavoriteCount {
|
||||
//TODO: IMPLEMENT USER MARKED FOR DELETED FAVS
|
||||
log.WithFields(log.Fields{
|
||||
"e621_username": e621User.Name,
|
||||
"e621_user_id": e621User.ID,
|
||||
}).Info("service: start processing favorites")
|
||||
start := time.Now()
|
||||
"e621_current_db_favorite_count": currentDBFavCount,
|
||||
"e621_user_favorite_count": e621User.FavoriteCount,
|
||||
}).Debug("service: user has favorites deleted")
|
||||
}
|
||||
|
||||
e621FavoritesBuilder := client.GetFavoritesBuilder().SetUserID(e621User.ID)
|
||||
e621Favorites, err := client.GetAllFavoritesForUser(e621FavoritesBuilder)
|
||||
var pageIndex = 1
|
||||
for currentDBFavCount < e621User.FavoriteCount {
|
||||
|
||||
// Uploads all Tags, Posts as Nodes to Neo4j
|
||||
for i, post := range e621Favorites {
|
||||
if exists, err := graphConnection.CheckUserToPostLink(ctx, post.ID, e621User.ID); err == nil && exists {
|
||||
log.WithFields(log.Fields{
|
||||
"e621_username": e621User.Name,
|
||||
"e621_user_id": e621User.ID,
|
||||
"last_post_id": post.ID,
|
||||
}).Info("service: no new favorites found")
|
||||
favorites, err := favoriteBuilder.Page(pageIndex).Execute()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(favorites) <= 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, favorite := range favorites {
|
||||
|
||||
if currentDBFavCount == e621User.FavoriteCount {
|
||||
break
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
start = time.Now()
|
||||
err = uploadNodes(ctx, graphConnection, post)
|
||||
isFaved, err := graphConnection.CheckUserToPostLink(ctx, favorite.ID, e621User.ID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !isFaved {
|
||||
err = uploadDataToDB(ctx, graphConnection, favorite, e621User)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
currentDBFavCount++
|
||||
}
|
||||
|
||||
}
|
||||
pageIndex++
|
||||
}
|
||||
|
||||
log.WithFields(log.Fields{
|
||||
"e621_username": e621User.Name,
|
||||
"e621_user_id": e621User.ID,
|
||||
"post_number": i,
|
||||
"post_amount": len(e621Favorites),
|
||||
"post_id": post.ID,
|
||||
"upload_time": time.Since(start),
|
||||
}).Debug("service: uploading post")
|
||||
|
||||
start := time.Now()
|
||||
err = uploadPostToUserRelationship(ctx, graphConnection, post, e621User)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return err
|
||||
}
|
||||
|
||||
err = uploadSourceTagRelationship(ctx, graphConnection, post)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return err
|
||||
}
|
||||
|
||||
err = uploadGeneralTagRelationship(ctx, graphConnection, post)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return err
|
||||
}
|
||||
|
||||
err = uploadCharacterTagtRelationship(ctx, graphConnection, post)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return err
|
||||
}
|
||||
|
||||
err = uploadCopyrightTagRelationship(ctx, graphConnection, post)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return err
|
||||
}
|
||||
|
||||
err = uploadArtistTagRelationship(ctx, graphConnection, post)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return err
|
||||
}
|
||||
log.WithFields(log.Fields{
|
||||
"e621_username": e621User.Name,
|
||||
"e621_user_id": e621User.ID,
|
||||
"post_number": i,
|
||||
"post_amount": len(e621Favorites),
|
||||
"post_id": post.ID,
|
||||
"upload_time": time.Since(start),
|
||||
}).Debug("service: making relationship")
|
||||
}
|
||||
log.WithFields(log.Fields{
|
||||
"e621_username": e621User.Name,
|
||||
"e621_user_id": e621User.ID,
|
||||
"post_amount": len(e621Favorites),
|
||||
"scrape_time": time.Since(start),
|
||||
"post_amount": e621User.FavoriteCount,
|
||||
"scrape_time": time.Since(scrapeTime),
|
||||
}).Info("service: finished processing favorites")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func uploadDataToDB(ctx context.Context, graphConnection logic.GraphConnection, favorite model.Post, e621User model.User) error {
|
||||
start := time.Now()
|
||||
err := uploadNodes(ctx, graphConnection, favorite)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
log.WithFields(log.Fields{
|
||||
"e621_username": e621User.Name,
|
||||
"e621_user_id": e621User.ID,
|
||||
"post_id": favorite.ID,
|
||||
"upload_time": time.Since(start),
|
||||
}).Debug("service: uploaded post")
|
||||
|
||||
start = time.Now()
|
||||
err = uploadPostToUserRelationship(ctx, graphConnection, favorite, e621User)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return err
|
||||
}
|
||||
|
||||
err = uploadSourceTagRelationship(ctx, graphConnection, favorite)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return err
|
||||
}
|
||||
|
||||
err = uploadGeneralTagRelationship(ctx, graphConnection, favorite)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return err
|
||||
}
|
||||
|
||||
err = uploadCharacterTagtRelationship(ctx, graphConnection, favorite)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return err
|
||||
}
|
||||
|
||||
err = uploadCopyrightTagRelationship(ctx, graphConnection, favorite)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return err
|
||||
}
|
||||
|
||||
err = uploadArtistTagRelationship(ctx, graphConnection, favorite)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return err
|
||||
}
|
||||
log.WithFields(log.Fields{
|
||||
"e621_username": e621User.Name,
|
||||
"e621_user_id": e621User.ID,
|
||||
"post_id": favorite.ID,
|
||||
"upload_time": time.Since(start),
|
||||
}).Debug("service: made relationship")
|
||||
return nil
|
||||
}
|
||||
|
||||
// uploadNodes uploads the post to the database and creates the nodes
|
||||
func uploadNodes(ctx context.Context, graphConnection logic.GraphConnection, post model.Post) error {
|
||||
|
||||
|
@ -15,4 +15,5 @@ type GraphConnection interface {
|
||||
EstablishPostToSourceLink(ctx context.Context, e621PostID model.PostID, sourceURL string) error
|
||||
EstablishUserToPostLink(ctx context.Context, e621PostID model.PostID, e621UserID model.UserID) error
|
||||
CheckUserToPostLink(ctx context.Context, e621PostID model.PostID, e621UserID model.UserID) (bool, error)
|
||||
GetUserFavoriteCount(ctx context.Context, userID model.UserID) (int64, error)
|
||||
}
|
||||
|
Reference in New Issue
Block a user