From 409788d0b06669a907699c56834585eba72b85b4 Mon Sep 17 00:00:00 2001
From: David Janowski
Date: Mon, 22 May 2023 19:53:30 +0200
Subject: [PATCH] added support for e621 post and groundwork for sources

---
Review notes (below the "---" separator, so not part of the commit message):

 What this patch does
 * Renames the e621 model type Tags to PostTags.
 * Replaces Upload/Upload2/Upload3 in main.go with uploadTags and uploadPosts,
   and adds uploadSources (its call in main is still commented out).
 * Adds neo4jAPI.CreatePostNode and neo4jAPI.CreateSourceNode (MERGE by key),
   plus neo4jAPI.GetAllTagNodes / GetTagNode and the models.DBTag struct.

 Fixes applied during review (each replaces one added line, so hunk sizes
 and the diffstat are unchanged):
 * neo4jAPI/source.go: the query uses $url but the parameter map key was
   "URL"; Cypher parameter names are case-sensitive, so the key is now "url".
 * main.go, uploadSources: allCopyrightTags was filled from
   fav.Tags.Character; it now reads fav.Tags.Copyright.
 * main.go, main: the timing logs formatted a time.Duration with %d (raw
   nanoseconds); they use %s again, like the log line this patch removes.
 * neo4jAPI/tag.go, GetTagNode: the final return passed an err that is
   always nil at that point; it now returns a literal nil.

 Still open (would change hunk line counts, left for a follow-up):
 * main ignores the error returned by uploadPosts.
 * GetAllTagNodes / GetTagNode discard the error and null flag returned by
   neo4j.GetRecordValue.
 * The "Sources" timing measures nothing while uploadSources is commented out.

 Caveats
 * This text was rebuilt from a copy whose line breaks and indentation had
   been flattened. Blank context lines and struct-field alignment were
   inferred (blank lines were placed so every hunk matches its @@ counts);
   verify context against the tree before applying.
 * The post-image blob ids on the "index" lines predate the review fixes.

 e621/api/models/post.go |   4 +-
 main.go                 | 220 +++++++++++++++++-----------------------
 neo4jAPI/models/tag.go  |   6 ++
 neo4jAPI/post.go        |  23 +++++
 neo4jAPI/source.go      |  23 +++++
 neo4jAPI/tag.go         |  61 +++++++++++
 6 files changed, 209 insertions(+), 128 deletions(-)
 create mode 100644 neo4jAPI/models/tag.go
 create mode 100644 neo4jAPI/post.go
 create mode 100644 neo4jAPI/source.go

diff --git a/e621/api/models/post.go b/e621/api/models/post.go
index df71e48..fe153e2 100644
--- a/e621/api/models/post.go
+++ b/e621/api/models/post.go
@@ -24,7 +24,7 @@ type Post struct {
 	Preview       Preview       `json:"preview"`
 	Sample        Sample        `json:"sample"`
 	Score         Score         `json:"score"`
-	Tags          Tags          `json:"tags"`
+	Tags          PostTags      `json:"tags"`
 	LockedTags    []interface{} `json:"locked_tags"`
 	ChangeSeq     int64         `json:"change_seq"`
 	Flags         Flags         `json:"flags"`
@@ -90,7 +90,7 @@ type Score struct {
 	Total int64 `json:"total"`
 }
 
-type Tags struct {
+type PostTags struct {
 	General   []string `json:"general"`
 	Species   []string `json:"species"`
 	Character []string `json:"character"`
diff --git a/main.go b/main.go
index 3dc47c3..74381ba 100644
--- a/main.go
+++ b/main.go
@@ -42,139 +42,34 @@ func main() {
 	}
 
 	start := time.Now()
-	Upload3(favs, ctx, driver)
+	uploadTags(favs, ctx, driver)
 	elapsed := time.Since(start)
+	log.Printf("upload of Tags took %s", elapsed)
+
+	start = time.Now()
+	uploadPosts(favs, ctx, driver)
+	elapsed = time.Since(start)
+	log.Printf("upload of Posts took %s", elapsed)
+
+	start = time.Now()
+	//uploadSources(favs, ctx, driver)
+	elapsed = time.Since(start)
+	log.Printf("upload of Sources took %s", elapsed)
 
-	log.Printf("This took %s", elapsed)
 }
 
-
-// ~4min
-func Upload(favs []models.Post, ctx context.Context, driver neo4j.DriverWithContext) {
-	for i, fav := range favs {
-
-		log.Printf("The e621 post with the id %d has %d general Tags, %d character Tags, %d copyright Tags, %d artist Tags.", fav.ID, len(fav.Tags.General), len(fav.Tags.Character), len(fav.Tags.Copyright), len(fav.Tags.Artist))
-		log.Printf("Uploaded Posts: %d", i)
-
-		for _, general := range fav.Tags.General {
-			log.Printf("TagType: General - Tag: %s", general)
-			err := neo4jAPI.CreateTagNode(ctx, driver, general, "general")
-			if err != nil {
-				log.Fatal(err)
-			}
-		}
-		for _, character := range fav.Tags.Character {
-			log.Printf("TagType: Character - Tag: %s", character)
-			err := neo4jAPI.CreateTagNode(ctx, driver, character, "character")
-			if err != nil {
-				log.Fatal(err)
-			}
-		}
-
-		for _, copyright := range fav.Tags.Copyright {
-			log.Printf("TagType: Copyright - Tag: %s", copyright)
-			err := neo4jAPI.CreateTagNode(ctx, driver, copyright, "copyright")
-			if err != nil {
-				log.Fatal(err)
-			}
-		}
-
-		for _, artist := range fav.Tags.Artist {
-			log.Printf("TagType: Artist - Tag: %s", artist)
-			err := neo4jAPI.CreateTagNode(ctx, driver, artist, "artist")
-			if err != nil {
-				log.Fatal(err)
-			}
-		}
-	}
-}
-
-// ~30sec
-func Upload2(favs []models.Post, ctx context.Context, driver neo4j.DriverWithContext) {
-	uniqueGeneralTags := make(map[string]struct{})
-	uniqueCharacterTags := make(map[string]struct{})
-	uniqueCopyrightTags := make(map[string]struct{})
-	uniqueArtistTags := make(map[string]struct{})
-
-	allGeneralTags := make([]string, 0)
-	allCharacterTags := make([]string, 0)
-	allCopyrightTags := make([]string, 0)
-	allArtistTags := make([]string, 0)
-
+func uploadPosts(favs []models.Post, ctx context.Context, driver neo4j.DriverWithContext) error {
 	for _, fav := range favs {
-
-		// Process General Tags
-		for _, general := range fav.Tags.General {
-			uniqueGeneralTags[general] = struct{}{}
-			allGeneralTags = append(allGeneralTags, general)
-		}
-
-		// Process Character Tags
-		for _, character := range fav.Tags.Character {
-			uniqueCharacterTags[character] = struct{}{}
-			allCharacterTags = append(allCharacterTags, character)
-		}
-
-		// Process Copyright Tags
-		for _, copyright := range fav.Tags.Copyright {
-			uniqueCopyrightTags[copyright] = struct{}{}
-			allCopyrightTags = append(allCopyrightTags, copyright)
-		}
-
-		// Process Artist Tags
-		for _, artist := range fav.Tags.Artist {
-			uniqueArtistTags[artist] = struct{}{}
-			allArtistTags = append(allArtistTags, artist)
-		}
-
-	}
-
-	log.Printf("uniqueGeneralTags length: %d", len(uniqueGeneralTags))
-	log.Printf("allGeneralTags length: %d", len(allGeneralTags))
-
-	log.Printf("uniqueCharacterTags length: %d", len(uniqueCharacterTags))
-	log.Printf("allCharacterTags length: %d", len(allCharacterTags))
-
-	log.Printf("uniqueCopyrightTags length: %d", len(uniqueCopyrightTags))
-	log.Printf("alleCopyrightTags length: %d", len(allCopyrightTags))
-
-	log.Printf("uniqueArtistTags length: %d", len(uniqueArtistTags))
-	log.Printf("allArtistTags length: %d", len(allArtistTags))
-
-	for uniqueGeneralTag := range uniqueGeneralTags {
-		log.Printf("TagType: General - Tag: %s", uniqueGeneralTag)
-		err := neo4jAPI.CreateTagNode(ctx, driver, uniqueGeneralTag, "general")
+		err := neo4jAPI.CreatePostNode(ctx, driver, fav.ID)
 		if err != nil {
-			log.Fatal(err)
+			return err
 		}
-	}
-	for uniqueCharacterTag := range uniqueCharacterTags {
-		log.Printf("TagType: Character - Tag: %s", uniqueCharacterTag)
-		err := neo4jAPI.CreateTagNode(ctx, driver, uniqueCharacterTag, "character")
-		if err != nil {
-			log.Fatal(err)
-		}
-	}
-	for uniqueCopyrightTag := range uniqueCopyrightTags {
-		log.Printf("TagType: Copyright - Tag: %s", uniqueCopyrightTag)
-		err := neo4jAPI.CreateTagNode(ctx, driver, uniqueCopyrightTag, "copyright")
-		if err != nil {
-			log.Fatal(err)
-		}
-	}
-
-	for uniqueArtistTag := range uniqueArtistTags {
-		log.Printf("TagType: Artist - Tag: %s", uniqueArtistTag)
-		err := neo4jAPI.CreateTagNode(ctx, driver, uniqueArtistTag, "artist")
-		if err != nil {
-			log.Fatal(err)
-		}
 	}
 
+	return nil
 }
 
-// ~57s
-func Upload3(favs []models.Post, ctx context.Context, driver neo4j.DriverWithContext) {
+func uploadTags(favs []models.Post, ctx context.Context, driver neo4j.DriverWithContext) {
 	uniqueGeneralTags := make([]string, 0)
 	uniqueCharacterTags := make([]string, 0)
 	uniqueCopyrightTags := make([]string, 0)
@@ -193,16 +88,89 @@ func Upload3(favs []models.Post, ctx context.Context, driver neo4j.DriverWithCon
 		allArtistTags = append(allArtistTags, fav.Tags.Artist...)
 	}
 
-	// Process General Tags
+	// Process General PostTags
 	uniqueGeneralTags = utils.UniqueNonEmptyElementsOf(allGeneralTags)
 
-	// Process Character Tags
+	// Process Character PostTags
 	uniqueCharacterTags = utils.UniqueNonEmptyElementsOf(allCharacterTags)
 
-	// Process Copyright Tags
+	// Process Copyright PostTags
 	uniqueCopyrightTags = utils.UniqueNonEmptyElementsOf(allCopyrightTags)
 
-	// Process Artist Tags
+	// Process Artist PostTags
+	uniqueArtistTags = utils.UniqueNonEmptyElementsOf(allArtistTags)
+
+	log.Printf("uniqueGeneralTags length: %d", len(uniqueGeneralTags))
+
+	log.Printf("uniqueCharacterTags length: %d", len(uniqueCharacterTags))
+
+	log.Printf("uniqueCopyrightTags length: %d", len(uniqueCopyrightTags))
+
+	log.Printf("uniqueArtistTags length: %d", len(uniqueArtistTags))
+
+	for _, uniqueGeneralTag := range uniqueGeneralTags {
+
+		log.Printf("TagType: General - Tag: %s", uniqueGeneralTag)
+		err := neo4jAPI.CreateTagNode(ctx, driver, uniqueGeneralTag, "general")
+		if err != nil {
+			log.Fatal(err)
+		}
+	}
+	for _, uniqueCharacterTag := range uniqueCharacterTags {
+		log.Printf("TagType: Character - Tag: %s", uniqueCharacterTag)
+		err := neo4jAPI.CreateTagNode(ctx, driver, uniqueCharacterTag, "character")
+		if err != nil {
+			log.Fatal(err)
+		}
+	}
+
+	for _, uniqueCopyrightTag := range uniqueCopyrightTags {
+		log.Printf("TagType: Copyright - Tag: %s", uniqueCopyrightTag)
+		err := neo4jAPI.CreateTagNode(ctx, driver, uniqueCopyrightTag, "copyright")
+		if err != nil {
+			log.Fatal(err)
+		}
+	}
+
+	for _, uniqueArtistTag := range uniqueArtistTags {
+		log.Printf("TagType: Artist - Tag: %s", uniqueArtistTag)
+		err := neo4jAPI.CreateTagNode(ctx, driver, uniqueArtistTag, "artist")
+		if err != nil {
+			log.Fatal(err)
+		}
+	}
+
+}
+
+func uploadSources(favs []models.Post, ctx context.Context, driver neo4j.DriverWithContext) {
+	uniqueGeneralTags := make([]string, 0)
+	uniqueCharacterTags := make([]string, 0)
+	uniqueCopyrightTags := make([]string, 0)
+	uniqueArtistTags := make([]string, 0)
+
+	allGeneralTags := make([]string, 0)
+	allCharacterTags := make([]string, 0)
+	allCopyrightTags := make([]string, 0)
+	allArtistTags := make([]string, 0)
+
+	// add all tags together
+	for _, fav := range favs {
+		allGeneralTags = append(allGeneralTags, fav.Tags.General...)
+		allCharacterTags = append(allCharacterTags, fav.Tags.Character...)
+		allCopyrightTags = append(allCopyrightTags, fav.Tags.Copyright...)
+		allArtistTags = append(allArtistTags, fav.Tags.Artist...)
+	}
+
+	// Process General PostTags
+	uniqueGeneralTags = utils.UniqueNonEmptyElementsOf(allGeneralTags)
+
+	// Process Character PostTags
+	uniqueCharacterTags = utils.UniqueNonEmptyElementsOf(allCharacterTags)
+
+	// Process Copyright PostTags
+	uniqueCopyrightTags = utils.UniqueNonEmptyElementsOf(allCopyrightTags)
+
+	// Process Artist PostTags
 	uniqueArtistTags = utils.UniqueNonEmptyElementsOf(allArtistTags)
 
 	log.Printf("uniqueGeneralTags length: %d", len(uniqueGeneralTags))
diff --git a/neo4jAPI/models/tag.go b/neo4jAPI/models/tag.go
new file mode 100644
index 0000000..72ff24a
--- /dev/null
+++ b/neo4jAPI/models/tag.go
@@ -0,0 +1,6 @@
+package models
+
+type DBTag struct {
+	Tag     string
+	TagType string
+}
diff --git a/neo4jAPI/post.go b/neo4jAPI/post.go
new file mode 100644
index 0000000..1e7d817
--- /dev/null
+++ b/neo4jAPI/post.go
@@ -0,0 +1,23 @@
+package neo4jAPI
+
+import (
+	"context"
+	"github.com/neo4j/neo4j-go-driver/v5/neo4j"
+)
+
+func CreatePostNode(ctx context.Context, driver neo4j.DriverWithContext, e621ID int64) error {
+	query := `
+	MERGE (u:e621Post {e621PostID: $e621ID})
+	RETURN u
+`
+	params := map[string]any{
+		"e621ID": e621ID,
+	}
+
+	_, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
diff --git a/neo4jAPI/source.go b/neo4jAPI/source.go
new file mode 100644
index 0000000..3a2c3c6
--- /dev/null
+++ b/neo4jAPI/source.go
@@ -0,0 +1,23 @@
+package neo4jAPI
+
+import (
+	"context"
+	"github.com/neo4j/neo4j-go-driver/v5/neo4j"
+)
+
+func CreateSourceNode(ctx context.Context, driver neo4j.DriverWithContext, URL string) error {
+	query := `
+	MERGE (u:Source {URL: $url})
+	RETURN u
+`
+	params := map[string]any{
+		"url": URL,
+	}
+
+	_, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
diff --git a/neo4jAPI/tag.go b/neo4jAPI/tag.go
index f6b6e24..8679835 100644
--- a/neo4jAPI/tag.go
+++ b/neo4jAPI/tag.go
@@ -2,6 +2,7 @@ package neo4jAPI
 
 import (
 	"context"
+	"e621_to_neo4j/neo4jAPI/models"
 	"github.com/neo4j/neo4j-go-driver/v5/neo4j"
 )
 
@@ -22,3 +23,63 @@ func CreateTagNode(ctx context.Context, driver neo4j.DriverWithContext, name str
 
 	return nil
 }
+
+func GetAllTagNodes(ctx context.Context, driver neo4j.DriverWithContext, tagType string) ([]string, error) {
+	query := `
+	MATCH (u:e621Tag {e621TagType: $tagType})
+	RETURN u.e621Tag as eTag
+	`
+
+	params := map[string]interface{}{
+		"tagType": tagType,
+	}
+
+	result, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)
+	if err != nil {
+		return nil, err
+	}
+
+	var tags []string
+	for _, record := range result.Records {
+		tag, _, _ := neo4j.GetRecordValue[string](record, "eTag")
+		tags = append(tags, tag)
+	}
+
+	return tags, nil
+}
+
+func GetTagNode(ctx context.Context, driver neo4j.DriverWithContext, name string) (models.DBTag, bool, error) {
+
+	var tag models.DBTag
+
+	query := `
+	MATCH (u:e621Tag {e621Tag: $name})
+	RETURN u.e621Tag as e621Tag, u.e621TagType as e621TagType
+	`
+
+	params := map[string]interface{}{
+		"name": name,
+	}
+
+	result, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer)
+	if err != nil {
+		return tag, false, err
+	}
+
+	if len(result.Records) > 0 {
+		record := result.Records[0]
+
+		e621Tag, _, _ := neo4j.GetRecordValue[string](record, "e621Tag")
+		e621TagType, _, _ := neo4j.GetRecordValue[string](record, "e621TagType")
+
+		tag = models.DBTag{
+			Tag:     e621Tag,
+			TagType: e621TagType,
+		}
+		if e621Tag != name {
+			return tag, false, nil
+		}
+		return tag, true, nil
+	}
+	return tag, false, nil
+}