From 8e76fa709db1fca836fd81a08eb791ef553d554c Mon Sep 17 00:00:00 2001 From: SoXX Date: Wed, 15 Nov 2023 13:21:13 +0100 Subject: [PATCH] feat: added partial implementation for low, mid and high level api for db exports Signed-off-by: SoXX --- example/highlevel/dbexport.go | 46 +++++++++++ example/lowlevel/dbexport.go | 3 +- example/midlevel/dbexport.go | 55 +++++++++++++ go.mod | 5 +- go.sum | 2 + pkg/e621/builder/export_file.go | 31 ++++++++ pkg/e621/builder/export_list.go | 25 ++++++ pkg/e621/client.go | 137 ++++++++++++++++++++++++++++++++ pkg/e621/endpoints/dbexport.go | 11 +-- pkg/e621/model/pool.go | 66 ++++++++++++--- pkg/e621/model/post.go | 116 +++++++++++++-------------- pkg/e621/utils/helper.go | 10 +++ 12 files changed, 430 insertions(+), 77 deletions(-) create mode 100644 example/highlevel/dbexport.go create mode 100644 example/midlevel/dbexport.go create mode 100644 pkg/e621/builder/export_file.go create mode 100644 pkg/e621/builder/export_list.go create mode 100644 pkg/e621/utils/helper.go diff --git a/example/highlevel/dbexport.go b/example/highlevel/dbexport.go new file mode 100644 index 0000000..7ff6593 --- /dev/null +++ b/example/highlevel/dbexport.go @@ -0,0 +1,46 @@ +package main + +import ( + "git.dragse.it/anthrove/e621-sdk-go/pkg/e621" + _ "github.com/joho/godotenv/autoload" + "log" + "os" +) + +func main() { + client := e621.NewClient(os.Getenv("API_USER"), os.Getenv("API_KEY")) + + { + fileName, _, err := client.GetLatestPoolsDBExportDataAsBytes() + if err != nil { + panic(err) + } + log.Println(fileName) + } + + { + latestDBPoolExport, err := client.GetLatestPoolsDBExportDataAsStruct() + if err != nil { + panic(err) + } + log.Println(latestDBPoolExport[0]) + } + + { + fileName, _, err := client.GetLatestPostsDBExportDataAsBytes() + if err != nil { + panic(err) + } + log.Println(fileName) + + } + + { + latestDBPoolExport, err := client.GetLatestPostsDBExportDataAsStruct() + if err != nil { + panic(err) + } + log.Println(latestDBPoolExport[0]) + } + +} diff --git a/example/lowlevel/dbexport.go b/example/lowlevel/dbexport.go index 8b3162b..356d35f 100644 --- a/example/lowlevel/dbexport.go +++ b/example/lowlevel/dbexport.go @@ -3,7 +3,6 @@ package main import ( "git.dragse.it/anthrove/e621-sdk-go/pkg/e621/endpoints" "git.dragse.it/anthrove/e621-sdk-go/pkg/e621/model" - "io" "log" "net/http" "os" @@ -42,7 +41,7 @@ func main() { defer file.Close() - _, err = io.Copy(file, rawFile) + err = os.WriteFile(exportFileName, rawFile, 0644) if err != nil { panic(err) } diff --git a/example/midlevel/dbexport.go b/example/midlevel/dbexport.go new file mode 100644 index 0000000..61f0222 --- /dev/null +++ b/example/midlevel/dbexport.go @@ -0,0 +1,55 @@ +package main + +import ( + "git.dragse.it/anthrove/e621-sdk-go/pkg/e621/builder" + "git.dragse.it/anthrove/e621-sdk-go/pkg/e621/model" + "log" + "net/http" + "os" +) + +func main() { + // Define the request context with essential information. 
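+	// The mid-level API wraps the raw endpoint calls in reusable builders.
+	// Credentials are read from the API_USER and API_KEY environment variables.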
+	requestContext := model.RequestContext{
+		Client:    http.Client{},
+		Host:      "https://e621.net",
+		UserAgent: "Go-e621-SDK (@username)",
+		Username:  os.Getenv("API_USER"), // Replace with your username
+		APIKey:    os.Getenv("API_KEY"),  // Replace with your API key
+	}
+
+	log.Println("Getting a list of DB exports:")
+	getDBExportList := builder.NewGetDBExportListBuilder(requestContext)
+	dbExportFiles, err := getDBExportList.Execute()
+	if err != nil {
+		panic(err)
+	}
+
+	log.Printf("%d files found", len(dbExportFiles))
+	for _, v := range dbExportFiles {
+		log.Printf("File found: %s", v)
+	}
+
+	log.Println(dbExportFiles)
+
+	exportFileName := dbExportFiles[0]
+	log.Println("Downloading DB export")
+	log.Printf("File to download: %s", exportFileName)
+	getDBExportFile := builder.NewGetDBExportFileBuilder(requestContext)
+
+	rawFile, err := getDBExportFile.SetFile(exportFileName).Execute()
+	if err != nil {
+		panic(err)
+	}
+
+	// os.WriteFile creates the file if needed and closes it when done.
+	err = os.WriteFile(exportFileName, rawFile, 0644)
+	if err != nil {
+		panic(err)
+	}
+	log.Printf("File %s downloaded", exportFileName)
+}
diff --git a/go.mod b/go.mod
index e006cb7..8ae495b 100644
--- a/go.mod
+++ b/go.mod
@@ -8,4 +8,7 @@ require (
 	golang.org/x/time v0.3.0
 )
 
-require golang.org/x/net v0.18.0 // indirect
+require (
+	github.com/gocarina/gocsv v0.0.0-20230616125104-99d496ca653d // indirect
+	golang.org/x/net v0.18.0 // indirect
+)
diff --git a/go.sum b/go.sum
index ff43cec..5107a55 100644
--- a/go.sum
+++ b/go.sum
@@ -1,5 +1,7 @@
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/gocarina/gocsv v0.0.0-20230616125104-99d496ca653d h1:KbPOUXFUDJxwZ04vbmDOc3yuruGvVO+LOa7cVER3yWw=
+github.com/gocarina/gocsv v0.0.0-20230616125104-99d496ca653d/go.mod h1:5YoVOkjYAQumqlV356Hj3xeYh4BdZuLE0/nRkf2NKkI=
 github.com/jarcoal/httpmock v1.3.1 h1:iUx3whfZWVf3jT01hQTO/Eo5sAYtB2/rqaUuOtpInww=
 github.com/jarcoal/httpmock v1.3.1/go.mod h1:3yb8rc4BI7TCBhFY8ng0gjuLKJNquuDNiPaZjnENuYg=
 github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
diff --git a/pkg/e621/builder/export_file.go b/pkg/e621/builder/export_file.go
new file mode 100644
index 0000000..3767637
--- /dev/null
+++ b/pkg/e621/builder/export_file.go
@@ -0,0 +1,31 @@
+package builder
+
+import (
+	"git.dragse.it/anthrove/e621-sdk-go/pkg/e621/endpoints"
+	"git.dragse.it/anthrove/e621-sdk-go/pkg/e621/model"
+)
+
+type DBExportFileBuilder interface {
+	SetFile(fileName string) DBExportFileBuilder
+	Execute() ([]byte, error)
+}
+
+func NewGetDBExportFileBuilder(requestContext model.RequestContext) DBExportFileBuilder {
+	return &getDBExportFile{
+		requestContext: requestContext,
+	}
+}
+
+type getDBExportFile struct {
+	requestContext model.RequestContext
+	fileName       string
+}
+
+func (g *getDBExportFile) SetFile(fileName string) DBExportFileBuilder {
+	g.fileName = fileName
+	return g
+}
+
+func (g *getDBExportFile) Execute() ([]byte, error) {
+	return endpoints.GetDBExportFile(g.requestContext, g.fileName)
+}
diff --git a/pkg/e621/builder/export_list.go b/pkg/e621/builder/export_list.go
new file mode 100644
index 0000000..232f9e3
--- /dev/null
+++ b/pkg/e621/builder/export_list.go
@@ -0,0 +1,25 @@
+package builder
+
+import (
+	"git.dragse.it/anthrove/e621-sdk-go/pkg/e621/endpoints"
+	"git.dragse.it/anthrove/e621-sdk-go/pkg/e621/model"
+)
+
+type DBExportListBuilder interface {
+	Execute() ([]string, error)
+}
+
+func NewGetDBExportListBuilder(requestContext model.RequestContext) DBExportListBuilder {
+	return &getDBExportList{
+		requestContext: requestContext,
+	}
+}
+
+type getDBExportList struct {
+	requestContext model.RequestContext
+}
+
+func (g *getDBExportList) Execute() ([]string, error) {
+	return endpoints.GetDBExportList(g.requestContext)
+
+}
diff --git a/pkg/e621/client.go b/pkg/e621/client.go
index 88911b6..619f2dc 100644
--- a/pkg/e621/client.go
+++ b/pkg/e621/client.go
@@ -1,15 +1,23 @@
 package e621
 
 import (
+	"bytes"
+	"compress/gzip"
+	"encoding/csv"
 	"fmt"
 	"git.dragse.it/anthrove/e621-sdk-go/pkg/e621/builder"
 	"git.dragse.it/anthrove/e621-sdk-go/pkg/e621/model"
 	"git.dragse.it/anthrove/e621-sdk-go/pkg/e621/utils"
+	"github.com/gocarina/gocsv"
 	_ "github.com/joho/godotenv/autoload"
 	"golang.org/x/time/rate"
+	"log"
 	"math"
 	"net/http"
+	"regexp"
 	"strconv"
+	"strings"
+	"time"
 )
 
 // Client is the main client for interacting with the e621 API.
@@ -242,3 +250,132 @@ func (c *Client) GetAllPosts(postBuilder builder.PostsBuilder) ([]model.Post, er
 	// Retrieves all available posts using the provided post builder.
 	return c.GetNPosts(math.MaxInt, postBuilder)
 }
+
+// GetLatestPoolsDBExportDataAsBytes downloads the most recent pools DB export
+// and returns its file name together with the raw gzip-compressed CSV data.
+func (c *Client) GetLatestPoolsDBExportDataAsBytes() (string, []byte, error) {
+	dbExportFileNameList, err := builder.NewGetDBExportListBuilder(c.RequestContext).Execute()
+	if err != nil {
+		return "", nil, err
+	}
+
+	getDBExportFile := builder.NewGetDBExportFileBuilder(c.RequestContext)
+
+	filter := func(s string) bool { return strings.HasPrefix(s, "pools") }
+	filteredFileNameList := utils.SliceFilter(dbExportFileNameList, filter)
+
+	regex, err := regexp.Compile("\\d{4}\\-(0?[1-9]|1[012])\\-(0?[1-9]|[12][0-9]|3[01])")
+	if err != nil {
+		return "", nil, err
+	}
+
+	currentDate, err := time.Parse("2006-01-02", time.Now().Format("2006-01-02"))
+	if err != nil {
+		return "", nil, err
+	}
+
+	duration := math.MaxFloat64
+	var fileName string
+	for _, listFileName := range filteredFileNameList {
+		// Skip file names that do not contain a parsable date.
+		if !regex.MatchString(listFileName) {
+			continue
+		}
+
+		fileDate, err := time.Parse("2006-01-02", regex.FindString(listFileName))
+		if err != nil {
+			return "", nil, err
+		}
+
+		if currentDate.Sub(fileDate).Seconds() < duration {
+			duration = currentDate.Sub(fileDate).Seconds()
+			fileName = listFileName
+		}
+	}
+
+	rawFile, err := getDBExportFile.SetFile(fileName).Execute()
+	if err != nil {
+		return "", nil, err
+	}
+
+	return fileName, rawFile, nil
+}
+
+// GetLatestPoolsDBExportDataAsStruct downloads the most recent pools DB export
+// and unmarshals it into a slice of model.Pool values.
+func (c *Client) GetLatestPoolsDBExportDataAsStruct() ([]*model.Pool, error) {
+	var pools []*model.Pool
+
+	_, data, err := c.GetLatestPoolsDBExportDataAsBytes()
+	if err != nil {
+		return nil, err
+	}
+
+	zipReader, err := gzip.NewReader(bytes.NewReader(data))
+	if err != nil {
+		return nil, err
+	}
+
+	defer zipReader.Close()
+
+	// Create a CSV reader on top of the decompressed stream.
+	reader := csv.NewReader(zipReader)
+
+	err = gocsv.UnmarshalCSV(reader, &pools)
+	if err != nil {
+		return nil, err
+	}
+
+	return pools, nil
+}
+
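+// GetLatestPostsDBExportDataAsBytes downloads the most recent posts DB export
+// and returns its file name together with the raw gzip-compressed CSV data.
+// Note that the posts export is large (over 1 GB).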
+func (c *Client) GetLatestPostsDBExportDataAsBytes() (string, []byte, error) {
+	log.Println("Downloading the latest posts DB export, please wait... (the file is over 1 GB)")
+	dbExportFileNameList, err := builder.NewGetDBExportListBuilder(c.RequestContext).Execute()
+	if err != nil {
+		return "", nil, err
+	}
+
+	getDBExportFile := builder.NewGetDBExportFileBuilder(c.RequestContext)
+
+	filter := func(s string) bool { return strings.HasPrefix(s, "posts") }
+	filteredFileNameList := utils.SliceFilter(dbExportFileNameList, filter)
+
+	regex, err := regexp.Compile("\\d{4}\\-(0?[1-9]|1[012])\\-(0?[1-9]|[12][0-9]|3[01])")
+	if err != nil {
+		return "", nil, err
+	}
+
+	currentDate, err := time.Parse("2006-01-02", time.Now().Format("2006-01-02"))
+	if err != nil {
+		return "", nil, err
+	}
+
+	duration := math.MaxFloat64
+	var fileName string
+	for _, listFileName := range filteredFileNameList {
+		// Skip file names that do not contain a parsable date.
+		if !regex.MatchString(listFileName) {
+			continue
+		}
+
+		fileDate, err := time.Parse("2006-01-02", regex.FindString(listFileName))
+		if err != nil {
+			return "", nil, err
+		}
+
+		if currentDate.Sub(fileDate).Seconds() < duration {
+			duration = currentDate.Sub(fileDate).Seconds()
+			fileName = listFileName
+		}
+	}
+
+	rawFile, err := getDBExportFile.SetFile(fileName).Execute()
+	if err != nil {
+		return "", nil, err
+	}
+
+	return fileName, rawFile, nil
+}
+
+// GetLatestPostsDBExportDataAsStruct will unmarshal the most recent posts DB export into model.Post values; it is not implemented yet.
+func (c *Client) GetLatestPostsDBExportDataAsStruct() ([]*model.Post, error) {
+	var posts []*model.Post
+
+	// TODO: Implement this function; the tags in the CSV export are a single string without category assignments and need special parsing.
+
+	return posts, nil
+}
diff --git a/pkg/e621/endpoints/dbexport.go b/pkg/e621/endpoints/dbexport.go
index a17c69a..173a5b8 100644
--- a/pkg/e621/endpoints/dbexport.go
+++ b/pkg/e621/endpoints/dbexport.go
@@ -16,7 +16,7 @@ import (
 // the HTML content to extract the links to export files with the ".csv.gz" extension.
 //
 // Parameters:
-// - requestContext: The context for the API request, including the host, user agent, username, and API key.
+// - requestContext (model.RequestContext): The context for the API request, including the host, user agent, username, and API key.
 //
 // Returns:
 // - []string: A slice of file names with the ".csv.gz" extension.
@@ -80,13 +80,13 @@ func GetDBExportList(requestContext model.RequestContext) ([]string, error) {
 // particular file identified by its name.
 //
 // Parameters:
-// - requestContext: The context for the API request, including the host, user agent, username, and API key.
+// - requestContext (model.RequestContext): The context for the API request, including the host, user agent, username, and API key.
 // - file: The name of the file to be fetched from the database export.
 //
 // Returns:
-// - io.ReadCloser: The HTTP response containing the requested file (probably a csv.gz).
+// - []byte: The raw contents of the requested file (typically a gzip-compressed CSV).
 // - error: An error, if any, encountered during the API request or response handling.
-func GetDBExportFile(requestContext model.RequestContext, file string) (io.ReadCloser, error) {
+func GetDBExportFile(requestContext model.RequestContext, file string) ([]byte, error) {
 	if file == "" {
 		return nil, fmt.Errorf("no file specified")
 	}
@@ -112,6 +112,7 @@ func GetDBExportFile(requestContext model.RequestContext, file string) (io.ReadC
 		// If the status code is outside the 2xx range, return an error based on the status code.
return nil, utils.StatusCodesToError(resp.StatusCode) } - return resp.Body, nil + + return io.ReadAll(resp.Body) } diff --git a/pkg/e621/model/pool.go b/pkg/e621/model/pool.go index 0d8fff4..1c0b02a 100644 --- a/pkg/e621/model/pool.go +++ b/pkg/e621/model/pool.go @@ -1,7 +1,14 @@ package model +import ( + "fmt" + "strconv" + "strings" +) + type PoolCategory string type PoolOrder string +type PoolIDs []int64 const ( Series PoolCategory = "series" @@ -16,15 +23,52 @@ const ( ) type Pool struct { - ID int64 `json:"id"` - Name string `json:"name"` - CreatedAt string `json:"created_at"` - UpdatedAt string `json:"updated_at"` - CreatorID int64 `json:"creator_id"` - Description string `json:"description"` - IsActive bool `json:"is_active"` - Category PoolCategory `json:"category"` - PostIDS []int64 `json:"post_ids"` - CreatorName string `json:"creator_name"` - PostCount int64 `json:"post_count"` + ID int64 `json:"id" csv:"id"` + Name string `json:"name" csv:"name"` + CreatedAt string `json:"created_at" csv:"created_at"` + UpdatedAt string `json:"updated_at" csv:"updated_at"` + CreatorID int64 `json:"creator_id" csv:"creator_id"` + Description string `json:"description" csv:"description"` + IsActive bool `json:"is_active" csv:"is_active"` + Category PoolCategory `json:"category" csv:"category"` + PostIDS PoolIDs `json:"post_ids" csv:"post_ids"` + CreatorName string `json:"creator_name" csv:"-"` + PostCount int64 `json:"post_count" csv:"-"` +} + +// UnmarshalCSV parses a CSV-formatted string containing pool IDs and populates the PoolIDs receiver. +// +// This method is designed to unmarshal a CSV-formatted string, where pool IDs are separated by commas. +// It trims the surrounding curly braces and splits the string to extract individual pool IDs. +// The parsed IDs are then converted to int64 and assigned to the PoolIDs receiver. +// +// Parameters: +// - csv: The CSV-formatted string containing pool IDs. +// +// Returns: +// - error: An error encountered during the unmarshaling process, if any. 
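+//
+// Example: the input "{1,2,3}" is parsed into PoolIDs{1, 2, 3}.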
+func (poolIDs *PoolIDs) UnmarshalCSV(csv string) error { + // Trim the surrounding curly braces + csv = strings.TrimPrefix(csv, "{") + csv = strings.TrimSuffix(csv, "}") + + // Split the CSV string into individual pool IDs + ids := strings.Split(csv, ",") + + var localPoolIDs PoolIDs + + // Iterate through each ID, parse it to int64, and append to the localPoolIDs + for _, id := range ids { + if id != "" { + int64ID, err := strconv.ParseInt(id, 10, 64) + if err != nil { + return fmt.Errorf("failed to parse pool ID '%s': %v", id, err) + } + localPoolIDs = append(localPoolIDs, int64ID) + } + } + + // Assign the parsed IDs to the receiver + *poolIDs = localPoolIDs + return nil } diff --git a/pkg/e621/model/post.go b/pkg/e621/model/post.go index c22abb2..45f502e 100644 --- a/pkg/e621/model/post.go +++ b/pkg/e621/model/post.go @@ -8,86 +8,86 @@ type PostResponse struct { } type Post struct { - ID PostID `json:"id"` - CreatedAt string `json:"created_at"` - UpdatedAt string `json:"updated_at"` - File File `json:"file"` - Preview Preview `json:"preview"` - Sample Sample `json:"sample"` - Score Score `json:"score"` - Tags Tags `json:"tags"` - LockedTags []interface{} `json:"locked_tags"` - ChangeSeq int64 `json:"change_seq"` - Flags Flags `json:"flags"` - Rating string `json:"rating"` - FavCount int64 `json:"fav_count"` - Sources []string `json:"sources"` - Pools []interface{} `json:"pools"` - Relationships Relationships `json:"relationships"` - ApproverID interface{} `json:"approver_id"` - UploaderID int64 `json:"uploader_id"` - Description string `json:"description"` - CommentCount int64 `json:"comment_count"` - IsFavorited bool `json:"is_favorited"` - HasNotes bool `json:"has_notes"` - Duration interface{} `json:"duration"` + ID PostID `json:"id" csv:"id"` + CreatedAt string `json:"created_at" csv:"created_at"` + UpdatedAt string `json:"updated_at" csv:"updated_at"` + File File `json:"file" csv:"file"` + Preview Preview `json:"preview" csv:"-"` + Sample Sample `json:"sample" csv:"-"` + Score Score `json:"score" csv:"score"` + Tags Tags `json:"tags" csv:"tag_string"` + LockedTags []interface{} `json:"locked_tags" csv:"locked_tags"` + ChangeSeq int64 `json:"change_seq" csv:"change_seq"` + Flags Flags `json:"flags" csv:"-"` + Rating string `json:"rating" csv:"rating"` + FavCount int64 `json:"fav_count" csv:"fav_count"` + Sources []string `json:"sources" csv:"source"` + Pools []interface{} `json:"pools" csv:"-"` + Relationships Relationships `json:"relationships" csv:"-"` + ApproverID interface{} `json:"approver_id" csv:"approver_id"` + UploaderID int64 `json:"uploader_id" csv:"uploader_id"` + Description string `json:"description" csv:"description"` + CommentCount int64 `json:"comment_count" csv:"comment_count"` + IsFavorited bool `json:"is_favorited" csv:"-"` + HasNotes bool `json:"has_notes" csv:"-"` + Duration interface{} `json:"duration" csv:"duration"` } type File struct { - Width int64 `json:"width"` - Height int64 `json:"height"` - EXT string `json:"ext"` - Size int64 `json:"size"` - Md5 string `json:"md5"` - URL string `json:"url"` + Width int64 `json:"width" csv:"image_width"` + Height int64 `json:"height" csv:"image_height"` + EXT string `json:"ext" csv:"file_ext"` + Size int64 `json:"size" csv:"file_size"` + Md5 string `json:"md5" csv:"md5"` + URL string `json:"url" csv:"-"` } type Flags struct { - Pending bool `json:"pending"` - Flagged bool `json:"flagged"` - NoteLocked bool `json:"note_locked"` - StatusLocked bool `json:"status_locked"` - RatingLocked bool `json:"rating_locked"` - 
Deleted bool `json:"deleted"` + Pending bool `json:"pending" csv:"is_pending"` + Flagged bool `json:"flagged" csv:"is_flagged"` + NoteLocked bool `json:"note_locked" csv:"is_note_locked"` + StatusLocked bool `json:"status_locked" csv:"is_status_locked"` + RatingLocked bool `json:"rating_locked" csv:"is_rating_locked"` + Deleted bool `json:"deleted" csv:"is_deleted"` } type Preview struct { - Width int64 `json:"width"` - Height int64 `json:"height"` - URL string `json:"url"` + Width int64 `json:"width" csv:"-"` + Height int64 `json:"height" csv:"-"` + URL string `json:"url" csv:"-"` } type Relationships struct { - ParentID interface{} `json:"parent_id"` - HasChildren bool `json:"has_children"` - HasActiveChildren bool `json:"has_active_children"` - Children []interface{} `json:"children"` + ParentID interface{} `json:"parent_id" csv:"parent_id"` + HasChildren bool `json:"has_children" csv:"-"` + HasActiveChildren bool `json:"has_active_children" csv:"-"` + Children []interface{} `json:"children" csv:"-"` } type Sample struct { - Has bool `json:"has"` - Height int64 `json:"height"` - Width int64 `json:"width"` - URL string `json:"url"` - Alternates Alternates `json:"alternates"` + Has bool `json:"has" csv:"-"` + Height int64 `json:"height" csv:"-"` + Width int64 `json:"width" csv:"-"` + URL string `json:"url" csv:"-"` + Alternates Alternates `json:"alternates" csv:"-"` } type Alternates struct { } type Score struct { - Up int64 `json:"up"` - Down int64 `json:"down"` - Total int64 `json:"total"` + Up int64 `json:"up" csv:"up_score"` + Down int64 `json:"down" csv:"down_score"` + Total int64 `json:"total" csv:"-"` } type Tags struct { - General []string `json:"general"` - Artist []string `json:"artist"` - Copyright []string `json:"copyright"` - Character []string `json:"character"` - Species []string `json:"species"` - Invalid []string `json:"invalid"` - Meta []string `json:"meta"` - Lore []string `json:"lore"` + General []string `json:"general" csv:"-"` + Artist []string `json:"artist" csv:"-"` + Copyright []string `json:"copyright" csv:"-"` + Character []string `json:"character" csv:"-"` + Species []string `json:"species" csv:"-"` + Invalid []string `json:"invalid" csv:"-"` + Meta []string `json:"meta" csv:"-"` + Lore []string `json:"lore" csv:"-"` } diff --git a/pkg/e621/utils/helper.go b/pkg/e621/utils/helper.go new file mode 100644 index 0000000..802fc93 --- /dev/null +++ b/pkg/e621/utils/helper.go @@ -0,0 +1,10 @@ +package utils + +func SliceFilter[T any](slice []T, filter func(T) bool) (ret []T) { + for _, s := range slice { + if filter(s) { + ret = append(ret, s) + } + } + return +}
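
A note on the remaining TODO in GetLatestPostsDBExportDataAsStruct: the posts export keeps all tags of a post in a single tag_string column, a flat list of tag names with no category assignment, so gocsv cannot populate the nested Tags struct on its own. One possible direction is sketched below. It is only a sketch, not part of this patch; it assumes that leaving every tag uncategorized in Tags.General is acceptable until categories can be resolved by other means, would live in pkg/e621/model/post.go alongside the Tags type, would require importing "strings" there, and mirrors the PoolIDs.UnmarshalCSV approach above.

func (tags *Tags) UnmarshalCSV(csv string) error {
	// The export carries no category information, so every tag from the
	// space-separated tag_string column is collected into General for now.
	tags.General = append(tags.General, strings.Fields(csv)...)
	return nil
}

With an unmarshaller like this in place, GetLatestPostsDBExportDataAsStruct could reuse the same gzip-plus-gocsv pipeline that GetLatestPoolsDBExportDataAsStruct already uses for the pools export.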