diff --git a/internal/dehashed/dehashed.go b/internal/dehashed/dehashed.go
index 6c0e249..bb03500 100644
--- a/internal/dehashed/dehashed.go
+++ b/internal/dehashed/dehashed.go
@@ -12,24 +12,34 @@ import (
 
 // Dehasher is a struct for querying the Dehashed API
 type Dehasher struct {
-	options  sqlite.QueryOptions
-	nextPage int
-	debug    bool
-	balance  int
-	request  *DehashedSearchRequest
-	client   *DehashedClientV2
+	options   sqlite.QueryOptions
+	nextPage  int
+	debug     bool
+	balance   int
+	request   *DehashedSearchRequest
+	client    *DehashedClientV2
+	queryPlan []struct{ Page, Size int }
 }
 
 // NewDehasher creates a new Dehasher
 func NewDehasher(options *sqlite.QueryOptions) *Dehasher {
 	dh := &Dehasher{
-		options:  *options,
-		nextPage: options.StartingPage + 1,
-		debug:    options.Debug,
-		balance:  0,
+		options:   *options,
+		nextPage:  options.StartingPage + 1,
+		debug:     options.Debug,
+		balance:   0,
+		queryPlan: make([]struct{ Page, Size int }, 0),
 	}
 	dh.setQueries()
-	dh.request = NewDehashedSearchRequest(dh.options.StartingPage, dh.options.MaxRecords, dh.options.WildcardMatch, dh.options.RegexMatch, false, options.Debug)
+	dh.request = NewDehashedSearchRequest(
+		dh.queryPlan[0].Page,
+		dh.queryPlan[0].Size,
+		dh.options.WildcardMatch,
+		dh.options.RegexMatch,
+		false,
+		options.Debug,
+	)
+
 	dh.buildRequest()
 	return dh
 }
@@ -48,121 +58,181 @@ func (dh *Dehasher) getNextPage() int {
 	return nextPage
 }
 
+// generatePagination builds the request plan as a list of (page, size) pairs.
+// Every pair satisfies page*size < maxPageProduct, and the sizes add up to
+// maxRecords unless the plan is cut short by the page bound in the loop.
+// A maxRecords of zero or less yields a nil plan.
+//
+// NOTE(review): size shrinks as page grows; if the API derives its offset
+// from page and size together, consecutive requests may overlap - confirm.
+func generatePagination(maxRecords int) []struct{ Page, Size int } {
+	const maxPageProduct = 9500
+	var queries []struct{ Page, Size int }
+
+	remaining := maxRecords
+	page := 1
+
+	// Bound page as well: once page reaches maxPageProduct the integer
+	// division below yields size 0, remaining stops shrinking, and the loop
+	// would append forever.
+	for remaining > 0 && page < maxPageProduct {
+		size := (maxPageProduct - 1) / page // guarantees page * size < maxPageProduct
+		if size > remaining {
+			size = remaining
+		}
+		queries = append(queries, struct{ Page, Size int }{page, size})
+		remaining -= size
+		page++
+	}
+
+	return queries
+}
+
 // setQueries sets the number of queries to make based on the number of records and requests
 func (dh *Dehasher) setQueries() {
-	var numQueries int
-
-	if dh.debug {
-		debug.PrintInfo("setting queries")
+	if dh.options.MaxRecords <= 0 {
+		dh.options.MaxRecords = 10000
 	}
 
-	switch {
-	case dh.options.MaxRequests == 0:
-		zap.L().Error("max requests cannot be zero")
-		fmt.Println("[!] Max Requests cannot be zero")
-		os.Exit(1)
-	case dh.options.MaxRecords <= 10000 || dh.options.MaxRequests == 1:
-		numQueries = 1
-		if dh.options.MaxRecords > 10000 {
-			dh.options.MaxRecords = 10000
+	dh.queryPlan = generatePagination(dh.options.MaxRecords)
+
+	fmt.Printf("Making %d requests to retrieve %d records\n", len(dh.queryPlan), dh.options.MaxRecords)
+
+	if dh.debug {
+		for i, q := range dh.queryPlan {
+			debug.PrintInfo(fmt.Sprintf("query %d: page=%d, size=%d", i+1, q.Page, q.Size))
 		}
-		zap.L().Info("max requests set to 1", zap.Int("max_records", dh.options.MaxRecords))
-	case dh.options.MaxRequests < 0 && dh.options.MaxRecords > 20000:
-		numQueries = 3
-		dh.options.MaxRecords = 10000
-		zap.L().Info("max requests set to 3", zap.Int("max_records", dh.options.MaxRecords))
-	case dh.options.MaxRequests < 0 && dh.options.MaxRecords > 10000:
-		numQueries = 2
-		dh.options.MaxRecords = 10000
-		zap.L().Info("max requests set to 2", zap.Int("max_records", dh.options.MaxRecords))
-	case dh.options.MaxRecords < 0 && dh.options.MaxRecords < 10000:
-		numQueries = 1
-		zap.L().Info("max requests set to 1", zap.Int("max_records", dh.options.MaxRecords))
-	case dh.options.MaxRequests == 2 && dh.options.MaxRecords > 20000:
-		numQueries = 2
-		dh.options.MaxRecords = 10000
-		zap.L().Info("max requests set to 2", zap.Int("max_records", dh.options.MaxRecords))
-	case dh.options.MaxRequests == 2 && dh.options.MaxRecords <= 10000:
-		numQueries = 1
-		zap.L().Info("max requests set to 1", zap.Int("max_records", dh.options.MaxRecords))
-	default:
-		numQueries = 3
-		dh.options.MaxRecords = 10000
-		zap.L().Info("max requests set to 3", zap.Int("max_records", dh.options.MaxRecords))
 	}
-
-	dh.options.MaxRequests = numQueries
-
-	if dh.debug {
-		debug.PrintInfo(fmt.Sprintf("setting max requests: %d", numQueries))
-		debug.PrintInfo(fmt.Sprintf("setting max records: %d", dh.options.MaxRecords))
-	}
-
-	fmt.Printf("Making %d Requests for %d Records (%d Total)\n", dh.options.MaxRequests, dh.options.MaxRecords, dh.options.MaxRequests*dh.options.MaxRecords)
 }
 
 // Start starts the querying process
 func (dh *Dehasher) Start() {
 	fmt.Printf("[*] Querying Dehashed API...\n")
 
-	for i := 0; i < dh.options.MaxRequests; i++ {
-		fmt.Printf(" [*] Performing Request...\n")
-		count, balance, err := dh.client.Search(*dh.request)
-		if err != nil {
-			if dh.debug {
-				debug.PrintInfo("error performing request")
-				debug.PrintError(err)
-			}
-			// Check if it's a DehashError
-			if dhErr, ok := err.(*DehashError); ok {
-				fmt.Printf(" [!] Dehashed API Error: %s (Code: %d)\n", dhErr.Message, dhErr.Code)
-				zap.L().Error("dehashed_api_error",
-					zap.String("message", dhErr.Message),
-					zap.Int("code", dhErr.Code),
-				)
-			} else {
-				fmt.Printf(" [!] Error performing request: %v\n", err)
-				zap.L().Error("request_error",
-					zap.String("message", "failed to perform request"),
-					zap.Error(err),
-				)
-			}
+	// Make initial request to get total count
+	fmt.Printf(" [*] Performing initial request to determine total records...\n")
+	totalRecords, balance, err := dh.client.Search(*dh.request)
+	if err != nil {
+		handleSearchError(dh, err)
+		return
+	}
 
-			if len(dh.client.results) > 0 {
-				fmt.Printf(" [!] Partial results retrieved. Storing Results...\n")
-				err := sqlite.StoreResults(dh.client.GetResults())
-				if err != nil {
-					zap.L().Error("store_results",
-						zap.String("message", "failed to store results"),
-						zap.Error(err),
-					)
-					fmt.Printf(" [!] Error storing results: %v\n", err)
-				}
-			}
+	dh.balance = balance
+	recordsRetrieved := len(dh.client.results)
+
+	fmt.Printf(" [+] Retrieved %d records\n", recordsRetrieved)
+	fmt.Printf(" [*] Total available records: %d\n", totalRecords)
+
+	if dh.options.PrintBalance {
+		fmt.Printf(" [*] Balance: %d\n", balance)
+	}
+
+	// If we've already got all records or reached our limit, we're done
+	if recordsRetrieved >= totalRecords || recordsRetrieved >= dh.options.MaxRecords {
+		fmt.Printf(" [*] All requested records retrieved\n")
+		dh.parseResults()
+		return
+	}
+
+	// Calculate remaining records to fetch
+	remainingRecords := totalRecords - recordsRetrieved
+	if dh.options.MaxRecords > 0 && dh.options.MaxRecords < totalRecords {
+		remainingRecords = dh.options.MaxRecords - recordsRetrieved
+	}
+
+	// Check if we need user confirmation for large datasets
+	if remainingRecords > 30000 {
+		tokensRequired := (remainingRecords + 9999) / 10000 // Ceiling division
+		fmt.Printf("\n[!] Large dataset detected: %d additional records\n", remainingRecords)
+		fmt.Printf("[!] This will require approximately %d API tokens\n", tokensRequired)
+		fmt.Printf("[!] Your current balance: %d\n", balance)
+
+		if balance < tokensRequired {
+			fmt.Printf("[!] WARNING: Your balance (%d) is less than required tokens (%d)\n", balance, tokensRequired)
+		}
+
+		fmt.Printf("[?] Do you want to continue? (y/n): ")
+		var response string
+		fmt.Scanln(&response)
+
+		if response != "y" && response != "Y" {
+			fmt.Println("[*] Operation cancelled by user")
 			dh.parseResults()
-			os.Exit(-1)
+			return
+		}
+	}
+
+	// Make additional requests
+	for i, q := range dh.queryPlan {
+		if i == 0 {
+			// We already made the first request before this loop
+			continue
+		}
+
+		dh.request.Page = q.Page
+		dh.request.Size = q.Size
+
+		fmt.Printf(" [*] Performing Request %d of %d (page=%d, size=%d)...\n", i+1, len(dh.queryPlan), q.Page, q.Size)
+
+		_, balance, err := dh.client.Search(*dh.request)
+		if err != nil {
+			handleSearchError(dh, err)
+			break
 		}
 
 		dh.balance = balance
+		recordsRetrieved += len(dh.client.results)
 
-		if count < dh.options.MaxRecords {
-			fmt.Printf(" [+] Retrieved %d records\n", count)
-			fmt.Printf(" [-] Not enough entries, ending queries\n")
-			break
-		} else {
-			fmt.Printf(" [+] Retrieved %d records\n", dh.options.MaxRecords)
-		}
+		fmt.Printf(" [+] Retrieved %d total records so far\n", recordsRetrieved)
 
 		if dh.options.PrintBalance {
 			fmt.Printf(" [*] Balance: %d\n", balance)
 		}
 
-		dh.request.Page = dh.getNextPage()
+		if recordsRetrieved >= totalRecords || recordsRetrieved >= dh.options.MaxRecords {
+			fmt.Printf(" [*] All requested records retrieved\n")
+			break
+		}
 	}
 
 	dh.parseResults()
 }
 
+// Helper function to handle search errors
+func handleSearchError(dh *Dehasher, err error) {
+	if dh.debug {
+		debug.PrintInfo("error performing request")
+		debug.PrintError(err)
+	}
+
+	// Check if it's a DehashError
+	if dhErr, ok := err.(*DehashError); ok {
+		fmt.Printf(" [!] Dehashed API Error: %s (Code: %d)\n", dhErr.Message, dhErr.Code)
+		zap.L().Error("dehashed_api_error",
+			zap.String("message", dhErr.Message),
+			zap.Int("code", dhErr.Code),
+		)
+	} else {
+		fmt.Printf(" [!] Error performing request: %v\n", err)
+		zap.L().Error("request_error",
+			zap.String("message", "failed to perform request"),
+			zap.Error(err),
+		)
+	}
+
+	if len(dh.client.results) > 0 {
+		fmt.Printf(" [!] Partial results retrieved. Storing Results...\n")
+		err := sqlite.StoreResults(dh.client.GetResults())
+		if err != nil {
+			zap.L().Error("store_results",
+				zap.String("message", "failed to store results"),
+				zap.Error(err),
+			)
+			fmt.Printf(" [!] Error storing results: %v\n", err)
+		}
+	}
+}
+
 // buildRequest constructs the query map
 func (dh *Dehasher) buildRequest() {
 	if len(dh.options.UsernameQuery) > 0 {