Skip to main content

Document Processing

The SDK provides functionality for processing documents. This section covers the available methods for document processing, fully aligned with the Nanonets API.

Document Schema

The document object returned by the API contains the following fields:

// Document is the document object returned by the API.
//
// Status reports processing state, the Verification* fields describe the
// outcome of verification, and Pages carries the per-page data.
type Document struct {
	DocumentID           string   `json:"document_id"`
	Status               string   `json:"status"`                 // "success", "pending", "failure"
	UploadedAt           string   `json:"uploaded_at"`            // ISO 8601 timestamp
	Metadata             any      `json:"metadata"`               // Optional metadata attached during upload
	OriginalDocumentName string   `json:"original_document_name"` // Original filename or URL
	RawDocumentURL       string   `json:"raw_document_url"`       // URL to access original document
	VerificationStatus   string   `json:"verification_status"`    // "success", "failed"
	VerificationStage    string   `json:"verification_stage"`     // stage_id where document is flagged
	VerificationMessage  string   `json:"verification_message"`   // Optional message explaining failure
	AssignedReviewers    []string `json:"assigned_reviewers"`     // List of reviewer email addresses
	Pages                []Page   `json:"pages"`
}

// Page is a single page of a document; Document.Pages holds a slice of these.
type Page struct {
	PageID string `json:"page_id"`
	PageNumber int `json:"page_number"` // NOTE(review): whether numbering starts at 0 or 1 is not stated here — confirm
	ImageURL string `json:"image_url"` // URL to access page image
	Data PageData `json:"data"` // Fields and tables for this page
}

// PageData groups the fields and tables of one page (the Data member of Page).
type PageData struct {
	Fields map[string][]FieldData `json:"fields"` // Each key maps to a slice of FieldData; the key is presumably the field name — confirm against the API
	Tables []Table `json:"tables"`
}

// FieldData is one entry in PageData.Fields: a value together with its
// confidence, Bbox, and verification/moderation state.
type FieldData struct {
	FieldDataID string `json:"field_data_id"`
	Value string `json:"value"`
	Confidence float64 `json:"confidence"` // NOTE(review): scale (e.g. 0-1 vs 0-100) is not stated here — confirm
	Bbox []float64 `json:"bbox"` // Presumably a bounding box; coordinate order and units are not stated here — confirm
	VerificationStatus string `json:"verification_status"`
	VerificationMessage string `json:"verification_message"`
	IsModerated bool `json:"is_moderated"`
}

// Table is one entry in PageData.Tables; its contents are the cells in Cells.
type Table struct {
	TableID string `json:"table_id"`
	Bbox []float64 `json:"bbox"` // Presumably a bounding box; coordinate order and units are not stated here — confirm
	Cells []TableCell `json:"cells"`
}

// TableCell is one cell of a Table, addressed by Row and Col, with its header
// and text plus verification/moderation state.
type TableCell struct {
	CellID string `json:"cell_id"`
	Row int `json:"row"` // NOTE(review): whether indices start at 0 or 1 is not stated here — confirm
	Col int `json:"col"`
	Header string `json:"header"`
	Text string `json:"text"`
	Bbox []float64 `json:"bbox"` // Presumably a bounding box; coordinate order and units are not stated here — confirm
	VerificationStatus string `json:"verification_status"`
	VerificationMessage string `json:"verification_message"`
	IsModerated bool `json:"is_moderated"`
}

Upload Document

Uploads a document for processing in a specific workflow. Supports both file and URL upload, with async and metadata options.

import "github.com/NanoNets/nanonets-go/nanonets"

client := nanonets.NewClient("your_api_key")

// Upload a document from a local file into workflow "workflow_123".
result, err := client.Documents.Upload("workflow_123", nanonets.UploadDocumentRequest{
	File:  "path/to/document.pdf",
	Async: false, // Set to true for asynchronous processing
	Metadata: map[string]string{
		"customer_id":   "12345",
		"document_type": "invoice",
		"department":    "finance",
	},
})
if err != nil {
	// Handle error
}

// Upload a document from a URL. The result uses its own name so that ":="
// still declares a new variable when both uploads share a scope; reusing
// "result, err :=" here would not compile.
urlResult, err := client.Documents.UploadFromURL("workflow_123", nanonets.UploadDocumentFromURLRequest{
	URL:   "https://example.com/invoice.pdf",
	Async: false,
	Metadata: map[string]string{
		"customer_id":   "12345",
		"document_type": "invoice",
		"department":    "finance",
	},
})
if err != nil {
	// Handle error
}

Get Document Status

Retrieves the current processing status and results of a specific document.

// Fetch one document's current processing status and results.
// Arguments: the workflow ID, then the document ID.
document, err := client.Documents.Get("workflow_123", "document_123")
if err != nil {
	// Handle error
}

List Documents

Retrieves a list of all documents in a specific workflow.

// List all documents in the workflow.
documents, err := client.Documents.List("workflow_123")
if err != nil {
	// Handle error
}

// List documents with pagination. The result uses its own name so that ":="
// still declares a new variable when both calls share a scope; reusing
// "documents, err :=" here would not compile.
pagedDocuments, err := client.Documents.ListWithPagination("workflow_123", 1, 10) // page 1, 10 per page
if err != nil {
	// Handle error
}

Delete Document

Removes a document from the workflow.

// Remove the document from the workflow; only an error is returned.
// Arguments: the workflow ID, then the document ID.
err := client.Documents.Delete("workflow_123", "document_123")
if err != nil {
	// Handle error
}

Get Document Fields

Retrieves all extracted fields from a specific document.

// Fetch all extracted fields of one document.
// Arguments: the workflow ID, then the document ID.
fields, err := client.Documents.GetFields("workflow_123", "document_123")
if err != nil {
	// Handle error
}

Get Document Tables

Retrieves all extracted tables from a specific document.

// Fetch all extracted tables of one document.
// Arguments: the workflow ID, then the document ID.
tables, err := client.Documents.GetTables("workflow_123", "document_123")
if err != nil {
	// Handle error
}

Get Document Original File

Downloads the original document file.

// Download the originally uploaded file for one document.
// fileBytes presumably holds the raw file contents ([]byte) — confirm against the SDK.
fileBytes, err := client.Documents.GetOriginalFile("workflow_123", "document_123")
if err != nil {
	// Handle error
}

Error Handling & Common Scenarios

API status codes:

  • 200 OK: Request successful
  • 201 Created: Document uploaded successfully
  • 400 Bad Request: Invalid request parameters or unsupported file type
  • 401 Unauthorized: Invalid/missing API key
  • 404 Not Found: Workflow or document not found
  • 413 Payload Too Large: File size exceeds limit
  • 500 Internal Server Error: Server-side error

Common error scenarios:

  • File upload issues (unsupported type, too large, corrupted)
  • Processing errors (timeout, unreadable content, failure)
  • Field/table header issues (invalid/duplicate names)

Example of branching on the returned error:

// Branch on the error text to tell authentication failures from invalid input.
// The "..." stands in for the real Upload arguments.
// NOTE(review): this matches "401"/"400" as substrings of err.Error(); it
// assumes the SDK puts the HTTP status code in the error text — confirm. Any
// other occurrence of those digits in the message would match as well.
result, err := client.Documents.Upload(...)
if err != nil {
	switch {
	case strings.Contains(err.Error(), "401"):
		fmt.Println("Authentication failed:", err)
	case strings.Contains(err.Error(), "400"):
		fmt.Println("Invalid input:", err)
	default:
		fmt.Println("An error occurred:", err)
	}
}

Best Practices

  1. Handle Async Processing

    // For async uploads, implement polling.
    //
    // waitForProcessing calls client.Documents.Get up to maxAttempts times,
    // pausing pollInterval between attempts. It returns the document once its
    // Status is "success". It returns an error when Get fails, when Status is
    // "failure", or when the document has not finished after the last attempt.
    func waitForProcessing(client *nanonets.Client, workflowID, documentID string) (*nanonets.Document, error) {
        const (
            maxAttempts  = 30
            pollInterval = 2 * time.Second
        )
        for attempt := 1; attempt <= maxAttempts; attempt++ {
            doc, err := client.Documents.Get(workflowID, documentID)
            if err != nil {
                return nil, err
            }
            switch doc.Status {
            case "success":
                return doc, nil
            case "failure":
                // NOTE(review): VerificationMessage is used as the failure reason;
                // confirm the API fills it in for processing failures.
                return nil, fmt.Errorf("processing failed: %s", doc.VerificationMessage)
            }
            // Wait only when another attempt follows; sleeping after the final
            // check would just delay the timeout error.
            if attempt < maxAttempts {
                time.Sleep(pollInterval)
            }
        }
        return nil, fmt.Errorf("timeout waiting for processing")
    }
  2. Batch Processing

    // processBatch uploads each file to the workflow with Async set to true.
    // It stops at the first failed upload and returns an error naming that
    // file; it returns nil when every upload succeeds (or files is empty).
    func processBatch(client *nanonets.Client, workflowID string, files []string) error {
        for _, file := range files {
            req := nanonets.UploadDocumentRequest{
                File:  file,
                Async: true,
            }
            if _, err := client.Documents.Upload(workflowID, req); err != nil {
                // %w keeps the underlying error reachable via errors.Is / errors.As.
                return fmt.Errorf("failed to upload %s: %w", file, err)
            }
        }
        return nil
    }
  3. Error Recovery

    // uploadWithRetry uploads file synchronously (Async: false), making up to
    // maxAttempts attempts in total. Between attempts it waits 1s, then 2s.
    // It returns the first successful result, or the error from the last
    // attempt when all of them fail.
    //
    // NOTE(review): every error is retried, including ones a retry cannot fix
    // (for example an invalid API key); narrow this if the SDK exposes error types.
    func uploadWithRetry(client *nanonets.Client, workflowID, file string) (*nanonets.Document, error) {
        const maxAttempts = 3
        var err error
        for attempt := 1; attempt <= maxAttempts; attempt++ {
            var result *nanonets.Document
            result, err = client.Documents.Upload(workflowID, nanonets.UploadDocumentRequest{
                File:  file,
                Async: false,
            })
            if err == nil {
                return result, nil
            }
            // Back off only when another attempt follows; sleeping after the
            // last failure would just delay returning the error.
            if attempt < maxAttempts {
                time.Sleep(time.Duration(attempt) * time.Second)
            }
        }
        return nil, err
    }

Setup

Minimum Go version required: 1.18

Install the Nanonets Go SDK:

go get github.com/NanoNets/nanonets-go/nanonets

Then import the package and create a client:

import (
"github.com/NanoNets/nanonets-go/nanonets"
)

// Create a client from your API key.
client := nanonets.NewClient("your_api_key")
// Make API calls through the client's services, e.g. client.Workflows and client.Documents.