Create job

An endpoint to initiate a job for document processor flows, including Entity Classification, Document Classification, and Entity + Document Classification. Each processor flow is identified by a unique documentProcessorFlowGuid, and the processing is handled by the GenAI provider.

Sync and Async Extraction

The endpoint /ocrsvc/v{ver}/DocumentProcessorFlows/{documentProcessorFlowGuid}/Job is used to run a document processor flow. It accepts file formats such as JPG, JPEG, PNG, and PDF.

The table shows the required values:


Component	Description
APIMethod	/ocrsvc/v{ver}/DocumentProcessorFlows/{documentProcessorFlowGuid}/Job
Input	documentProcessorFlowGuid: Unique GUID of the document processor flow. ver: 1 (old extraction method) or 2 (new extraction method). document: The input file; .jpg, .jpeg, .png, and .pdf files are accepted. pageNo: The pages to process; for example, for a 10-page PDF, values can be 1,2,3 or 1-3 or 1, 3-5 or 1-3, 7-10.
Output	Job ID is generated

Response output for ver = 1

{
  "ExtractionData": [
    {
      "FieldName": "Name of the extracted entity",
      "FieldValue": "Extracted text value",
      "FieldValues": "List of Value in case of multiple values scenario",
      "FieldGeometry": [
        "[Left, Top, Width, Height] (Field Name geometry)",
        "[Left, Top, Width, Height] (Field Value geometry)"
      ],
      "Confidence": ["Confidence score for each extracted value"],
      "PageNo": ["List of pages where the field was found"],
      "Type": {
        "Value": "Category of the extracted field (e.g., Currency, Buyer Name, etc.)",
        "Confidence": "Confidence score for field classification"
      }
    }
  ],
  "TableDetection": [
    {
      "TableName": "Name of the detected table",
      "TableRows": [
        [
          {
            "ColumnName": "Column name in the table",
            "OCR_text": "Extracted text for this column",
            "Confidence": "Confidence score for the extracted text",
            "Geometry": "Bounding box coordinates for the column",
            "Type": "Category of the column data",
            "PageNo": "Page number where this table is found"
          }
        ]
      ],
      "TableGeometry": "Bounding box coordinates for the table",
      "PageNo": "List of pages where the table was found"
    }
  ],
  "DocumentClassification": {
    "DocumentDetected": "Type of document identified (e.g., Invoice, Purchase Order, etc.)",
    "ConfidenceScore": "Confidence score for document classification",
    "ListOfClass": {
      "0": "Possible document types detected"
    },
    "ConfidenceOfClass": {
      "Document Type": "Confidence score for each detected document type"
    }
  },
  "_metadata": {
    "TotalFields": "Total number of extracted fields",
    "Confidence": "Overall confidence score of the extraction",
    "TaskID": "Unique identifier for the OCR processing job",
    "OcrProvider": "Name of the OCR service provider used",
    "TenantID": "Identifier for the tenant or client using the service",
    "NumberOfPages": "Total number of pages in the document",
    "TotalTables": "Total number of tables detected in the document"
  }
}

Response output for ver = 2

{
	"ExtractionOutput": [
		{
			"EntityExtraction": [
				{
					"FieldName": "Name of the extracted entity",
					"FieldValues": "List of Value in case of multiple values scenario",
					"FieldGeometry": [
						[
							"Left",
							"Top",
							"Width",
							"Height"
						],
						[
							"Left",
							"Top",
							"Width",
							"Height"
						]
					],
					"Confidence": [
						"Confidence score for each extracted value"
					],
					"PageNo": [
						"List of pages where the field was found"
					],
					"Type": {
						"Value": "Category of the extracted field (e.g., Currency, Buyer Name, etc.)",
						"Confidence": "Confidence score for field classification"
					}
				}
			],
			"TableExtraction": [
				{
					"TableName": "Name of the detected table",
					"TableRows": [
						[
							{
								"ColumnName": "Column name in the table",
								"OCR_text": "Extracted text for this column",
								"Confidence": "Confidence score for the extracted text",
								"Geometry": "Bounding box coordinates for the column",
								"Type": "Category of the column data",
								"PageNo": "Page number where this table is found"
							}
						]
					],
					"TableGeometry": "Bounding box coordinates for the table",
					"PageNo": "List of pages where the table was found"
				}
			],
			"DocumentClassification": {
				"DocumentDetected": "Type of document identified (e.g., Invoice, Purchase Order, etc.)",
				"ConfidenceScore": "Confidence score for document classification",
				"ListOfClass": {
					"0": "Possible document types detected"
				},
				"ConfidenceOfClass": {
					"Document Type": "Confidence score for each detected document type"
				},
				"PageNo": "Page number where the detected document class is located",
				"KeyIdentifiers": [
					"List of key identifiers used for classification"
				]
			},
			"_metadata": {
				"TotalFields": "Total number of extracted fields",
				"TotalTables": "Total number of tables detected in the document",
				"Confidence": "Overall confidence score of the extraction",
				"TaskID": "Unique identifier for the OCR processing job",
				"OcrProvider": "Name of the OCR service provider used",
				"TenantID": "TenantID",
				"NumberOfPages": "Total number of pages in the document",
				"Model": "GenAI Model ID, like Cluade 3.7"
			}
		}
	],
	"_metadata": {
		"TotalFields": "Total number of extracted fields",
		"TotalTables": "Total number of tables detected in the document",
		"Confidence": "Overall confidence score of the extraction",
		"TaskID": "Unique identifier for the OCR processing job",
		"OcrProvider": "Name of the OCR service provider used",
		"TenantID": "TenantID",
		"NumberOfPages": "Total number of pages in the document",
		"Model": "GenAI Model ID, like Cluade 3.7"
	}
}