Documentation
¶
Index ¶
- Variables
- func Bool(v bool) *bool
- func Float64(v float64) *float64
- func Int64(v int64) *int64
- func String(v string) *string
- func ToBool(p *bool) (v bool)
- func ToFloat64(p *float64) (v float64)
- func ToInt64(p *int64) (v int64)
- func ToString(p *string) (v string)
- type APIError
- type AudioData
- type AudioIntelligenceModelStatus
- type AutoHighlightResult
- type AutoHighlightsResult
- type Chapter
- type Client
- type ClientOption
- type ContentSafetyLabel
- type ContentSafetyLabelResult
- type ContentSafetyLabelsResult
- type CreateRealtimeTemporaryTokenParams
- type Entity
- type EntityType
- type Error
- type FinalTranscript
- type LeMURActionItemsParams
- type LeMURActionItemsResponse
- type LeMURBaseParams
- type LeMURBaseResponse
- type LeMURModel
- type LeMURQuestion
- type LeMURQuestionAnswer
- type LeMURQuestionAnswerParams
- type LeMURQuestionAnswerResponse
- type LeMURService
- func (s *LeMURService) ActionItems(ctx context.Context, params LeMURActionItemsParams) (LeMURActionItemsResponse, error)
- func (s *LeMURService) GetResponseData(ctx context.Context, requestID string, response interface{}) error
- func (s *LeMURService) PurgeRequestData(ctx context.Context, requestID string) (PurgeLeMURRequestDataResponse, error)
- func (s *LeMURService) Question(ctx context.Context, params LeMURQuestionAnswerParams) (LeMURQuestionAnswerResponse, error)
- func (s *LeMURService) Summarize(ctx context.Context, params LeMURSummaryParams) (LeMURSummaryResponse, error)
- func (s *LeMURService) Task(ctx context.Context, params LeMURTaskParams) (LeMURTaskResponse, error)
- type LeMURSummaryParams
- type LeMURSummaryResponse
- type LeMURTaskParams
- type LeMURTaskResponse
- type LeMURUsage
- type ListTranscriptParams
- type MessageType
- type PIIPolicy
- type PageDetails
- type ParagraphsResponse
- type PartialTranscript
- type PurgeLeMURRequestDataResponse
- type RealTimeBaseMessage
- type RealTimeBaseTranscript
- type RealTimeClient
- func (c *RealTimeClient) Connect(ctx context.Context) error
- func (c *RealTimeClient) Disconnect(ctx context.Context, waitForSessionTermination bool) error
- func (c *RealTimeClient) ForceEndUtterance(ctx context.Context) error
- func (c *RealTimeClient) Send(ctx context.Context, samples []byte) error
- func (c *RealTimeClient) SetEndUtteranceSilenceThreshold(ctx context.Context, threshold int64) error
- type RealTimeClientOption
- func WithHandler(handler RealTimeHandler) RealTimeClientOption (deprecated)
- func WithRealTimeAPIKey(apiKey string) RealTimeClientOption
- func WithRealTimeAuthToken(token string) RealTimeClientOption
- func WithRealTimeBaseURL(rawurl string) RealTimeClientOption
- func WithRealTimeEncoding(encoding RealTimeEncoding) RealTimeClientOption
- func WithRealTimeSampleRate(sampleRate int) RealTimeClientOption
- func WithRealTimeTranscriber(transcriber *RealTimeTranscriber) RealTimeClientOption
- func WithRealTimeWordBoost(wordBoost []string) RealTimeClientOption
- type RealTimeEncoding
- type RealTimeError
- type RealTimeHandler
- type RealTimeService
- type RealTimeTranscriber
- type RealtimeTemporaryTokenResponse
- type RedactPIIAudioQuality
- type RedactedAudioNotification
- type RedactedAudioResponse
- type RedactedAudioStatus
- type SentencesResponse
- type Sentiment
- type SentimentAnalysisResult
- type SessionBegins
- type SessionInformation
- type SessionTerminated
- type SeverityScoreSummary
- type SpeechModel
- type SubstitutionPolicy
- type SubtitleFormat
- type SummaryModel
- type SummaryType
- type TerminateSession
- type Timestamp
- type TopicDetectionModelResult
- type TopicDetectionResult
- type Transcript
- type TranscriptBoostParam
- type TranscriptCustomSpelling
- type TranscriptGetSubtitlesOptions
- type TranscriptLanguageCode
- type TranscriptList
- type TranscriptListItem
- type TranscriptOptionalParams
- type TranscriptParagraph
- type TranscriptParams
- type TranscriptReadyNotification
- type TranscriptReadyStatus
- type TranscriptSentence
- type TranscriptService
- func (s *TranscriptService) Delete(ctx context.Context, transcriptID string) (Transcript, error)
- func (s *TranscriptService) Get(ctx context.Context, transcriptID string) (Transcript, error)
- func (s *TranscriptService) GetParagraphs(ctx context.Context, transcriptID string) (ParagraphsResponse, error)
- func (s *TranscriptService) GetRedactedAudio(ctx context.Context, transcriptID string) (RedactedAudioResponse, error)
- func (s *TranscriptService) GetSentences(ctx context.Context, transcriptID string) (SentencesResponse, error)
- func (s *TranscriptService) GetSubtitles(ctx context.Context, transcriptID string, format SubtitleFormat, ...) ([]byte, error)
- func (s *TranscriptService) List(ctx context.Context, options ListTranscriptParams) (TranscriptList, error)
- func (s *TranscriptService) SubmitFromReader(ctx context.Context, reader io.Reader, params *TranscriptOptionalParams) (Transcript, error)
- func (s *TranscriptService) SubmitFromURL(ctx context.Context, audioURL string, opts *TranscriptOptionalParams) (Transcript, error)
- func (s *TranscriptService) TranscribeFromReader(ctx context.Context, reader io.Reader, opts *TranscriptOptionalParams) (Transcript, error)
- func (s *TranscriptService) TranscribeFromURL(ctx context.Context, audioURL string, opts *TranscriptOptionalParams) (Transcript, error)
- func (s *TranscriptService) Wait(ctx context.Context, transcriptID string) (Transcript, error)
- func (s *TranscriptService) WordSearch(ctx context.Context, transcriptID string, words []string) (WordSearchResponse, error)
- type TranscriptStatus
- type TranscriptUtterance
- type TranscriptWebhookNotification
- type TranscriptWord
- type UploadedFile
- type Word
- type WordSearchMatch
- type WordSearchResponse
- type WordSearchTimestamp
Constants ¶
This section is empty.
Variables ¶
var ( // ErrSessionClosed is returned when attempting to write to a closed // session. ErrSessionClosed = errors.New("session closed") // ErrDisconnected is returned when attempting to write to a disconnected // client. ErrDisconnected = errors.New("client is disconnected") // ErrConnectionNotFound is returned when attempting to disconnect a // nil connection ErrConnectionNotFound = errors.New("client connection does not exist") )
var DefaultSampleRate = 16_000
Functions ¶
Types ¶
type APIError ¶
type APIError struct { Status int `json:"-"` Message string `json:"error"` Response *http.Response `json:"-"` }
APIError represents an error returned by the AssemblyAI API.
type AudioData ¶
type AudioData struct { // Base64 encoded raw audio data AudioData string `json:"audio_data,omitempty"` }
type AudioIntelligenceModelStatus ¶
type AudioIntelligenceModelStatus string
Either success, or unavailable in the rare case that the model failed
type AutoHighlightResult ¶
type AutoHighlightResult struct { // The total number of times the key phrase appears in the audio file Count *int64 `json:"count,omitempty"` // The total relevancy to the overall audio file of this key phrase - a greater number means more relevant Rank *float64 `json:"rank,omitempty"` // The text itself of the key phrase Text *string `json:"text,omitempty"` // The timestamps of the key phrase Timestamps []Timestamp `json:"timestamps,omitempty"` }
type AutoHighlightsResult ¶
type AutoHighlightsResult struct { // A temporally-sequential array of Key Phrases Results []AutoHighlightResult `json:"results,omitempty"` // The status of the Key Phrases model. Either success, or unavailable in the rare case that the model failed. Status AudioIntelligenceModelStatus `json:"status,omitempty"` }
An array of results for the Key Phrases model, if it is enabled. See [Key phrases](https://www.assemblyai.com/docs/models/key-phrases) for more information.
type Chapter ¶
type Chapter struct { // The ending time, in milliseconds, for the chapter End *int64 `json:"end,omitempty"` // An ultra-short summary (just a few words) of the content spoken in the chapter Gist *string `json:"gist,omitempty"` // A single sentence summary of the content spoken during the chapter Headline *string `json:"headline,omitempty"` // The starting time, in milliseconds, for the chapter Start *int64 `json:"start,omitempty"` // A one paragraph summary of the content spoken during the chapter Summary *string `json:"summary,omitempty"` }
Chapter of the audio file
type Client ¶
type Client struct { Transcripts *TranscriptService LeMUR *LeMURService RealTime *RealTimeService // contains filtered or unexported fields }
Client manages the communication with the AssemblyAI API.
func NewClientWithOptions ¶
func NewClientWithOptions(opts ...ClientOption) *Client
NewClientWithOptions returns a new configurable AssemblyAI client. If you provide client options, they override the default values. Most users will want to use [NewClientWithAPIKey].
type ClientOption ¶
type ClientOption func(*Client)
ClientOption lets you configure the AssemblyAI client.
func WithAPIKey ¶
func WithAPIKey(key string) ClientOption
WithAPIKey sets the API key used for authentication.
func WithBaseURL ¶
func WithBaseURL(rawurl string) ClientOption
WithBaseURL sets the API endpoint used by the client. Mainly used for testing.
func WithHTTPClient ¶
func WithHTTPClient(httpClient *http.Client) ClientOption
WithHTTPClient sets the http.Client used for making requests to the API.
func WithUserAgent ¶
func WithUserAgent(userAgent string) ClientOption
WithUserAgent sets the user agent used by the client.
type ContentSafetyLabel ¶
type ContentSafetyLabel struct { // The confidence score for the topic being discussed, from 0 to 1 Confidence *float64 `json:"confidence,omitempty"` // The label of the sensitive topic Label *string `json:"label,omitempty"` // How severely the topic is discussed in the section, from 0 to 1 Severity *float64 `json:"severity,omitempty"` }
type ContentSafetyLabelResult ¶
type ContentSafetyLabelResult struct { // An array of safety labels, one per sensitive topic that was detected in the section Labels []ContentSafetyLabel `json:"labels,omitempty"` // The sentence index at which the section ends SentencesIDxEnd *int64 `json:"sentences_idx_end,omitempty"` // The sentence index at which the section begins SentencesIDxStart *int64 `json:"sentences_idx_start,omitempty"` // The transcript of the section flagged by the Content Moderation model Text *string `json:"text,omitempty"` // Timestamp information for the section Timestamp Timestamp `json:"timestamp,omitempty"` }
type ContentSafetyLabelsResult ¶
type ContentSafetyLabelsResult struct { // An array of results for the Content Moderation model Results []ContentSafetyLabelResult `json:"results,omitempty"` // A summary of the Content Moderation severity results for the entire audio file SeverityScoreSummary map[string]SeverityScoreSummary `json:"severity_score_summary,omitempty"` // The status of the Content Moderation model. Either success, or unavailable in the rare case that the model failed. Status AudioIntelligenceModelStatus `json:"status,omitempty"` // A summary of the Content Moderation confidence results for the entire audio file Summary map[string]float64 `json:"summary,omitempty"` }
An array of results for the Content Moderation model, if it is enabled. See [Content moderation](https://www.assemblyai.com/docs/models/content-moderation) for more information.
type CreateRealtimeTemporaryTokenParams ¶
type CreateRealtimeTemporaryTokenParams struct { // The amount of time until the token expires in seconds ExpiresIn *int64 `json:"expires_in,omitempty"` }
type Entity ¶
type Entity struct { // The ending time, in milliseconds, for the detected entity in the audio file End *int64 `json:"end,omitempty"` // The type of entity for the detected entity EntityType EntityType `json:"entity_type,omitempty"` // The starting time, in milliseconds, at which the detected entity appears in the audio file Start *int64 `json:"start,omitempty"` // The text for the detected entity Text *string `json:"text,omitempty"` }
A detected entity
type FinalTranscript ¶
type FinalTranscript struct { RealTimeBaseTranscript // Describes the type of message MessageType MessageType `json:"message_type"` // Whether the text is punctuated and cased Punctuated bool `json:"punctuated"` // Whether the text is formatted, for example Dollar -> $ TextFormatted bool `json:"text_formatted"` }
type LeMURActionItemsParams ¶
type LeMURActionItemsParams struct { LeMURBaseParams // How you want the action items to be returned. This can be any text. // Defaults to "Bullet Points". AnswerFormat *string `json:"answer_format,omitempty"` }
type LeMURActionItemsResponse ¶
type LeMURActionItemsResponse struct { LeMURBaseResponse // The response generated by LeMUR Response *string `json:"response,omitempty"` }
type LeMURBaseParams ¶
type LeMURBaseParams struct { // Context to provide the model. This can be a string or a free-form JSON value. Context interface{} `json:"context,omitempty"` // The model that is used for the final prompt after compression is performed. FinalModel LeMURModel `json:"final_model,omitempty"` // Custom formatted transcript data. Maximum size is the context limit of the selected model, which defaults to 100000. // Use either transcript_ids or input_text as input into LeMUR. InputText *string `json:"input_text,omitempty"` // Max output size in tokens, up to 4000 MaxOutputSize *int64 `json:"max_output_size,omitempty"` // The temperature to use for the model. // Higher values result in answers that are more creative, lower values are more conservative. // Can be any value between 0.0 and 1.0 inclusive. Temperature *float64 `json:"temperature,omitempty"` // A list of completed transcripts with text. Up to a maximum of 100 files or 100 hours, whichever is lower. // Use either transcript_ids or input_text as input into LeMUR. TranscriptIDs []string `json:"transcript_ids,omitempty"` }
type LeMURBaseResponse ¶
type LeMURBaseResponse struct { // The ID of the LeMUR request RequestID *string `json:"request_id,omitempty"` // The usage numbers for the LeMUR request Usage LeMURUsage `json:"usage,omitempty"` }
type LeMURModel ¶
type LeMURModel string
The model that is used for the final prompt after compression is performed.
const ( // Claude 3.5 Sonnet is the most intelligent model to date, outperforming // Claude 3 Opus on a wide range of evaluations, with the speed and cost of // Claude 3 Sonnet. LeMURModelAnthropicClaude3_5_Sonnet LeMURModel = "anthropic/claude-3-5-sonnet" // Claude 3 Opus is good at handling complex analysis, longer tasks with // many steps, and higher-order math and coding tasks. LeMURModelAnthropicClaude3_Opus LeMURModel = "anthropic/claude-3-opus" // Claude 3 Haiku is the fastest model that can execute lightweight actions. LeMURModelAnthropicClaude3_Haiku LeMURModel = "anthropic/claude-3-haiku" // Claude 3 Sonnet is a legacy model with a balanced combination of // performance and speed for efficient, high-throughput tasks. LeMURModelAnthropicClaude3_Sonnet LeMURModel = "anthropic/claude-3-sonnet" // Claude 2.1 is a legacy model similar to Claude 2.0. The key difference is // that it minimizes model hallucination and system prompts, has a larger // context window, and performs better in citations. // // Deprecated: Switch to Claude 3 by February 6th, 2025. LeMURModelAnthropicClaude2_1 LeMURModel = "anthropic/claude-2-1" // Claude 2.0 is a legacy model that has good complex reasoning. It offers // more nuanced responses and improved contextual comprehension. // // Deprecated: Switch to Claude 3 by February 6th, 2025. LeMURModelAnthropicClaude2 LeMURModel = "anthropic/claude-2" // Legacy model. The same as [LeMURModelAnthropicClaude2]. // // Deprecated: Switch to Claude 3 by February 6th, 2025. LeMURModelDefault LeMURModel = "default" // Mistral 7B is an open source model that works well for summarization and // answering questions. LeMURModelAssemblyAIMistral7B LeMURModel = "assemblyai/mistral-7b" )
type LeMURQuestion ¶
type LeMURQuestion struct { // How you want the answer to be returned. This can be any text. Can't be used with answer_options. Examples: "short sentence", "bullet points" AnswerFormat *string `json:"answer_format,omitempty"` // What discrete options to return. Useful for precise responses. Can't be used with answer_format. Example: ["Yes", "No"] AnswerOptions []string `json:"answer_options,omitempty"` // Any context about the transcripts you wish to provide. This can be a string or any object. Context interface{} `json:"context,omitempty"` // The question you wish to ask. For more complex questions use default model. Question *string `json:"question,omitempty"` }
type LeMURQuestionAnswer ¶
type LeMURQuestionAnswer struct { // The answer generated by LeMUR Answer *string `json:"answer,omitempty"` // The question for LeMUR to answer Question *string `json:"question,omitempty"` }
An answer generated by LeMUR and its question
type LeMURQuestionAnswerParams ¶
type LeMURQuestionAnswerParams struct { LeMURBaseParams // A list of questions to ask Questions []LeMURQuestion `json:"questions,omitempty"` }
type LeMURQuestionAnswerResponse ¶
type LeMURQuestionAnswerResponse struct { LeMURBaseResponse // The answers generated by LeMUR and their questions Response []LeMURQuestionAnswer `json:"response,omitempty"` }
type LeMURService ¶
type LeMURService struct {
// contains filtered or unexported fields
}
LeMURService groups the operations related to LeMUR.
func (*LeMURService) ActionItems ¶
func (s *LeMURService) ActionItems(ctx context.Context, params LeMURActionItemsParams) (LeMURActionItemsResponse, error)
ActionItems returns a set of action items based on a set of transcripts.
func (*LeMURService) GetResponseData ¶ added in v1.6.0
func (s *LeMURService) GetResponseData(ctx context.Context, requestID string, response interface{}) error
Retrieve a previously generated LeMUR response.
func (*LeMURService) PurgeRequestData ¶ added in v1.3.0
func (s *LeMURService) PurgeRequestData(ctx context.Context, requestID string) (PurgeLeMURRequestDataResponse, error)
func (*LeMURService) Question ¶
func (s *LeMURService) Question(ctx context.Context, params LeMURQuestionAnswerParams) (LeMURQuestionAnswerResponse, error)
Question returns answers to free-form questions about one or more transcripts.
https://www.assemblyai.com/docs/Models/lemur#question--answer
func (*LeMURService) Summarize ¶
func (s *LeMURService) Summarize(ctx context.Context, params LeMURSummaryParams) (LeMURSummaryResponse, error)
Summarize returns a custom summary of a set of transcripts.
func (*LeMURService) Task ¶
func (s *LeMURService) Task(ctx context.Context, params LeMURTaskParams) (LeMURTaskResponse, error)
Task lets you submit a custom prompt to LeMUR.
type LeMURSummaryParams ¶
type LeMURSummaryParams struct { LeMURBaseParams // How you want the summary to be returned. This can be any text. Examples: "TLDR", "bullet points" AnswerFormat *string `json:"answer_format,omitempty"` }
type LeMURSummaryResponse ¶
type LeMURSummaryResponse struct { LeMURBaseResponse // The response generated by LeMUR Response *string `json:"response,omitempty"` }
type LeMURTaskParams ¶
type LeMURTaskParams struct { // Your text to prompt the model to produce a desired output, including any context you want to pass into the model. Prompt *string `json:"prompt,omitempty"` LeMURBaseParams }
type LeMURTaskResponse ¶
type LeMURTaskResponse struct { // The response generated by LeMUR. Response *string `json:"response,omitempty"` LeMURBaseResponse }
type LeMURUsage ¶ added in v1.6.0
type LeMURUsage struct { // The number of input tokens used by the model InputTokens *int64 `json:"input_tokens,omitempty"` // The number of output tokens generated by the model OutputTokens *int64 `json:"output_tokens,omitempty"` }
The usage numbers for the LeMUR request
type ListTranscriptParams ¶
type ListTranscriptParams struct { // Get transcripts that were created after this transcript ID AfterID *string `url:"after_id,omitempty"` // Get transcripts that were created before this transcript ID BeforeID *string `url:"before_id,omitempty"` // Only get transcripts created on this date CreatedOn *string `url:"created_on,omitempty"` // Maximum amount of transcripts to retrieve Limit *int64 `url:"limit,omitempty"` // Filter by transcript status Status TranscriptStatus `url:"status,omitempty"` // Only get throttled transcripts, overrides the status filter ThrottledOnly *bool `url:"throttled_only,omitempty"` }
type MessageType ¶
type MessageType string
const ( MessageTypeSessionBegins MessageType = "SessionBegins" MessageTypeSessionTerminated MessageType = "SessionTerminated" MessageTypePartialTranscript MessageType = "PartialTranscript" MessageTypeFinalTranscript MessageType = "FinalTranscript" MessageTypeSessionInformation MessageType = "SessionInformation" )
type PageDetails ¶
type PageDetails struct { // The URL used to retrieve the current page of transcripts CurrentURL *string `json:"current_url,omitempty"` // The number of results this page is limited to Limit *int64 `json:"limit,omitempty"` // The URL to the next page of transcripts. The next URL always points to a page with newer transcripts. NextURL *string `json:"next_url,omitempty"` // The URL to the previous page of transcripts. The previous URL always points to a page with older transcripts. PrevURL *string `json:"prev_url,omitempty"` // The actual number of results in the page ResultCount *int64 `json:"result_count,omitempty"` }
Details of the transcript page. Transcripts are sorted from newest to oldest. The previous URL always points to a page with older transcripts.
type ParagraphsResponse ¶
type ParagraphsResponse struct { // The duration of the audio file in seconds AudioDuration *float64 `json:"audio_duration,omitempty"` // The confidence score for the transcript Confidence *float64 `json:"confidence,omitempty"` // The unique identifier of your transcript ID *string `json:"id,omitempty"` // An array of paragraphs in the transcript Paragraphs []TranscriptParagraph `json:"paragraphs,omitempty"` }
type PartialTranscript ¶
type PartialTranscript struct { RealTimeBaseTranscript // Describes the type of message MessageType MessageType `json:"message_type"` }
type PurgeLeMURRequestDataResponse ¶
type PurgeLeMURRequestDataResponse struct { // Whether the request data was deleted Deleted *bool `json:"deleted,omitempty"` // The ID of the deletion request of the LeMUR request RequestID *string `json:"request_id,omitempty"` // The ID of the LeMUR request to purge the data for RequestIDToPurge *string `json:"request_id_to_purge,omitempty"` }
type RealTimeBaseMessage ¶
type RealTimeBaseMessage struct { // Describes the type of the message MessageType MessageType `json:"message_type"` }
type RealTimeBaseTranscript ¶
type RealTimeBaseTranscript struct { // End time of audio sample relative to session start, in milliseconds AudioEnd int64 `json:"audio_end"` // Start time of audio sample relative to session start, in milliseconds AudioStart int64 `json:"audio_start"` // The confidence score of the entire transcription, between 0 and 1 Confidence float64 `json:"confidence"` // The timestamp for the partial transcript Created string `json:"created"` // The partial transcript for your audio Text string `json:"text"` // An array of objects, with the information for each word in the // transcription text. Includes the start and end time of the word in // milliseconds, the confidence score of the word, and the text, which is // the word itself. Words []Word `json:"words"` }
type RealTimeClient ¶
type RealTimeClient struct {
// contains filtered or unexported fields
}
func NewRealTimeClient ¶
func NewRealTimeClient(apiKey string, handler RealTimeHandler) *RealTimeClient
NewRealTimeClient returns a new instance of RealTimeClient with default values. Use NewRealTimeClientWithOptions for more configuration options.
func NewRealTimeClientWithOptions ¶
func NewRealTimeClientWithOptions(options ...RealTimeClientOption) *RealTimeClient
NewRealTimeClientWithOptions returns a new instance of RealTimeClient.
func (*RealTimeClient) Connect ¶
func (c *RealTimeClient) Connect(ctx context.Context) error
Connect opens a WebSocket connection and waits for a session to begin. Closes any open WebSocket connection in case of errors.
func (*RealTimeClient) Disconnect ¶
func (c *RealTimeClient) Disconnect(ctx context.Context, waitForSessionTermination bool) error
Disconnect sends the terminate_session message and waits for the server to send a SessionTerminated message before closing the connection.
func (*RealTimeClient) ForceEndUtterance ¶ added in v1.2.0
func (c *RealTimeClient) ForceEndUtterance(ctx context.Context) error
ForceEndUtterance manually ends an utterance.
func (*RealTimeClient) Send ¶
func (c *RealTimeClient) Send(ctx context.Context, samples []byte) error
Send sends audio samples to be transcribed.
Expected audio format:
- 16-bit signed integers - PCM-encoded - Single-channel
func (*RealTimeClient) SetEndUtteranceSilenceThreshold ¶ added in v1.2.0
func (c *RealTimeClient) SetEndUtteranceSilenceThreshold(ctx context.Context, threshold int64) error
SetEndUtteranceSilenceThreshold configures the threshold for how long to wait before ending an utterance. Default is 700ms.
type RealTimeClientOption ¶
type RealTimeClientOption func(*RealTimeClient)
func WithHandler (deprecated) ¶
func WithHandler(handler RealTimeHandler) RealTimeClientOption
WithHandler configures the client to use the provided handler to handle real-time events.
Deprecated: WithHandler is deprecated. Use WithRealTimeTranscriber instead.
func WithRealTimeAPIKey ¶
func WithRealTimeAPIKey(apiKey string) RealTimeClientOption
WithRealTimeAPIKey configures the client to authenticate using an AssemblyAI API key.
func WithRealTimeAuthToken ¶ added in v1.4.0
func WithRealTimeAuthToken(token string) RealTimeClientOption
WithRealTimeAuthToken configures the client to authenticate using a temporary token generated using [CreateTemporaryToken].
func WithRealTimeBaseURL ¶
func WithRealTimeBaseURL(rawurl string) RealTimeClientOption
WithRealTimeBaseURL sets the API endpoint used by the client. Mainly used for testing.
func WithRealTimeEncoding ¶ added in v1.1.0
func WithRealTimeEncoding(encoding RealTimeEncoding) RealTimeClientOption
WithRealTimeEncoding specifies the encoding of the audio data.
func WithRealTimeSampleRate ¶
func WithRealTimeSampleRate(sampleRate int) RealTimeClientOption
WithRealTimeSampleRate sets the sample rate for the audio data. Default is 16000.
func WithRealTimeTranscriber ¶ added in v1.5.0
func WithRealTimeTranscriber(transcriber *RealTimeTranscriber) RealTimeClientOption
func WithRealTimeWordBoost ¶
func WithRealTimeWordBoost(wordBoost []string) RealTimeClientOption
WithRealTimeWordBoost sets the word boost for the real-time transcription.
type RealTimeEncoding ¶ added in v1.1.0
type RealTimeEncoding string
RealTimeEncoding is the encoding format for the audio data.
const ( // PCM signed 16-bit little-endian (default) RealTimeEncodingPCMS16LE RealTimeEncoding = "pcm_s16le" // PCM Mu-law RealTimeEncodingPCMMulaw RealTimeEncoding = "pcm_mulaw" )
type RealTimeError ¶
type RealTimeError struct {
Error string `json:"error"`
}
type RealTimeHandler ¶
type RealTimeHandler interface { SessionBegins(ev SessionBegins) SessionTerminated(ev SessionTerminated) FinalTranscript(transcript FinalTranscript) PartialTranscript(transcript PartialTranscript) Error(err error) }
Deprecated.
type RealTimeService ¶ added in v1.4.0
type RealTimeService struct {
// contains filtered or unexported fields
}
RealTimeService groups operations related to the real-time transcription.
func (*RealTimeService) CreateTemporaryToken ¶ added in v1.4.0
func (svc *RealTimeService) CreateTemporaryToken(ctx context.Context, expiresIn int64) (*RealtimeTemporaryTokenResponse, error)
CreateTemporaryToken creates a temporary token that can be used to authenticate a real-time client.
type RealTimeTranscriber ¶ added in v1.5.0
type RealTimeTranscriber struct { OnSessionBegins func(event SessionBegins) OnSessionTerminated func(event SessionTerminated) OnSessionInformation func(event SessionInformation) OnPartialTranscript func(event PartialTranscript) OnFinalTranscript func(event FinalTranscript) OnError func(err error) }
type RealtimeTemporaryTokenResponse ¶
type RealtimeTemporaryTokenResponse struct { // The temporary authentication token for Streaming Speech-to-Text Token *string `json:"token,omitempty"` }
type RedactPIIAudioQuality ¶
type RedactPIIAudioQuality string
Controls the filetype of the audio created by redact_pii_audio. Currently supports mp3 (default) and wav. See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details.
type RedactedAudioNotification ¶ added in v1.7.0
type RedactedAudioNotification struct {
RedactedAudioResponse
}
The notification when the redacted audio is ready.
type RedactedAudioResponse ¶
type RedactedAudioResponse struct { // The URL of the redacted audio file RedactedAudioURL *string `json:"redacted_audio_url,omitempty"` // The status of the redacted audio Status RedactedAudioStatus `json:"status,omitempty"` }
type SentencesResponse ¶
type SentencesResponse struct { // The duration of the audio file in seconds AudioDuration *float64 `json:"audio_duration,omitempty"` // The confidence score for the transcript Confidence *float64 `json:"confidence,omitempty"` // The unique identifier for the transcript ID *string `json:"id,omitempty"` // An array of sentences in the transcript Sentences []TranscriptSentence `json:"sentences,omitempty"` }
type SentimentAnalysisResult ¶
type SentimentAnalysisResult struct { // The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially. Channel *string `json:"channel,omitempty"` // The confidence score for the detected sentiment of the sentence, from 0 to 1 Confidence *float64 `json:"confidence,omitempty"` // The ending time, in milliseconds, of the sentence End *int64 `json:"end,omitempty"` // The detected sentiment for the sentence, one of POSITIVE, NEUTRAL, NEGATIVE Sentiment Sentiment `json:"sentiment,omitempty"` // The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, else null Speaker *string `json:"speaker,omitempty"` // The starting time, in milliseconds, of the sentence Start *int64 `json:"start,omitempty"` // The transcript of the sentence Text *string `json:"text,omitempty"` }
The result of the Sentiment Analysis model
type SessionBegins ¶
type SessionBegins struct { RealTimeBaseMessage // Timestamp when this session will expire ExpiresAt string `json:"expires_at"` // Describes the type of the message MessageType string `json:"message_type"` // Unique identifier for the established session SessionID string `json:"session_id"` }
type SessionInformation ¶ added in v1.5.0
type SessionInformation struct { RealTimeBaseMessage // The duration of the audio in seconds. AudioDurationSeconds float64 `json:"audio_duration_seconds"` }
type SessionTerminated ¶
type SessionTerminated struct { // Describes the type of the message MessageType MessageType `json:"message_type"` }
type SeverityScoreSummary ¶
type SpeechModel ¶ added in v1.2.0
type SpeechModel string
The speech model to use for the transcription.
const ( // The best model optimized for accuracy. SpeechModelBest SpeechModel = "best" // A lightweight, lower cost model for a wide range of languages. SpeechModelNano SpeechModel = "nano" // Conformer-2 is a heavy-duty model optimized for accuracy. SpeechModelConformer2 SpeechModel = "conformer-2" )
type SubstitutionPolicy ¶
type SubstitutionPolicy string
The replacement logic for detected PII, can be "entity_name" or "hash". See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details.
type TerminateSession ¶
type TerminateSession struct { // Set to true to end your real-time session forever TerminateSession bool `json:"terminate_session"` }
type Timestamp ¶
type Timestamp struct { // The end time in milliseconds End *int64 `json:"end,omitempty"` // The start time in milliseconds Start *int64 `json:"start,omitempty"` }
Timestamp containing a start and end property in milliseconds
type TopicDetectionModelResult ¶
type TopicDetectionModelResult struct { // An array of results for the Topic Detection model Results []TopicDetectionResult `json:"results,omitempty"` // The status of the Topic Detection model. Either success, or unavailable in the rare case that the model failed. Status AudioIntelligenceModelStatus `json:"status,omitempty"` // The overall relevance of topic to the entire audio file Summary map[string]float64 `json:"summary,omitempty"` }
The result of the Topic Detection model, if it is enabled. See [Topic Detection](https://www.assemblyai.com/docs/models/topic-detection) for more information.
type TopicDetectionResult ¶
type TopicDetectionResult struct { // An array of detected topics in the text Labels []struct { // The IAB taxonomical label for the detected topic, where > denotes a supertopic/subtopic relationship Label *string `json:"label,omitempty"` // How relevant the detected topic is within the text Relevance *float64 `json:"relevance,omitempty"` } `json:"labels,omitempty"` // The text in the transcript in which a detected topic occurs Text *string `json:"text,omitempty"` Timestamp Timestamp `json:"timestamp,omitempty"` }
The result of the topic detection model
type Transcript ¶
type Transcript struct { // Deprecated: The acoustic model that was used for the transcript AcousticModel *string `json:"acoustic_model,omitempty"` // The duration of this transcript object's media file, in seconds AudioDuration *float64 `json:"audio_duration,omitempty"` // The point in time, in milliseconds, in the file at which the transcription was terminated AudioEndAt *int64 `json:"audio_end_at,omitempty"` // The point in time, in milliseconds, in the file at which the transcription was started AudioStartFrom *int64 `json:"audio_start_from,omitempty"` // The URL of the media that was transcribed AudioURL *string `json:"audio_url,omitempty"` // Whether [Auto Chapters](https://www.assemblyai.com/docs/models/auto-chapters) is enabled, can be true or false AutoChapters *bool `json:"auto_chapters,omitempty"` // Whether Key Phrases is enabled, either true or false AutoHighlights *bool `json:"auto_highlights,omitempty"` // An array of results for the Key Phrases model, if it is enabled. // See [Key Phrases](https://www.assemblyai.com/docs/models/key-phrases) for more information. AutoHighlightsResult AutoHighlightsResult `json:"auto_highlights_result,omitempty"` // The word boost parameter value BoostParam *string `json:"boost_param,omitempty"` // An array of temporally sequential chapters for the audio file Chapters []Chapter `json:"chapters,omitempty"` // The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence) Confidence *float64 `json:"confidence,omitempty"` // Whether [Content Moderation](https://www.assemblyai.com/docs/models/content-moderation) is enabled, can be true or false ContentSafety *bool `json:"content_safety,omitempty"` // An array of results for the Content Moderation model, if it is enabled. // See [Content moderation](https://www.assemblyai.com/docs/models/content-moderation) for more information. 
ContentSafetyLabels ContentSafetyLabelsResult `json:"content_safety_labels,omitempty"` // Customize how words are spelled and formatted using to and from values CustomSpelling []TranscriptCustomSpelling `json:"custom_spelling,omitempty"` // Whether custom topics is enabled, either true or false CustomTopics *bool `json:"custom_topics,omitempty"` // Transcribe Filler Words, like "umm", in your media file; can be true or false Disfluencies *bool `json:"disfluencies,omitempty"` // Deprecated: Whether [Dual channel transcription](https://www.assemblyai.com/docs/models/speech-recognition#dual-channel-transcription) was enabled in the transcription request, either true or false DualChannel *bool `json:"dual_channel,omitempty"` // An array of results for the Entity Detection model, if it is enabled. // See [Entity detection](https://www.assemblyai.com/docs/models/entity-detection) for more information. Entities []Entity `json:"entities,omitempty"` // Whether [Entity Detection](https://www.assemblyai.com/docs/models/entity-detection) is enabled, can be true or false EntityDetection *bool `json:"entity_detection,omitempty"` // Error message of why the transcript failed Error *string `json:"error,omitempty"` // Whether [Profanity Filtering](https://www.assemblyai.com/docs/models/speech-recognition#profanity-filtering) is enabled, either true or false FilterProfanity *bool `json:"filter_profanity,omitempty"` // Whether Text Formatting is enabled, either true or false FormatText *bool `json:"format_text,omitempty"` // Whether [Topic Detection](https://www.assemblyai.com/docs/models/topic-detection) is enabled, can be true or false IABCategories *bool `json:"iab_categories,omitempty"` // The result of the Topic Detection model, if it is enabled. // See [Topic Detection](https://www.assemblyai.com/docs/models/topic-detection) for more information. 
IABCategoriesResult TopicDetectionModelResult `json:"iab_categories_result,omitempty"` // The unique identifier of your transcript ID *string `json:"id,omitempty"` // The language of your audio file. // Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/concepts/supported-languages). // The default value is 'en_us'. LanguageCode TranscriptLanguageCode `json:"language_code,omitempty"` // The confidence score for the detected language, between 0.0 (low confidence) and 1.0 (high confidence) LanguageConfidence *float64 `json:"language_confidence,omitempty"` // The confidence threshold for the automatically detected language. // An error will be returned if the language confidence is below this threshold. LanguageConfidenceThreshold *float64 `json:"language_confidence_threshold,omitempty"` // Whether [Automatic language detection](https://www.assemblyai.com/docs/models/speech-recognition#automatic-language-detection) is enabled, either true or false LanguageDetection *bool `json:"language_detection,omitempty"` // Deprecated: The language model that was used for the transcript LanguageModel *string `json:"language_model,omitempty"` // Whether [Multichannel transcription](https://www.assemblyai.com/docs/models/speech-recognition#multichannel-transcription) was enabled in the transcription request, either true or false Multichannel *bool `json:"multichannel,omitempty"` // Whether Automatic Punctuation is enabled, either true or false Punctuate *bool `json:"punctuate,omitempty"` // Whether [PII Redaction](https://www.assemblyai.com/docs/models/pii-redaction) is enabled, either true or false RedactPII *bool `json:"redact_pii,omitempty"` // Whether a redacted version of the audio file was generated, // either true or false. See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more information. 
RedactPIIAudio *bool `json:"redact_pii_audio,omitempty"` // The audio quality of the PII-redacted audio file, if redact_pii_audio is enabled. // See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more information. RedactPIIAudioQuality RedactPIIAudioQuality `json:"redact_pii_audio_quality,omitempty"` // The list of PII Redaction policies that were enabled, if PII Redaction is enabled. // See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more information. RedactPIIPolicies []PIIPolicy `json:"redact_pii_policies,omitempty"` // The replacement logic for detected PII, can be "entity_name" or "hash". See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details. RedactPIISub SubstitutionPolicy `json:"redact_pii_sub,omitempty"` // Whether [Sentiment Analysis](https://www.assemblyai.com/docs/models/sentiment-analysis) is enabled, can be true or false SentimentAnalysis *bool `json:"sentiment_analysis,omitempty"` // An array of results for the Sentiment Analysis model, if it is enabled. // See [Sentiment Analysis](https://www.assemblyai.com/docs/models/sentiment-analysis) for more information. SentimentAnalysisResults []SentimentAnalysisResult `json:"sentiment_analysis_results,omitempty"` // Whether [Speaker diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, can be true or false SpeakerLabels *bool `json:"speaker_labels,omitempty"` // Tell the speaker label model how many speakers it should attempt to identify, up to 10. See [Speaker diarization](https://www.assemblyai.com/docs/models/speaker-diarization) for more details. SpeakersExpected *int64 `json:"speakers_expected,omitempty"` // The speech model used for the transcription. When `null`, the default model is used. SpeechModel SpeechModel `json:"speech_model,omitempty"` // Defaults to null. Reject audio files that contain less than this fraction of speech. 
// Valid values are in the range [0, 1] inclusive. SpeechThreshold *float64 `json:"speech_threshold,omitempty"` // Deprecated: Whether speed boost is enabled SpeedBoost *bool `json:"speed_boost,omitempty"` // The status of your transcript. Possible values are queued, processing, completed, or error. Status TranscriptStatus `json:"status,omitempty"` // Whether [Summarization](https://www.assemblyai.com/docs/models/summarization) is enabled, either true or false Summarization *bool `json:"summarization,omitempty"` // The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/models/summarization) is enabled Summary *string `json:"summary,omitempty"` // The Summarization model used to generate the summary, // if [Summarization](https://www.assemblyai.com/docs/models/summarization) is enabled SummaryModel *string `json:"summary_model,omitempty"` // The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/models/summarization) is enabled SummaryType *string `json:"summary_type,omitempty"` // The textual transcript of your media file Text *string `json:"text,omitempty"` // True while a request is throttled and false when a request is no longer throttled Throttled *bool `json:"throttled,omitempty"` // The list of custom topics provided if custom topics is enabled Topics []string `json:"topics,omitempty"` // When dual_channel or speaker_labels is enabled, a list of turn-by-turn utterance objects. // See [Speaker diarization](https://www.assemblyai.com/docs/models/speaker-diarization) for more information. 
Utterances []TranscriptUtterance `json:"utterances,omitempty"` // Whether webhook authentication details were provided WebhookAuth *bool `json:"webhook_auth,omitempty"` // The header name to be sent with the transcript completed or failed webhook requests WebhookAuthHeaderName *string `json:"webhook_auth_header_name,omitempty"` // The status code we received from your server when delivering the transcript completed or failed webhook request, if a webhook URL was provided WebhookStatusCode *int64 `json:"webhook_status_code,omitempty"` // The URL to which we send webhook requests. // We send two different types of webhook requests. // One request when a transcript is completed or failed, and one request when the redacted audio is ready if redact_pii_audio is enabled. WebhookURL *string `json:"webhook_url,omitempty"` // The list of custom vocabulary to boost transcription probability for WordBoost []string `json:"word_boost,omitempty"` // An array of temporally-sequential word objects, one for each word in the transcript. // See [Speech recognition](https://www.assemblyai.com/docs/models/speech-recognition) for more information. Words []TranscriptWord `json:"words,omitempty"` }
A transcript object
type TranscriptCustomSpelling ¶
type TranscriptCustomSpelling struct { // Words or phrases to replace From []string `json:"from,omitempty"` // Word or phrase to replace with To *string `json:"to,omitempty"` }
Object containing words or phrases to replace, and the word or phrase to replace with
type TranscriptGetSubtitlesOptions ¶
type TranscriptGetSubtitlesOptions struct {
CharsPerCaption int64 `json:"chars_per_caption"`
}
type TranscriptLanguageCode ¶
type TranscriptLanguageCode string
The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/concepts/supported-languages). The default value is 'en_us'.
type TranscriptList ¶
type TranscriptList struct { // Details of the transcript page PageDetails PageDetails `json:"page_details,omitempty"` // An array of transcripts Transcripts []TranscriptListItem `json:"transcripts,omitempty"` }
A list of transcripts. Transcripts are sorted from newest to oldest. The previous URL always points to a page with older transcripts.
type TranscriptListItem ¶
type TranscriptListItem struct { // The URL to the audio file AudioURL *string `json:"audio_url,omitempty"` // The date and time the transcript was completed Completed *string `json:"completed,omitempty"` // The date and time the transcript was created Created *string `json:"created,omitempty"` // Error message of why the transcript failed Error *string `json:"error,omitempty"` // The unique identifier for the transcript ID *string `json:"id,omitempty"` // The URL to retrieve the transcript ResourceURL *string `json:"resource_url,omitempty"` // The status of the transcript Status TranscriptStatus `json:"status,omitempty"` }
type TranscriptOptionalParams ¶
type TranscriptOptionalParams struct { // The point in time, in milliseconds, to stop transcribing in your media file AudioEndAt *int64 `json:"audio_end_at,omitempty"` // The point in time, in milliseconds, to begin transcribing in your media file AudioStartFrom *int64 `json:"audio_start_from,omitempty"` // Enable [Auto Chapters](https://www.assemblyai.com/docs/models/auto-chapters), can be true or false AutoChapters *bool `json:"auto_chapters,omitempty"` // Enable Key Phrases, either true or false AutoHighlights *bool `json:"auto_highlights,omitempty"` // How much to boost specified words BoostParam TranscriptBoostParam `json:"boost_param,omitempty"` // Enable [Content Moderation](https://www.assemblyai.com/docs/models/content-moderation), can be true or false ContentSafety *bool `json:"content_safety,omitempty"` // The confidence threshold for the Content Moderation model. Values must be between 25 and 100. ContentSafetyConfidence *int64 `json:"content_safety_confidence,omitempty"` // Customize how words are spelled and formatted using to and from values CustomSpelling []TranscriptCustomSpelling `json:"custom_spelling,omitempty"` // Enable custom topics, either true or false CustomTopics *bool `json:"custom_topics,omitempty"` // Transcribe Filler Words, like "umm", in your media file; can be true or false Disfluencies *bool `json:"disfluencies,omitempty"` // Deprecated: Enable [Dual Channel](https://www.assemblyai.com/docs/models/speech-recognition#dual-channel-transcription) transcription, can be true or false. 
DualChannel *bool `json:"dual_channel,omitempty"` // Enable [Entity Detection](https://www.assemblyai.com/docs/models/entity-detection), can be true or false EntityDetection *bool `json:"entity_detection,omitempty"` // Filter profanity from the transcribed text, can be true or false FilterProfanity *bool `json:"filter_profanity,omitempty"` // Enable Text Formatting, can be true or false FormatText *bool `json:"format_text,omitempty"` // Enable [Topic Detection](https://www.assemblyai.com/docs/models/topic-detection), can be true or false IABCategories *bool `json:"iab_categories,omitempty"` // The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/concepts/supported-languages). // The default value is 'en_us'. LanguageCode TranscriptLanguageCode `json:"language_code,omitempty"` // The confidence threshold for the automatically detected language. // An error will be returned if the language confidence is below this threshold. // Defaults to 0. LanguageConfidenceThreshold *float64 `json:"language_confidence_threshold,omitempty"` // Enable [Automatic language detection](https://www.assemblyai.com/docs/models/speech-recognition#automatic-language-detection), either true or false. LanguageDetection *bool `json:"language_detection,omitempty"` // Enable [Multichannel](https://www.assemblyai.com/docs/models/speech-recognition#multichannel-transcription) transcription, can be true or false. Multichannel *bool `json:"multichannel,omitempty"` // Enable Automatic Punctuation, can be true or false Punctuate *bool `json:"punctuate,omitempty"` // Redact PII from the transcribed text using the Redact PII model, can be true or false RedactPII *bool `json:"redact_pii,omitempty"` // Generate a copy of the original media file with spoken PII "beeped" out, can be true or false. See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details. 
RedactPIIAudio *bool `json:"redact_pii_audio,omitempty"` // Controls the filetype of the audio created by redact_pii_audio. Currently supports mp3 (default) and wav. See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details. RedactPIIAudioQuality RedactPIIAudioQuality `json:"redact_pii_audio_quality,omitempty"` // The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details. RedactPIIPolicies []PIIPolicy `json:"redact_pii_policies,omitempty"` // The replacement logic for detected PII, can be "entity_name" or "hash". See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details. RedactPIISub SubstitutionPolicy `json:"redact_pii_sub,omitempty"` // Enable [Sentiment Analysis](https://www.assemblyai.com/docs/models/sentiment-analysis), can be true or false SentimentAnalysis *bool `json:"sentiment_analysis,omitempty"` // Enable [Speaker diarization](https://www.assemblyai.com/docs/models/speaker-diarization), can be true or false SpeakerLabels *bool `json:"speaker_labels,omitempty"` // Tells the speaker label model how many speakers it should attempt to identify, up to 10. See [Speaker diarization](https://www.assemblyai.com/docs/models/speaker-diarization) for more details. SpeakersExpected *int64 `json:"speakers_expected,omitempty"` // The speech model to use for the transcription. When `null`, the "best" model is used. SpeechModel SpeechModel `json:"speech_model,omitempty"` // Reject audio files that contain less than this fraction of speech. // Valid values are in the range [0, 1] inclusive. 
SpeechThreshold *float64 `json:"speech_threshold,omitempty"` // Enable [Summarization](https://www.assemblyai.com/docs/models/summarization), can be true or false Summarization *bool `json:"summarization,omitempty"` // The model to summarize the transcript SummaryModel SummaryModel `json:"summary_model,omitempty"` // The type of summary SummaryType SummaryType `json:"summary_type,omitempty"` // The list of custom topics Topics []string `json:"topics,omitempty"` // The header name to be sent with the transcript completed or failed webhook requests WebhookAuthHeaderName *string `json:"webhook_auth_header_name,omitempty"` // The header value to send back with the transcript completed or failed webhook requests for added security WebhookAuthHeaderValue *string `json:"webhook_auth_header_value,omitempty"` // The URL to which we send webhook requests. // We send two different types of webhook requests. // One request when a transcript is completed or failed, and one request when the redacted audio is ready if redact_pii_audio is enabled. WebhookURL *string `json:"webhook_url,omitempty"` // The list of custom vocabulary to boost transcription probability for WordBoost []string `json:"word_boost,omitempty"` }
The parameters for creating a transcript
type TranscriptParagraph ¶
type TranscriptParagraph struct { // The confidence score for the transcript of this paragraph Confidence *float64 `json:"confidence,omitempty"` // The ending time, in milliseconds, of the paragraph End *int64 `json:"end,omitempty"` // The starting time, in milliseconds, of the paragraph Start *int64 `json:"start,omitempty"` // The transcript of the paragraph Text *string `json:"text,omitempty"` // An array of words in the paragraph Words []TranscriptWord `json:"words,omitempty"` }
type TranscriptParams ¶
type TranscriptParams struct { // The URL of the audio or video file to transcribe. AudioURL *string `json:"audio_url,omitempty"` TranscriptOptionalParams }
The parameters for creating a transcript
type TranscriptReadyNotification ¶ added in v1.5.1
type TranscriptReadyNotification struct { // The status of the transcript. Either completed or error. Status TranscriptReadyStatus `json:"status,omitempty"` // The ID of the transcript TranscriptID *string `json:"transcript_id,omitempty"` }
The notification when the transcript status is completed or error.
type TranscriptReadyStatus ¶ added in v1.5.1
type TranscriptReadyStatus string
The status of the transcript. Either completed or error.
type TranscriptSentence ¶
type TranscriptSentence struct { // The channel of the sentence. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially. Channel *string `json:"channel,omitempty"` // The confidence score for the transcript of this sentence Confidence *float64 `json:"confidence,omitempty"` // The ending time, in milliseconds, for the sentence End *int64 `json:"end,omitempty"` // The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, else null Speaker *string `json:"speaker,omitempty"` // The starting time, in milliseconds, for the sentence Start *int64 `json:"start,omitempty"` // The transcript of the sentence Text *string `json:"text,omitempty"` // An array of words in the sentence Words []TranscriptWord `json:"words,omitempty"` }
type TranscriptService ¶
type TranscriptService struct {
// contains filtered or unexported fields
}
TranscriptService groups the operations related to transcribing audio.
func (*TranscriptService) Delete ¶
func (s *TranscriptService) Delete(ctx context.Context, transcriptID string) (Transcript, error)
Delete permanently deletes a transcript.
func (*TranscriptService) Get ¶
func (s *TranscriptService) Get(ctx context.Context, transcriptID string) (Transcript, error)
Get returns a transcript.
func (*TranscriptService) GetParagraphs ¶
func (s *TranscriptService) GetParagraphs(ctx context.Context, transcriptID string) (ParagraphsResponse, error)
GetParagraphs returns the paragraphs for a transcript.
func (*TranscriptService) GetRedactedAudio ¶
func (s *TranscriptService) GetRedactedAudio(ctx context.Context, transcriptID string) (RedactedAudioResponse, error)
GetRedactedAudio returns the redacted audio for a transcript.
https://www.assemblyai.com/docs/models/pii-redaction#create-a-redacted-audio-file
func (*TranscriptService) GetSentences ¶
func (s *TranscriptService) GetSentences(ctx context.Context, transcriptID string) (SentencesResponse, error)
GetSentences returns the sentences for a transcript.
func (*TranscriptService) GetSubtitles ¶
func (s *TranscriptService) GetSubtitles(ctx context.Context, transcriptID string, format SubtitleFormat, opts *TranscriptGetSubtitlesOptions) ([]byte, error)
func (*TranscriptService) List ¶
func (s *TranscriptService) List(ctx context.Context, options ListTranscriptParams) (TranscriptList, error)
List returns a collection of transcripts based on a filter.
https://www.assemblyai.com/docs/API%20reference/listing_and_deleting#listing-historical-transcripts
func (*TranscriptService) SubmitFromReader ¶
func (s *TranscriptService) SubmitFromReader(ctx context.Context, reader io.Reader, params *TranscriptOptionalParams) (Transcript, error)
SubmitFromReader submits audio for transcription without waiting for it to finish.
func (*TranscriptService) SubmitFromURL ¶
func (s *TranscriptService) SubmitFromURL(ctx context.Context, audioURL string, opts *TranscriptOptionalParams) (Transcript, error)
SubmitFromURL submits an audio file for transcription without waiting for it to finish.
https://www.assemblyai.com/docs/API%20reference/transcript#create-a-transcript
func (*TranscriptService) TranscribeFromReader ¶
func (s *TranscriptService) TranscribeFromReader(ctx context.Context, reader io.Reader, opts *TranscriptOptionalParams) (Transcript, error)
TranscribeFromReader submits audio for transcription and waits for it to finish.
func (*TranscriptService) TranscribeFromURL ¶
func (s *TranscriptService) TranscribeFromURL(ctx context.Context, audioURL string, opts *TranscriptOptionalParams) (Transcript, error)
TranscribeFromURL submits a URL to an audio file for transcription and waits for it to finish.
func (*TranscriptService) Wait ¶
func (s *TranscriptService) Wait(ctx context.Context, transcriptID string) (Transcript, error)
Wait returns once a transcript has completed or failed.
func (*TranscriptService) WordSearch ¶ added in v1.3.0
func (s *TranscriptService) WordSearch(ctx context.Context, transcriptID string, words []string) (WordSearchResponse, error)
WordSearch searches a transcript for any occurrences of the provided words.
type TranscriptStatus ¶
type TranscriptStatus string
The status of your transcript. Possible values are queued, processing, completed, or error.
const ( TranscriptStatusQueued TranscriptStatus = "queued" TranscriptStatusProcessing TranscriptStatus = "processing" TranscriptStatusCompleted TranscriptStatus = "completed" TranscriptStatusError TranscriptStatus = "error" )
type TranscriptUtterance ¶
type TranscriptUtterance struct { // The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially. Channel *string `json:"channel,omitempty"` // The confidence score for the transcript of this utterance Confidence *float64 `json:"confidence,omitempty"` // The ending time, in milliseconds, of the utterance in the audio file End *int64 `json:"end,omitempty"` // The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc. Speaker *string `json:"speaker,omitempty"` // The starting time, in milliseconds, of the utterance in the audio file Start *int64 `json:"start,omitempty"` // The text for this utterance Text *string `json:"text,omitempty"` // The words in the utterance. Words []TranscriptWord `json:"words,omitempty"` }
type TranscriptWebhookNotification ¶ added in v1.7.0
type TranscriptWebhookNotification struct{}
The notifications sent to the webhook URL.
type TranscriptWord ¶
type TranscriptWord struct { // The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially. Channel *string `json:"channel,omitempty"` // The confidence score for the transcript of this word Confidence *float64 `json:"confidence,omitempty"` // The ending time, in milliseconds, for the word End *int64 `json:"end,omitempty"` // The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, else null Speaker *string `json:"speaker,omitempty"` // The starting time, in milliseconds, for the word Start *int64 `json:"start,omitempty"` // The text of the word Text *string `json:"text,omitempty"` }
type UploadedFile ¶
type UploadedFile struct { // A URL that points to your audio file, accessible only by AssemblyAI's servers UploadURL *string `json:"upload_url,omitempty"` }
type WordSearchMatch ¶
type WordSearchMatch struct { // The total amount of times the word is in the transcript Count *int64 `json:"count,omitempty"` // An array of all index locations for that word within the `words` array of the completed transcript Indexes []int64 `json:"indexes,omitempty"` // The matched word Text *string `json:"text,omitempty"` // An array of timestamps Timestamps []WordSearchTimestamp `json:"timestamps,omitempty"` }
type WordSearchResponse ¶
type WordSearchResponse struct { // The ID of the transcript ID *string `json:"id,omitempty"` // The matches of the search Matches []WordSearchMatch `json:"matches,omitempty"` // The total count of all matched instances. For example, if word 1 matched 2 times and word 2 matched 3 times, `total_count` will equal 5. TotalCount *int64 `json:"total_count,omitempty"` }
type WordSearchTimestamp ¶
type WordSearchTimestamp []int64
An array of timestamps structured as [`start_time`, `end_time`] in milliseconds