# OpenAPI description of the CreatorNode describe-scenes endpoint.
openapi: "3.1.0"
info:
  title: "CreatorNode Postproduction API - Describe Scenes"
  # Folded scalar: blank lines inside it become line breaks in the value.
  description: >
    Generate per-scene visual descriptions and optional narration-aware
    interpretations from ordered scene images and optional full-story
    narration context.


    Use this endpoint to prepare scene-level text input for timeline
    and postproduction workflows.

    Responses may also include best-effort anchorText and startCueText
    hints when narration context supports them.
  # Quoted so the value can never be read as a number.
  version: "1.0.0"
  contact:
    name: "CreatorNode Support"
    url: "https://creatornode.io/support"
  license:
    name: "Proprietary"
    url: "https://creatornode.io/legal"
# Base URL against which the path templates under `paths` are resolved.
servers:
  - description: "Production"
    url: "https://api.creatornode.io/postproduction"
# Tag referenced by the operation declared under `paths`.
tags:
  - description: "Postproduction processing endpoints"
    name: Postproduction
paths:
  /v1/describe-scenes:
    # Multipart upload: ordered scene images plus a JSON metadata part.
    post:
      operationId: describeScenes
      tags: [Postproduction]
      summary: >-
        Generate per-scene descriptions from images and optional narration text
      description: >-
        Generate one visual description per uploaded image and optional
        narration-aware interpretation. Requests are multipart-only.
        Responses stay index-aligned and may also include anchorText and
        startCueText for downstream alignment. Premium and Enterprise
        callers can mark up to 5 scenes with
        metadata.sceneOptions[].extraDetail=true for deeper analysis at +1
        credit each.
      # NOTE(review): the ApiKeyAuth scheme description states that requests
      # without a key fall back to free tier limits, while this requirement
      # makes the key mandatory - confirm whether an empty `{}` alternative
      # should be listed here.
      security:
        - ApiKeyAuth: []
      # The only accepted request encoding is multipart/form-data.
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              $ref: "#/components/schemas/DescribeScenesMultipartRequest"
            examples:
              basicThreeScenes:
                summary: "Three ordered scene images with narration metadata"
                value:
                  # JSON text sent as a string part. The folded scalar joins
                  # its lines with single spaces, so line breaks are placed
                  # only where the JSON text itself contains a space.
                  metadata: >-
                    {"narrationText":"A quiet morning in the city, then a crowded
                    market, and finally a sunset
                    skyline.","sceneIds":["scene-1","scene-2","scene-3"],"sceneOptions":[{},
                    {"extraDetail":true},
                    {}],"hints":{"languageCode":"en","style":"normal"}}
                  # Placeholders standing in for the uploaded binary parts.
                  images:
                    - "(bridge-shot.jpg)"
                    - "(market-shot.jpg)"
                    - "(sunset-shot.jpg)"
      responses:
        # Success payload; scenes stay index-aligned with the uploaded images.
        "200":
          description: "Scene descriptions generated successfully"
          headers:
            X-Request-Id:
              description: "Unique request identifier"
              schema:
                type: string
                example: req_abc123
            X-Credits-Used:
              description: >-
                Credits consumed by this request (present only for paid tiers)
              schema:
                type: integer
                example: 5
            X-Credits-Remaining:
              description: >-
                Remaining credits on the API key (present only for paid tiers
                with prepaid credits)
              schema:
                type: integer
                example: 127
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/DescribeScenesSuccessResponse"
              examples:
                # Two-scene payload carrying no anchorText/startCueText.
                baselineCompatible:
                  summary: "Baseline-compatible response without anchorText on scenes"
                  value:
                    success: true
                    data:
                      narrationHash: "7dc8f3ebf53f8dd8af3fd4f2ab5e4f0f7bfa5ecf0f6fae17c3f1152e9f8c8d6b"
                      characters: []
                      scenes:
                        - index: 0
                          id: scene-1
                          imageName: bridge-shot.jpg
                          detectedVisibleCharactersIds: []
                          description: "Cyclist crossing a bridge in soft morning light."
                          interpretation: >-
                            This establishes the morning ride before the route
                            enters the city center.
                        - index: 1
                          id: scene-2
                          imageName: market-shot.jpg
                          detectedVisibleCharactersIds: []
                          description: "Busy market street with people moving between stalls."
                    meta:
                      requestId: req_abc123
                      processingTimeMs: 412
                      imageCount: 2
                # Three-scene payload where every scene carries both cue hints.
                withCueHints:
                  summary: >-
                    Response with additive anchorText and startCueText for
                    downstream alignment
                  value:
                    success: true
                    data:
                      narrationHash: "7dc8f3ebf53f8dd8af3fd4f2ab5e4f0f7bfa5ecf0f6fae17c3f1152e9f8c8d6b"
                      characters: []
                      scenes:
                        - index: 0
                          id: scene-1
                          imageName: bridge-shot.jpg
                          detectedVisibleCharactersIds: []
                          description: "Cyclist crossing a bridge in soft morning light."
                          interpretation: >-
                            This establishes the morning ride before the route
                            enters the city center.
                          startCueText: "As the cyclist starts across the bridge"
                          anchorText: "The cyclist heads across the bridge at daybreak."
                        - index: 1
                          id: scene-2
                          imageName: market-shot.jpg
                          detectedVisibleCharactersIds: []
                          description: "Busy market street with people moving between stalls."
                          interpretation: >-
                            This marks the busier middle beat as the rider
                            reaches the market district.
                          startCueText: "The route pushes into the market"
                          anchorText: "The story moves into the crowded market streets."
                        - index: 2
                          id: scene-3
                          imageName: sunset-shot.jpg
                          detectedVisibleCharactersIds: []
                          description: "City skyline glowing in warm sunset colors."
                          interpretation: >-
                            This closes the sequence with the final city-wide
                            sunset payoff.
                          startCueText: "The city settles into sunset"
                          anchorText: "The city settles into a warm sunset skyline."
                    meta:
                      requestId: req_abc123
                      processingTimeMs: 412
                      imageCount: 3
        "400":
          # Folded scalar: each blank line below is a line break in the value.
          description: >
            Request validation or safety rejection. Common codes:

            `VALIDATION_ERROR`, `TOO_MANY_FILES`, `UPLOAD_SIZE_EXCEEDED`,

            `IMAGE_UNSUPPORTED_FORMAT`, `UNSAFE_PROMPT`, `UNSAFE_IMAGE`.

            Free tier rejects `metadata.sceneOptions[].extraDetail`, and
            paid tiers are limited to 5 such scenes per request.
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
              examples:
                tooManyFiles:
                  summary: "Tier image count exceeded"
                  value:
                    success: false
                    error:
                      code: TOO_MANY_FILES
                      message: "Too many files: maximum 5 images for tier free"
                      details: {fileCount: 7, maxFiles: 5}
                    meta: {requestId: req_abc123}
                unsafePrompt:
                  summary: "narrationText rejected by safety checks"
                  value:
                    success: false
                    error:
                      code: UNSAFE_PROMPT
                      message: "Narration text rejected by safety checks"
                    meta: {requestId: req_abc123}
                unsafeImage:
                  summary: "One uploaded image rejected by safety checks"
                  value:
                    success: false
                    error:
                      code: UNSAFE_IMAGE
                      message: "One or more images were rejected by safety checks"
                      details: {flaggedIndex: 1}
                    meta: {requestId: req_abc123}
        "401":
          # NOTE(review): no body schema is documented for this status -
          # confirm whether it shares the ErrorResponse envelope.
          description: "Unauthorized - invalid or missing API key"
        "429":
          # NOTE(review): no body schema is documented for this status either.
          description: "Rate limited - too many requests"
        "500":
          description: Unexpected scene description processing error
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
              # Named example, in the same form the 400, 503 and 504 responses
              # use, so documentation tools list it with a summary.
              examples:
                aiDescriptionFailed:
                  summary: Scene description generation failed
                  value:
                    success: false
                    error:
                      code: AI_DESCRIPTION_FAILED
                      message: Scene description generation failed
                    meta:
                      requestId: req_abc123
        "503":
          # Two documented causes: the safety service and the vision service.
          description: "Upstream service temporarily unavailable"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
              examples:
                safetyUnavailable:
                  summary: "Safety service unavailable"
                  value:
                    success: false
                    error:
                      code: SAFETY_CHECK_UNAVAILABLE
                      message: "Safety check failed"
                    meta: {requestId: req_abc123}
                visionRateLimited:
                  summary: "Image analysis service temporarily rate limited"
                  value:
                    success: false
                    error:
                      code: VISION_RATE_LIMITED
                      message: "Image analysis service is temporarily rate limited"
                    meta: {requestId: req_abc123}
        "504":
          # Two documented causes: safety check timeout and vision timeout.
          description: "Upstream timeout"
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
              examples:
                safetyTimeout:
                  summary: "Safety check timed out"
                  value:
                    success: false
                    error:
                      code: SAFETY_CHECK_TIMEOUT
                      message: "Safety check failed"
                    meta: {requestId: req_abc123}
                visionTimeout:
                  summary: "Image analysis timed out"
                  value:
                    success: false
                    error:
                      code: VISION_TIMEOUT
                      message: "Image analysis timed out"
                    meta: {requestId: req_abc123}
components:
  securitySchemes:
    # API key carried in the X-API-Key request header.
    ApiKeyAuth:
      type: apiKey
      name: X-API-Key
      in: header
      description: >-
        APIM subscription key for authenticated access. Without a key,
        requests use free tier limits.
  schemas:
    # Item type of the `recommendations` arrays on the success and error
    # envelopes.
    Recommendation:
      type: object
      required: [type, title, message]
      properties:
        type:
          type: string
          enum: [upgrade, top_up, feature, tip, warning, fix]
        title:
          type: string
        message:
          type: string
        # Optional call-to-action; label and url are both required when present.
        action:
          type: object
          required: [label, url]
          properties:
            label:
              type: string
            url:
              type: string
              format: uri
        priority:
          type: string
          enum: [low, medium, high]
          default: low
    # Error envelope referenced by the 400, 500, 503 and 504 responses.
    ErrorResponse:
      type: object
      required:
        - success
        - error
      properties:
        # Always false on this envelope.
        success:
          type: boolean
          enum:
            - false
        error:
          type: object
          required:
            - code
            - message
          properties:
            code:
              type: string
              # The sample codes are ones that appear in this document's
              # response descriptions and examples.
              description: Endpoint-specific error code, for example VALIDATION_ERROR,
                TOO_MANY_FILES, UPLOAD_SIZE_EXCEEDED, UNSAFE_IMAGE, or
                VISION_TIMEOUT.
            message:
              type: string
            # Free-form, code-specific context (for example fileCount/maxFiles
            # in the TOO_MANY_FILES example).
            details:
              type: object
              additionalProperties: true
        meta:
          type: object
          properties:
            requestId:
              type: string
        recommendations:
          type: array
          items:
            $ref: "#/components/schemas/Recommendation"
    # Output preferences referenced by the `hints` property of the metadata
    # schemas.
    DescribeScenesHints:
      type: object
      properties:
        languageCode:
          type: string
          minLength: 1
          maxLength: 20
          description: "BCP-47-like language hint used for output language preference."
          example: en
        style:
          type: string
          enum: [short, normal, detailed]
          default: normal
          description: >
            short = label-like output, normal = concise informative
            output, detailed = longer descriptions when enabled by the
            current plan/configuration.
    # Decoded shape of the JSON carried in the multipart `metadata` part.
    DescribeScenesMetadata:
      type: object
      properties:
        narrationText:
          type: string
          minLength: 1
          description: >-
            Optional full-story narration context. Not returned in
            responses. Length limits depend on the current plan and
            configuration.
          example: >-
            A quiet morning in the city, then a crowded market, and finally
            a sunset skyline.
        sceneIds:
          type: array
          description: >-
            Optional stable client IDs for scenes (must match images[]
            length when provided).
          items:
            type: string
            minLength: 1
            maxLength: 100
          example: [scene-1, scene-2, scene-3]
        sceneOptions:
          type: array
          description: >-
            Optional per-scene options aligned by index to images[]. If
            provided, length must match images[]. Premium and Enterprise
            callers may set extraDetail=true on up to 5 scenes. Free tier
            rejects it.
          items:
            $ref: "#/components/schemas/DescribeSceneOption"
          example: [{}, {extraDetail: true}, {}]
        hints:
          $ref: "#/components/schemas/DescribeScenesHints"
      # Whole-object sample matching the request example for three scenes.
      example:
        narrationText: >-
          A quiet morning in the city, then a crowded market, and finally
          a sunset skyline.
        sceneIds: [scene-1, scene-2, scene-3]
        sceneOptions: [{}, {extraDetail: true}, {}]
        hints:
          languageCode: en
          style: normal
    # Form-data body of describeScenes: one JSON metadata part plus the
    # ordered image parts.
    DescribeScenesMultipartRequest:
      type: object
      description: Multipart request with JSON metadata and ordered scene images.
        Image-count and narration-length limits depend on the current plan and
        configuration.
      required:
        - metadata
        - images
      properties:
        metadata:
          type: string
          description: JSON string containing DescribeScenesMetadata.
          # Annotate the string as embedded JSON and point at its decoded
          # shape, so tooling does not have to rely on the prose description.
          # NOTE(review): some generators may send this part with an
          # application/json Content-Type because of this annotation - confirm
          # the server accepts that.
          contentMediaType: application/json
          contentSchema:
            $ref: "#/components/schemas/DescribeScenesMetadata"
          example: '{"narrationText":"A quiet morning in the city, then a crowded market,
            and finally a sunset
            skyline.","sceneIds":["scene-1","scene-2","scene-3"],"sceneOptions":[{},
            {"extraDetail":true},
            {}],"hints":{"languageCode":"en","style":"normal"}}'
        images:
          type: array
          description: |
            Ordered scene images.
            Supported formats: PNG, JPEG, WEBP.
          items:
            type: string
            format: binary
          minItems: 1
          # Schema-level ceiling; the description above says the effective
          # image-count limit depends on plan and configuration.
          maxItems: 100
    # Item type of data.scenes[] in the success response.
    SceneDescription:
      type: object
      required: [index, imageName, detectedVisibleCharactersIds, description]
      properties:
        index:
          type: integer
          minimum: 0
          description: "Zero-based scene index in request order."
          example: 0
        id:
          type: string
          description: "Echoed value from sceneIds[] when provided."
          example: scene-1
        imageName:
          type: string
          description: "Uploaded image filename."
          example: bridge-shot.jpg
        detectedVisibleCharactersIds:
          type: array
          description: "Stable ids of the visible characters assigned to this scene."
          items:
            type: string
          example: [cyclist]
        description:
          type: string
          description: >-
            Human-facing visual summary of what is visibly shown in the image.
          example: "Cyclist crossing a bridge in soft morning light."
        # The three properties below are optional (absent from `required`).
        interpretation:
          type: string
          description: >-
            Optional narration-aware explanation of what the visible moment
            means in story context.
          example: >-
            This establishes the morning ride before the story moves deeper
            into the city.
        anchorText:
          type: string
          description: >-
            Optional short script-facing scene-to-script mapping hint for
            downstream narration alignment.
          example: "The cyclist heads across the bridge at daybreak."
        startCueText:
          type: string
          description: >-
            Optional short scene-entry cue focused on the earliest narration
            words after which the scene should already count as active.
          example: "As the cyclist starts across the bridge"
    # Envelope referenced by the 200 response of describeScenes.
    DescribeScenesSuccessResponse:
      type: object
      required: [success, data, meta]
      properties:
        # Always true on this envelope.
        success:
          type: boolean
          enum: [true]
        data:
          type: object
          required: [narrationHash, characters, scenes]
          properties:
            narrationHash:
              type: string
              description: "SHA-256 hash of narrationText for client-side correlation."
              example: "7dc8f3ebf53f8dd8af3fd4f2ab5e4f0f7bfa5ecf0f6fae17c3f1152e9f8c8d6b"
            characters:
              type: array
              description: >-
                Stable story character roster resolved from narration and
                image evidence.
              items:
                $ref: "#/components/schemas/StoryCharacter"
            scenes:
              type: array
              items:
                $ref: "#/components/schemas/SceneDescription"
        meta:
          type: object
          required: [requestId, processingTimeMs, imageCount]
          properties:
            requestId:
              type: string
              example: req_abc123
            processingTimeMs:
              type: integer
              example: 412
            imageCount:
              type: integer
              example: 3
            # Optional flag (absent from `required`).
            cached:
              type: boolean
              description: "Whether response was served from cache."
        recommendations:
          type: array
          items:
            $ref: "#/components/schemas/Recommendation"
    # JSON-only request body of the demo variant; no path in this document
    # references it.
    DescribeScenesDemoRequest:
      type: object
      # Literal scalar: one sentence per line, each line break preserved.
      description: |
        Demo endpoint accepts JSON metadata only.
        This mirrors the metadata shape used in the real multipart request.
        narrationText is optional in demo mode; the demo only validates sceneIds shape.
      properties:
        narrationText:
          type: string
          minLength: 1
        sceneIds:
          type: array
          items:
            type: string
          description: "Optional; if provided in demo, use exactly 3 items."
        hints:
          $ref: "#/components/schemas/DescribeScenesHints"
      example:
        narrationText: >-
          A quiet morning in the city, then a crowded market, and finally
          a sunset skyline.
        sceneIds: [scene-1, scene-2, scene-3]
        hints:
          languageCode: en
          style: normal
    # Success envelope extended with two mandatory demo markers.
    DescribeScenesDemoResponse:
      allOf:
        - $ref: "#/components/schemas/DescribeScenesSuccessResponse"
        - type: object
          required: [demoMode, warning]
          properties:
            # Always true on this schema.
            demoMode:
              type: boolean
              enum: [true]
            warning:
              type: string
              description: "Explains that demo response is deterministic sample data."
    # Item type of data.characters[] in the success response.
    StoryCharacter:
      type: object
      required: [id, name, appearance]
      properties:
        id:
          type: string
          description: >-
            Stable machine-friendly id used for scene character references.
          example: cyclist
        name:
          type: string
          description: >-
            Preferred story-facing name or stable visual label for the
            character.
          example: cyclist
        appearance:
          type: string
          description: >-
            Short image-grounded appearance summary used to distinguish
            similar characters.
          example: "person riding a bicycle wearing a dark jacket"
    # Item type of metadata.sceneOptions[].
    DescribeSceneOption:
      type: object
      properties:
        extraDetail:
          type: boolean
          description: >-
            Paid-tier-only per-scene override. When true, this scene is
            re-analyzed with a higher-detail multimodal model instead of
            using only the default Vision result.
