Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: nbformat #10

Merged
merged 1 commit into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions documentloader/notebook.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"fmt"
"io"
"strings"

"github.com/hupe1980/golc/integration/nbformat"
"github.com/hupe1980/golc/schema"
Expand Down Expand Up @@ -75,7 +74,7 @@ func (l *Notebook) Load(ctx context.Context) ([]schema.Document, error) {
pageContent += fmt.Sprintf("'%s' cell: '%s'\n, gives error '%s', with description '%s'\n\n", c.CellType, c.Source, eName, eValue)
}
} else if c.Outputs[0].OutputType == "stream" {
output := strings.Join(c.Outputs[0].Text, "")
output := c.Outputs[0].Text
minOutput := len(output)

if minOutput > int(l.opts.MaxOutputLength) {
Expand Down
102 changes: 93 additions & 9 deletions integration/nbformat/nbformat.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,111 @@ package nbformat
import (
"encoding/json"
"io"
"strings"
)

// Notebook represents a Jupyter Notebook containing multiple cells.
// It is the top-level value decoded from notebook JSON; each field is read
// from the key named in its struct tag.
//
// NOTE(review): Cells is declared twice below. This looks like a removed and
// an added diff line shown together — confirm that only one declaration exists
// in the real file, since Go rejects duplicate field names.
type Notebook struct {
Cells []Cell `json:"cells"`
// Metadata is read from the "metadata" key.
Metadata Metadata `json:"metadata"`
// Nbformat and NbformatMinor are read from the "nbformat" and
// "nbformat_minor" keys as plain integers.
Nbformat int `json:"nbformat"`
NbformatMinor int `json:"nbformat_minor"`
// Cells holds the notebook's cells in the order they appear under "cells".
Cells []Cell `json:"cells"`
}

// Metadata represents the metadata of a Jupyter Notebook.
// Only the "kernelspec" and "language_info" entries are modeled here; other
// keys present in the JSON are not captured by this struct.
type Metadata struct {
// KernelSpec is read from the "kernelspec" key.
KernelSpec KernelSpec `json:"kernelspec"`
// LanguageInfo is read from the "language_info" key.
LanguageInfo LanguageInfo `json:"language_info"`
}

// KernelSpec represents the kernel specification in the metadata.
type KernelSpec struct {
// Name is read from the "name" key.
Name string `json:"name"`
// DisplayName is read from the "display_name" key.
DisplayName string `json:"display_name"`
}

// LanguageInfo represents the language information in the metadata.
type LanguageInfo struct {
// Name is read from the "name" key.
Name string `json:"name"`
// Version is read from the "version" key and kept as an uninterpreted string.
Version string `json:"version"`
}

// Cell represents a single cell within a Jupyter Notebook.
//
// NOTE(review): CellType, Source and Outputs each appear twice below. This
// looks like removed and added diff lines shown together — confirm that the
// real file keeps only the second group, since Go rejects duplicate field names.
type Cell struct {
CellType string `json:"cell_type"`
Source string `json:"source"`
Outputs []Output `json:"outputs"`
// CellType is read from the "cell_type" key.
CellType string `json:"cell_type"`
// Source holds the cell text as one string. In JSON it may be a string or a
// list of strings; Cell.UnmarshalJSON performs the conversion.
Source string `json:"source"` // Could be []string, but we always convert it to a single string
// Metadata keeps the cell's "metadata" object as an untyped map.
Metadata map[string]interface{} `json:"metadata"`
// Outputs is read from the "outputs" key and omitted when empty on encoding.
Outputs []Output `json:"outputs,omitempty"`
}

// UnmarshalJSON decodes a notebook cell and normalizes its "source" field into
// the single string stored in Source.
//
// The "source" value may be a JSON string or a JSON list of strings. List
// elements are concatenated without a separator: in the Jupyter notebook
// format each element already carries its own line ending, so inserting "\n"
// between elements would double every line break.
//
// A missing or null "source" leaves Source unchanged. Any other JSON type, or
// a list containing a non-string element, yields the encoding/json error
// instead of a panic or a silently empty Source.
func (c *Cell) UnmarshalJSON(data []byte) error {
	// Alias has Cell's fields but not its methods, so decoding into it does
	// not re-enter this method.
	type Alias Cell

	aux := struct {
		// Source shadows Alias.Source (the shallower field wins), which lets
		// the raw JSON value be inspected before it is converted.
		Source json.RawMessage `json:"source"`
		*Alias
	}{
		Alias: (*Alias)(c),
	}

	if err := json.Unmarshal(data, &aux); err != nil {
		return err
	}

	// Nothing to normalize when the key is absent or explicitly null.
	if len(aux.Source) == 0 || string(aux.Source) == "null" {
		return nil
	}

	// Anything that is not a list must be a plain string; let encoding/json
	// report a type error for numbers, objects, and booleans.
	if aux.Source[0] != '[' {
		return json.Unmarshal(aux.Source, &c.Source)
	}

	var lines []string
	if err := json.Unmarshal(aux.Source, &lines); err != nil {
		return err
	}

	c.Source = strings.Join(lines, "")

	return nil
}

// Output represents the output of a cell in a Jupyter Notebook.
//
// NOTE(review): ErrorName, ErrorValue, Traceback, OutputType and Text each
// appear twice below, and Text is declared both as []string and as string.
// This looks like removed and added diff lines shown together — confirm that
// the real file keeps only the second group.
type Output struct {
ErrorName string `json:"ename"`
ErrorValue string `json:"evalue"`
Traceback []string `json:"traceback"`
OutputType string `json:"output_type"`
Text []string `json:"text"`
// OutputType is read from the "output_type" key.
OutputType string `json:"output_type"`
// Text holds the output text as one string. In JSON it may be a string or a
// list of strings; Output.UnmarshalJSON performs the conversion.
Text string `json:"text,omitempty"` // Could be []string, but we always convert it to a single string
// Data and Metadata keep the "data" and "metadata" objects as untyped maps.
Data map[string]interface{} `json:"data,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
// ErrorName, ErrorValue and Traceback are read from the "ename", "evalue"
// and "traceback" keys; Traceback stays a list of strings.
ErrorName string `json:"ename,omitempty"`
ErrorValue string `json:"evalue,omitempty"`
Traceback []string `json:"traceback,omitempty"`
}

// UnmarshalJSON decodes a cell output and normalizes its "text" field into the
// single string stored in Text.
//
// The "text" value may be a JSON string or a JSON list of strings. List
// elements are concatenated without a separator: in the Jupyter notebook
// format each element already carries its own line ending, so inserting "\n"
// between elements would double every line break.
//
// A missing or null "text" leaves Text unchanged. Any other JSON type, or a
// list containing a non-string element, yields the encoding/json error instead
// of a panic or a silently empty Text.
func (o *Output) UnmarshalJSON(data []byte) error {
	// Alias has Output's fields but not its methods, so decoding into it does
	// not re-enter this method.
	type Alias Output

	aux := struct {
		// Text shadows Alias.Text (the shallower field wins), which lets the
		// raw JSON value be inspected before it is converted.
		Text json.RawMessage `json:"text,omitempty"`
		*Alias
	}{
		Alias: (*Alias)(o),
	}

	if err := json.Unmarshal(data, &aux); err != nil {
		return err
	}

	// Nothing to normalize when the key is absent or explicitly null.
	if len(aux.Text) == 0 || string(aux.Text) == "null" {
		return nil
	}

	// Anything that is not a list must be a plain string; let encoding/json
	// report a type error for numbers, objects, and booleans.
	if aux.Text[0] != '[' {
		return json.Unmarshal(aux.Text, &o.Text)
	}

	var lines []string
	if err := json.Unmarshal(aux.Text, &lines); err != nil {
		return err
	}

	o.Text = strings.Join(lines, "")

	return nil
}

// ReadNBFormat reads and parses a Jupyter Notebook from the given io.Reader.
Expand Down
Loading