Skip to content

Commit

Permalink
Add purl.oclc.org namespace support (unidoc#265)
Browse files Browse the repository at this point in the history
* Regenerate code to handle purl.oclc.org docs correctly
* Open purl.oclc.org docs the same way as the openxml ones
* Modify opening routine to change strict docs to non-strict
- `CustomXMLType` and `CustomXMLTypeStrict` were added to properly replace
these in strict docs
- `Decode` of `zippkg` was modified to replace the namespace in
relationships
- `TestOpenStrict` was modified to open the strict file, save it as
non-strict, reopen non-strict and validate its correctness
  • Loading branch information
nkryuchkov authored and gunnsth committed Apr 29, 2019
1 parent 5db1a51 commit e65ced8
Show file tree
Hide file tree
Showing 1,049 changed files with 12,407 additions and 6,157 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ coverage*
**/.coverprofile
gover.coverprofile
spreadsheet/formula/y.output

.idea
28 changes: 14 additions & 14 deletions document/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ func (d *Document) onNewRelationship(decMap *zippkg.DecodeMap, target, typ strin
dt := gooxml.DocTypeDocument

switch typ {
case gooxml.OfficeDocumentType:
case gooxml.OfficeDocumentType, gooxml.OfficeDocumentTypeStrict:
d.x = wml.NewDocument()
decMap.AddTarget(target, d.x, typ, 0)
// look for the document relationships file as well
Expand All @@ -694,11 +694,11 @@ func (d *Document) onNewRelationship(decMap *zippkg.DecodeMap, target, typ strin
decMap.AddTarget(target, d.CoreProperties.X(), typ, 0)
rel.TargetAttr = gooxml.RelativeFilename(dt, src.Typ, typ, 0)

case gooxml.ExtendedPropertiesType:
case gooxml.ExtendedPropertiesType, gooxml.ExtendedPropertiesTypeStrict:
decMap.AddTarget(target, d.AppProperties.X(), typ, 0)
rel.TargetAttr = gooxml.RelativeFilename(dt, src.Typ, typ, 0)

case gooxml.ThumbnailType:
case gooxml.ThumbnailType, gooxml.ThumbnailTypeStrict:
// read our thumbnail
for i, f := range files {
if f == nil {
Expand All @@ -718,21 +718,21 @@ func (d *Document) onNewRelationship(decMap *zippkg.DecodeMap, target, typ strin
}
}

case gooxml.SettingsType:
case gooxml.SettingsType, gooxml.SettingsTypeStrict:
decMap.AddTarget(target, d.Settings.X(), typ, 0)
rel.TargetAttr = gooxml.RelativeFilename(dt, src.Typ, typ, 0)

case gooxml.NumberingType:
case gooxml.NumberingType, gooxml.NumberingTypeStrict:
d.Numbering = NewNumbering()
decMap.AddTarget(target, d.Numbering.X(), typ, 0)
rel.TargetAttr = gooxml.RelativeFilename(dt, src.Typ, typ, 0)

case gooxml.StylesType:
case gooxml.StylesType, gooxml.StylesTypeStrict:
d.Styles.Clear()
decMap.AddTarget(target, d.Styles.X(), typ, 0)
rel.TargetAttr = gooxml.RelativeFilename(dt, src.Typ, typ, 0)

case gooxml.HeaderType:
case gooxml.HeaderType, gooxml.HeaderTypeStrict:
hdr := wml.NewHdr()
decMap.AddTarget(target, hdr, typ, uint32(len(d.headers)))
d.headers = append(d.headers, hdr)
Expand All @@ -743,7 +743,7 @@ func (d *Document) onNewRelationship(decMap *zippkg.DecodeMap, target, typ strin
decMap.AddTarget(zippkg.RelationsPathFor(target), hdrRel.X(), typ, 0)
d.hdrRels = append(d.hdrRels, hdrRel)

case gooxml.FooterType:
case gooxml.FooterType, gooxml.FooterTypeStrict:
ftr := wml.NewFtr()
decMap.AddTarget(target, ftr, typ, uint32(len(d.footers)))
d.footers = append(d.footers, ftr)
Expand All @@ -754,33 +754,33 @@ func (d *Document) onNewRelationship(decMap *zippkg.DecodeMap, target, typ strin
decMap.AddTarget(zippkg.RelationsPathFor(target), ftrRel.X(), typ, 0)
d.ftrRels = append(d.ftrRels, ftrRel)

case gooxml.ThemeType:
case gooxml.ThemeType, gooxml.ThemeTypeStrict:
thm := dml.NewTheme()
decMap.AddTarget(target, thm, typ, uint32(len(d.themes)))
d.themes = append(d.themes, thm)
rel.TargetAttr = gooxml.RelativeFilename(dt, src.Typ, typ, len(d.themes))

case gooxml.WebSettingsType:
case gooxml.WebSettingsType, gooxml.WebSettingsTypeStrict:
d.webSettings = wml.NewWebSettings()
decMap.AddTarget(target, d.webSettings, typ, 0)
rel.TargetAttr = gooxml.RelativeFilename(dt, src.Typ, typ, 0)

case gooxml.FontTableType:
case gooxml.FontTableType, gooxml.FontTableTypeStrict:
d.fontTable = wml.NewFonts()
decMap.AddTarget(target, d.fontTable, typ, 0)
rel.TargetAttr = gooxml.RelativeFilename(dt, src.Typ, typ, 0)

case gooxml.EndNotesType:
case gooxml.EndNotesType, gooxml.EndNotesTypeStrict:
d.endNotes = wml.NewEndnotes()
decMap.AddTarget(target, d.endNotes, typ, 0)
rel.TargetAttr = gooxml.RelativeFilename(dt, src.Typ, typ, 0)

case gooxml.FootNotesType:
case gooxml.FootNotesType, gooxml.FootNotesTypeStrict:
d.footNotes = wml.NewFootnotes()
decMap.AddTarget(target, d.footNotes, typ, 0)
rel.TargetAttr = gooxml.RelativeFilename(dt, src.Typ, typ, 0)

case gooxml.ImageType:
case gooxml.ImageType, gooxml.ImageTypeStrict:
var iref common.ImageRef
for i, f := range files {
if f == nil {
Expand Down
31 changes: 31 additions & 0 deletions document/document_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ package document_test

import (
"bytes"
"io/ioutil"
"os"
"testing"

"baliance.com/gooxml/common"
Expand Down Expand Up @@ -44,6 +46,35 @@ func TestOpen(t *testing.T) {
testhelper.CompareZip(t, "simple-1.docx", got.Bytes(), true)
}

// TestOpenStrict opens a document stored in testdata/strict.docx, validates
// and saves it, writes the saved bytes to testdata/non-strict.docx, then
// reopens that file, validates it again and compares a second save against
// it with testhelper.CompareZip.
//
// The intermediate file is removed when the test returns, including on early
// failure, so a broken run does not leave a stale fixture in testdata.
func TestOpenStrict(t *testing.T) {
	const nonStrictPath = "testdata/non-strict.docx"

	strict, err := document.Open("testdata/strict.docx")
	if err != nil {
		// Nothing below can run without a document; continuing would
		// dereference a nil value.
		t.Fatalf("error opening document: %s", err)
	}

	if err := strict.Validate(); err != nil {
		t.Errorf("created an invalid document: %s", err)
	}
	gotStrict := bytes.Buffer{}
	if err := strict.Save(&gotStrict); err != nil {
		t.Fatalf("error saving document: %s", err)
	}

	// Register cleanup before writing so a partially written file is also
	// removed; removing a file that does not exist is harmless here.
	defer os.Remove(nonStrictPath)
	if err := ioutil.WriteFile(nonStrictPath, gotStrict.Bytes(), 0644); err != nil {
		t.Fatalf("error writing %s: %s", nonStrictPath, err)
	}

	// run test assuming that the doc is a valid non-strict doc
	nonStrict, err := document.Open(nonStrictPath)
	if err != nil {
		t.Fatalf("error opening document: %s", err)
	}

	if err := nonStrict.Validate(); err != nil {
		t.Errorf("created an invalid document: %s", err)
	}
	gotNonStrict := bytes.Buffer{}
	if err := nonStrict.Save(&gotNonStrict); err != nil {
		t.Fatalf("error saving document: %s", err)
	}
	testhelper.CompareZip(t, "non-strict.docx", gotNonStrict.Bytes(), true)
}

func TestOpenHeaderFooter(t *testing.T) {
wb, err := document.Open("testdata/header-footer-multiple.docx")
if err != nil {
Expand Down
Binary file added document/testdata/strict.docx
Binary file not shown.
44 changes: 22 additions & 22 deletions filenames.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,12 @@ func AbsoluteFilename(dt DocType, typ string, index int) string {
switch typ {
case CorePropertiesType:
return "docProps/core.xml"
case ExtendedPropertiesType:
case ExtendedPropertiesType, ExtendedPropertiesTypeStrict:
return "docProps/app.xml"
case ThumbnailType:
case ThumbnailType, ThumbnailTypeStrict:
return "docProps/thumbnail.jpeg"

case OfficeDocumentType:
case OfficeDocumentType, OfficeDocumentTypeStrict:
switch dt {
case DocTypeSpreadsheet:
return "xl/workbook.xml"
Expand All @@ -87,7 +87,7 @@ func AbsoluteFilename(dt DocType, typ string, index int) string {
Log("unsupported type %s pair and %v", typ, dt)
}

case ThemeType, ThemeContentType:
case ThemeType, ThemeTypeStrict, ThemeContentType:
switch dt {
case DocTypeSpreadsheet:
return fmt.Sprintf("xl/theme/theme%d.xml", index)
Expand All @@ -99,7 +99,7 @@ func AbsoluteFilename(dt DocType, typ string, index int) string {
Log("unsupported type %s pair and %v", typ, dt)
}

case StylesType:
case StylesType, StylesTypeStrict:
switch dt {
case DocTypeSpreadsheet:
return "xl/styles.xml"
Expand All @@ -111,41 +111,41 @@ func AbsoluteFilename(dt DocType, typ string, index int) string {
Log("unsupported type %s pair and %v", typ, dt)
}

case ChartType, ChartContentType:
case ChartType, ChartTypeStrict, ChartContentType:
switch dt {
case DocTypeSpreadsheet:
return fmt.Sprintf("xl/charts/chart%d.xml", index)
default:
Log("unsupported type %s pair and %v", typ, dt)
}
case TableType, TableContentType:
case TableType, TableTypeStrict, TableContentType:
return fmt.Sprintf("xl/tables/table%d.xml", index)

case DrawingType, DrawingContentType:
case DrawingType, DrawingTypeStrict, DrawingContentType:
switch dt {
case DocTypeSpreadsheet:
return fmt.Sprintf("xl/drawings/drawing%d.xml", index)
default:
Log("unsupported type %s pair and %v", typ, dt)
}

case CommentsType, CommentsContentType:
case CommentsType, CommentsTypeStrict, CommentsContentType:
switch dt {
case DocTypeSpreadsheet:
return fmt.Sprintf("xl/comments%d.xml", index)
default:
Log("unsupported type %s pair and %v", typ, dt)
}

case VMLDrawingType, VMLDrawingContentType:
case VMLDrawingType, VMLDrawingTypeStrict, VMLDrawingContentType:
switch dt {
case DocTypeSpreadsheet:
return fmt.Sprintf("xl/drawings/vmlDrawing%d.vml", index)
default:
Log("unsupported type %s pair and %v", typ, dt)
}

case ImageType:
case ImageType, ImageTypeStrict:
switch dt {
case DocTypeDocument:
return fmt.Sprintf("word/media/image%d.png", index)
Expand All @@ -157,31 +157,31 @@ func AbsoluteFilename(dt DocType, typ string, index int) string {
Log("unsupported type %s pair and %v", typ, dt)
}
// SML
case WorksheetType, WorksheetContentType:
case WorksheetType, WorksheetTypeStrict, WorksheetContentType:
return fmt.Sprintf("xl/worksheets/sheet%d.xml", index)
case SharedStingsType, SharedStringsContentType:
case SharedStingsType, SharedStingsTypeStrict, SharedStringsContentType:
return "xl/sharedStrings.xml"

// WML
case FontTableType:
case FontTableType, FontTableTypeStrict:
return "word/fontTable.xml"
case EndNotesType:
case EndNotesType, EndNotesTypeStrict:
return "word/endnotes.xml"
case FootNotesType:
case FootNotesType, FootNotesTypeStrict:
return "word/footnotes.xml"
case NumberingType:
case NumberingType, NumberingTypeStrict:
return "word/numbering.xml"
case WebSettingsType:
case WebSettingsType, WebSettingsTypeStrict:
return "word/webSettings.xml"
case SettingsType:
case SettingsType, SettingsTypeStrict:
return "word/settings.xml"
case HeaderType:
case HeaderType, HeaderTypeStrict:
return fmt.Sprintf("word/header%d.xml", index)
case FooterType:
case FooterType, FooterTypeStrict:
return fmt.Sprintf("word/footer%d.xml", index)

// PML
case SlideType:
case SlideType, SlideTypeStrict:
return fmt.Sprintf("ppt/slides/slide%d.xml", index)
case SlideLayoutType:
return fmt.Sprintf("ppt/slideLayouts/slideLayout%d.xml", index)
Expand Down
Loading

0 comments on commit e65ced8

Please sign in to comment.