summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'extract/src/odt.c')
-rw-r--r-- extract/src/odt.c 134
1 files changed, 83 insertions, 51 deletions
diff --git a/extract/src/odt.c b/extract/src/odt.c
index 9e369078..e2e45e2d 100644
--- a/extract/src/odt.c
+++ b/extract/src/odt.c
@@ -128,7 +128,7 @@ static int s_odt_styles_definitions(
);
extract_astring_cat(alloc, out, "<style:paragraph-properties style:writing-mode=\"lr-tb\"/>\n");
extract_astring_cat(alloc, out, "</style:style>\n");
-
+
/* Style for images. */
extract_astring_cat(alloc, out, "<style:style style:name=\"fr1\" style:family=\"graphic\" style:parent-style-name=\"Graphics\">\n");
extract_astring_cat(alloc, out, "<style:graphic-properties"
@@ -156,8 +156,8 @@ static int s_odt_styles_definitions(
" draw:color-mode=\"standard\""
"/>\n");
extract_astring_cat(alloc, out, "</style:style>\n");
-
-
+
+
if (extract_astring_cat(alloc, out, "</office:automatic-styles>")) return -1;
return 0;
}
@@ -308,9 +308,9 @@ change font. */
if (s_odt_run_finish(alloc, content_state, content)) goto end;
}
if (s_odt_paragraph_finish(alloc, content)) goto end;
-
+
e = 0;
-
+
end:
return e;
}
@@ -335,14 +335,14 @@ static int s_odt_append_image(
);
extract_astring_cat(alloc, content, "</draw:frame>\n");
extract_astring_cat(alloc, content, "</text:p>\n");
-
+
return 0;
}
static int s_odt_output_rotated_paragraphs(
extract_alloc_t* alloc,
- extract_page_t* page,
+ subpage_t* subpage,
int paragraph_begin,
int paragraph_end,
double rotation_rad,
@@ -361,13 +361,13 @@ static int s_odt_output_rotated_paragraphs(
int p;
double pt_to_inch = 1/72.0;
outf("rotated paragraphs: rotation_rad=%f (x y)=(%f %f) (w h)=(%f %f)", rotation_rad, x_pt, y_pt, w_pt, h_pt);
-
+
// https://docs.oasis-open.org/office/OpenDocument/v1.3/cs02/part3-schema/OpenDocument-v1.3-cs02-part3-schema.html#attribute-draw_transform
// says rotation is in degrees, but we seem to require -radians.
//
-
+
if (!e) e = extract_astring_cat(alloc, content, "\n");
-
+
if (!e) e = extract_astring_cat(alloc, content, "<text:p text:style-name=\"Standard\">\n");
if (!e) e = extract_astring_catf(alloc, content, "<draw:frame"
" text:anchor-type=\"paragraph\""
@@ -388,19 +388,19 @@ static int s_odt_output_rotated_paragraphs(
y_pt * pt_to_inch
);
if (!e) e = extract_astring_cat(alloc, content, "<draw:text-box>\n");
-
+
for (p=paragraph_begin; p<paragraph_end; ++p)
{
- paragraph_t* paragraph = page->paragraphs[p];
+ paragraph_t* paragraph = subpage->paragraphs[p];
if (!e) e = s_document_to_odt_content_paragraph(alloc, content_state, paragraph, content, styles);
}
-
+
if (!e) e = extract_astring_cat(alloc, content, "\n");
if (!e) e = extract_astring_cat(alloc, content, "</draw:text-box>\n");
if (!e) e = extract_astring_cat(alloc, content, "</draw:frame>\n");
-
+
if (!e) e = extract_astring_cat(alloc, content, "</text:p>\n");
-
+
return e;
}
@@ -409,7 +409,7 @@ static int s_odt_append_table(extract_alloc_t* alloc, table_t* table, extract_as
{
int e = -1;
int y;
-
+
{
int x;
static int table_number = 0;
@@ -438,7 +438,7 @@ static int s_odt_append_table(extract_alloc_t* alloc, table_t* table, extract_as
if (extract_astring_cat(alloc, content,
" <table:table-row>\n"
)) goto end;
-
+
for (x=0; x<table->cells_num_x; ++x)
{
cell_t* cell = table->cells[y*table->cells_num_x + x];
@@ -447,7 +447,7 @@ static int s_odt_append_table(extract_alloc_t* alloc, table_t* table, extract_as
if (extract_astring_cat(alloc, content, " <table:covered-table-cell/>\n")) goto end;
continue;
}
-
+
if (extract_astring_cat(alloc, content, " <table:table-cell")) goto end;
if (cell->extend_right > 1)
{
@@ -458,7 +458,7 @@ static int s_odt_append_table(extract_alloc_t* alloc, table_t* table, extract_as
if (extract_astring_catf(alloc, content, " table:number-rows-spanned=\"%i\"", cell->extend_down)) goto end;
}
if (extract_astring_catf(alloc, content, ">\n")) goto end;
-
+
/* Write contents of this cell. */
{
int p;
@@ -482,7 +482,7 @@ static int s_odt_append_table(extract_alloc_t* alloc, table_t* table, extract_as
}
if (extract_astring_cat(alloc, content, " </table:table>\n")) goto end;
e = 0;
-
+
end:
return e;
}
@@ -490,7 +490,7 @@ static int s_odt_append_table(extract_alloc_t* alloc, table_t* table, extract_as
static int s_odt_append_rotated_paragraphs(
extract_alloc_t* alloc,
- extract_page_t* page,
+ subpage_t* subpage,
content_state_t* content_state,
int* p,
int* text_box_id,
@@ -499,7 +499,7 @@ static int s_odt_append_rotated_paragraphs(
extract_astring_t* content,
extract_odt_styles_t* styles
)
-/* Appends paragraphs with same rotation, starting with page->paragraphs[*p]
+/* Appends paragraphs with same rotation, starting with subpage->paragraphs[*p]
and updates *p. */
{
/* Find extent of paragraphs with this same rotation. extent
@@ -509,7 +509,7 @@ and updates *p. */
point_t extent = {0, 0};
int p0 = *p;
int p1;
- paragraph_t* paragraph = page->paragraphs[*p];
+ paragraph_t* paragraph = subpage->paragraphs[*p];
outf("rotate=%.2frad=%.1fdeg ctm: ef=(%f %f) abcd=(%f %f %f %f)",
rotate, rotate * 180 / pi,
@@ -546,9 +546,9 @@ and updates *p. */
ctm->a, ctm->b, ctm->c, ctm->d);
}
- for (*p=p0; *p<page->paragraphs_num; ++*p)
+ for (*p=p0; *p<subpage->paragraphs_num; ++*p)
{
- paragraph = page->paragraphs[*p];
+ paragraph = subpage->paragraphs[*p];
ctm = &paragraph->lines[0]->spans[0]->ctm;
rotate = atan2(ctm->b, ctm->a);
if (rotate != rotate0)
@@ -597,7 +597,7 @@ and updates *p. */
if (s_odt_output_rotated_paragraphs(
alloc,
- page,
+ subpage,
p0,
p1,
rotate,
@@ -612,15 +612,15 @@ and updates *p. */
)) goto end;
*p = p1 - 1;
e = 0;
-
+
end:
return e;
}
-int extract_document_to_odt_content(
+static int extract_page_to_odt_content(
extract_alloc_t* alloc,
- document_t* document,
+ extract_page_t* page,
int spacing,
int rotation,
int images,
@@ -630,12 +630,12 @@ int extract_document_to_odt_content(
{
int ret = -1;
int text_box_id = 0;
- int p;
+ int c;
/* Write paragraphs into <content>. */
- for (p=0; p<document->pages_num; ++p)
+ for (c=0; c<page->subpages_num; ++c)
{
- extract_page_t* page = document->pages[p];
+ subpage_t* subpage = page->subpages[c];
int p = 0;
int t = 0;
content_state_t content_state;
@@ -644,17 +644,17 @@ int extract_document_to_odt_content(
content_state.font.bold = 0;
content_state.font.italic = 0;
content_state.ctm_prev = NULL;
-
+
for(;;)
{
- paragraph_t* paragraph = (p == page->paragraphs_num) ? NULL : page->paragraphs[p];
- table_t* table = (t == page->tables_num) ? NULL : page->tables[t];
+ paragraph_t* paragraph = (p == subpage->paragraphs_num) ? NULL : subpage->paragraphs[p];
+ table_t* table = (t == subpage->tables_num) ? NULL : subpage->tables[t];
double y_paragraph;
double y_table;
if (!paragraph && !table) break;
y_paragraph = (paragraph) ? paragraph->lines[0]->spans[0]->chars[0].y : DBL_MAX;
y_table = (table) ? table->pos.y : DBL_MAX;
-
+
if (paragraph && y_paragraph < y_table)
{
const matrix_t* ctm = &paragraph->lines[0]->spans[0]->ctm;
@@ -683,7 +683,7 @@ int extract_document_to_odt_content(
if (rotation && rotate != 0)
{
- if (s_odt_append_rotated_paragraphs(alloc, page, &content_state, &p, &text_box_id, ctm, rotate, content, styles)) goto end;
+ if (s_odt_append_rotated_paragraphs(alloc, subpage, &content_state, &p, &text_box_id, ctm, rotate, content, styles)) goto end;
}
else
{
@@ -697,15 +697,15 @@ int extract_document_to_odt_content(
t += 1;
}
}
-
+
outf("images=%i", images);
if (images)
{
int i;
- outf("page->images_num=%i", page->images_num);
- for (i=0; i<page->images_num; ++i)
+ outf("subpage->images_num=%i", subpage->images_num);
+ for (i=0; i<subpage->images_num; ++i)
{
- s_odt_append_image(alloc, content, &page->images[i]);
+ s_odt_append_image(alloc, content, &subpage->images[i]);
}
}
}
@@ -716,6 +716,38 @@ int extract_document_to_odt_content(
return ret;
}
+int extract_document_to_odt_content(
+        extract_alloc_t* alloc,
+        document_t* document,
+        int spacing,
+        int rotation,
+        int images,
+        extract_astring_t* content,
+        extract_odt_styles_t* styles
+        )
+{
+    int p;
+    int ret = 0;
+
+    /* Convert each page in turn, appending to <content>; stop at the first page that fails and return its result (0 if every page succeeds or there are no pages). */
+    for (p=0; p<document->pages_num; ++p)
+    {
+        extract_page_t* page = document->pages[p];
+
+        ret = extract_page_to_odt_content(
+                alloc,
+                page,
+                spacing,
+                rotation,
+                images,
+                content,
+                styles
+                );
+        if (ret) break;
+    }
+
+    return ret;
+}
#if 0
static int s_find_mid(const char* text, const char* begin, const char* end, const char** o_begin, const char** o_end)
@@ -749,7 +781,7 @@ int extract_odt_content_item(
extract_astring_t temp;
extract_astring_init(&temp);
*text2 = NULL;
-
+
(void) images;
if (0)
{}
@@ -771,10 +803,10 @@ int extract_odt_content_item(
&text_intermediate
)) goto end;
outf("text_intermediate: %s", text_intermediate);
-
+
/* Convert <styles> to text. */
if (s_odt_styles_definitions(alloc, styles, &styles_definitions)) goto end;
-
+
/* To make tables work, we seem to need to specify table and column
styles, and these can be empty. todo: maybe specify exact sizes based
on the pdf table and cell dimensions. */
@@ -783,7 +815,7 @@ int extract_odt_content_item(
"<style:style style:name=\"extract.table\" style:family=\"table\"/>\n"
"<style:style style:name=\"extract.table.column\" style:family=\"table-column\"/>\n"
)) goto end;
-
+
/* Replace '<office:automatic-styles/>' with text from
<styles_definitions>. */
e = extract_content_insert(
@@ -845,7 +877,7 @@ int extract_odt_content_item(
return e;
}
-
+
int extract_odt_write_template(
extract_alloc_t* alloc,
@@ -867,7 +899,7 @@ int extract_odt_write_template(
assert(path_out);
assert(path_template);
-
+
if (extract_check_path_shell_safe(path_out))
{
outf("path_out is unsafe: %s", path_out);
@@ -896,7 +928,7 @@ int extract_odt_write_template(
/* Might be nice to iterate through all items in path_tempdir, but for now
we look at just the items that we know extract_odt_content_item() will
modify. */
-
+
{
const char* names[] =
{
@@ -912,7 +944,7 @@ int extract_odt_write_template(
extract_free(alloc, &text2);
if (extract_asprintf(alloc, &path, "%s/%s", path_tempdir, name) < 0) goto end;
if (extract_read_all_path(alloc, path, &text)) goto end;
-
+
outf("before extract_odt_content_item() styles->styles_num=%i", styles->styles_num);
if (extract_odt_content_item(
alloc,
@@ -928,7 +960,7 @@ int extract_odt_write_template(
outf("extract_odt_content_item() failed");
goto end;
}
-
+
outf("after extract_odt_content_item styles->styles_num=%i", styles->styles_num);
{
@@ -954,7 +986,7 @@ int extract_odt_write_template(
if (extract_asprintf(alloc, &path, "%s/Pictures/%s", path_tempdir, image->name) < 0) goto end;
if (extract_write_all(image->data, image->data_size, path)) goto end;
}
-
+
outf("Zipping tempdir to create %s", path_out);
{
const char* path_out_leaf = strrchr(path_out, '/');