From dfd83b5565e8ba28f87fe6a6389356c974bf699f Mon Sep 17 00:00:00 2001 From: Hayden Schiff Date: Thu, 21 Jan 2016 22:58:30 -0500 Subject: [PATCH 1/2] tabula: add page --- pages/common/tabula.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 pages/common/tabula.md diff --git a/pages/common/tabula.md b/pages/common/tabula.md new file mode 100644 index 0000000000..94df8dd369 --- /dev/null +++ b/pages/common/tabula.md @@ -0,0 +1,23 @@ +# tabula + +> Extract tables from PDF files. + +- Extract all tables from a PDF to a CSV file: + +`tabula {{file.pdf}} -o {{file.csv}}` + +- Extract tables from pages 1, 2, 3, and 6 of a PDF: + +`tabula --pages {{1-3,6}} {{file.pdf}}` + +- Extract tables from page 1 of a PDF, guessing which portion of the page to examine: + +`tabula {{file.pdf}} --guess --pages {{1}}` + +- Extract all tables from a PDF, using ruling lines to determine cell boundaries: + +`tabula {{file.pdf}} --spreadsheet` + +- Extract all tables from a PDF, using blank space to determine cell boundaries: + +`tabula {{file.pdf}} --no-spreadsheet` From 5a9da6c4d684af6648b1865d73dc92063cf31a5f Mon Sep 17 00:00:00 2001 From: Hayden Schiff Date: Thu, 21 Jan 2016 23:01:44 -0500 Subject: [PATCH 2/2] tabula: added alt format example also made ordering of arguments consistent --- pages/common/tabula.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pages/common/tabula.md b/pages/common/tabula.md index 94df8dd369..58d0bde68a 100644 --- a/pages/common/tabula.md +++ b/pages/common/tabula.md @@ -4,7 +4,11 @@ - Extract all tables from a PDF to a CSV file: -`tabula {{file.pdf}} -o {{file.csv}}` +`tabula -o {{file.csv}} {{file.pdf}}` + +- Extract all tables from a PDF to a JSON file: + +`tabula --format JSON -o {{file.json}} {{file.pdf}}` - Extract tables from pages 1, 2, 3, and 6 of a PDF: @@ -12,12 +16,12 @@ - Extract tables from page 1 of a PDF, guessing which portion of the page to examine: -`tabula {{file.pdf}} --guess --pages {{1}}` +`tabula --guess --pages {{1}} {{file.pdf}}` - Extract all tables from a PDF, using ruling lines to determine cell boundaries: -`tabula {{file.pdf}} --spreadsheet` +`tabula --spreadsheet {{file.pdf}}` - Extract all tables from a PDF, using blank space to determine cell boundaries: -`tabula {{file.pdf}} --no-spreadsheet` +`tabula --no-spreadsheet {{file.pdf}}`