PDA

View Full Version : حرفه ای: خواندن Bookmark برنامه Word مایکروسافت



sir_djalireza
پنج شنبه 01 فروردین 1392, 11:17 صبح
با سلام
سوال سختی هستش و هرچی در اینترنت گشتم نتونستم چیزی پیدا کنم.
میخوام برنامه ای با جاوا بنویسم و با MS Office Word کار کنه!
سوال اینجاست که می‌خوام لیست بوک‌مارک‌های یک سند Word رو باز کنم، key هر بوک‌مارک رو بخونم و value اون رو مقداردهی کنم؛ چیزی شبیه کار با HashMap.
کسی نظری داره و یا راهنمایی داره؟

sir_djalireza
سه شنبه 06 فروردین 1392, 15:19 عصر
خب پیدا کردم جواب رو! خیلی کلاس بدرد بخوری هستش، متاسفانه چون تعداد کاراکتر داکومنتیشن بالا بود نتونستم همه رو پیدا کنم


import java.io.*;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Stack;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTMarkupRange;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
* It is possible to create a document and to nest one bookmark within another.
* Typically, a bookmark is inserted into a piece of text, that is then selected
* and another bookmark is added to that selection. The XML markup might look
* something like this
*
* <pre>
* <w:p w:rsidR="00945150" w:rsidRDefault="00945150">
* <w:r>
* <w:t xml:space="preserve">
* Imagine I want to insert one bookmark at the start of this
* </w:t>
* </w:r>
* <w:bookmarkStart w:id="0" w:name="OUTER"/>
* <w:r>
* <w:t xml:space="preserve">piece of text and another just
* </w:t>
* </w:r>
* <w:proofErr w:type="gramStart"/>
* <w:r>
* <w:t xml:space="preserve">here
* </w:t>
* </w:r>
* <w:bookmarkStart w:id="1" w:name="INNER"/>
* <w:bookmarkEnd w:id="1"/>
* <w:r>
* <w:t>.
* </w:t>
* </w:r>
* <w:bookmarkEnd w:id="0"/>
* <w:proofErr w:type="gramEnd"/>
* </w:p>
* </pre>
*
* @version 1.00 16th June 2012
* 1.10 20th June 2012 - Added the ability to replace the text between the opening
* and closing brackets ([ and ]) that appear when the document
* is open in Word.
*/
public class DOCXTest {

public static final int INSERT_BEFORE = 0;
public static final int INSERT_AFTER = 1;
public static final int REPLACE = 2;
private XWPFDocument document = null;

public DOCXTest() {
}

/**
* Opens a Word OOXML file.
*
* @param filename An instance of the String class that encapsulates the
* path to and name of a Word OOXML (.docx) file.
* @throws IOException Thrown if a problem occurs within the underlying file
* system.
*/
public final void openFile(String filename) throws IOException {
File file = null;
FileInputStream fis = null;
try {
// Simply open the file and store a reference into the 'document'
// local variable.
file = new File(filename);
fis = new FileInputStream(file);
this.document = new XWPFDocument(fis);
} finally {
try {
if (fis != null) {
fis.close();
fis = null;
}
} catch (IOException ioEx) {
// Swallow this exception. It would have occured onyl
// when releasing the file handle and should not pose
// problems to later processing.
}
}
}

/**
* Saves a Word OOXML file away under the name, and to the location,
* specified.
*
* @param filename An instance of the String class that encapsulates the of
* the file and the location into which it should be stored.
* @throws IOException Thrown if a problem occurs in the underlying file
* system.
*/
public final void saveAs(String filename) throws IOException {
File file = null;
FileOutputStream fos = null;
try {
file = new File(filename);
fos = new FileOutputStream(file);
this.document.write(fos);
} finally {
if (fos != null) {
fos.close();
fos = null;
}
}
}
public final void insertAtBookmark(String bookmarkName,
String bookmarkValue, int where) throws XmlException {
List<XWPFTable> tableList = null;
Iterator<XWPFTable> tableIter = null;
List<XWPFTableRow> rowList = null;
Iterator<XWPFTableRow> rowIter = null;
List<XWPFTableCell> cellList = null;
Iterator<XWPFTableCell> cellIter = null;
XWPFTable table = null;
XWPFTableRow row = null;
XWPFTableCell cell = null;

// Firstly, deal with any paragraphs in the body of the document.
this.procParaList(this.document.getParagraphs(), bookmarkName, bookmarkValue, where);

// Then check to see if there are any bookmarks in table cells. To do this
// it is necessary to get at the list of paragraphs 'stored' within the
// individual table cell, hence this code which get the tables from the
// document, the rows from each table, the cells from each row and the
// paragraphs from each cell.
tableList = this.document.getTables();
tableIter = tableList.iterator();
while (tableIter.hasNext()) {
table = tableIter.next();
rowList = table.getRows();
rowIter = rowList.iterator();
while (rowIter.hasNext()) {
row = rowIter.next();
cellList = row.getTableCells();
cellIter = cellList.iterator();
while (cellIter.hasNext()) {
cell = cellIter.next();
this.procParaList(cell.getParagraphs(),
bookmarkName,
bookmarkValue,
where);
}
}
}
}
private final void procParaList(List<XWPFParagraph> paraList,
String bookmarkName, String bookmarkValue, int where) throws XmlException {
Iterator<XWPFParagraph> paraIter = null;
XWPFParagraph para = null;
List<CTBookmark> bookmarkList = null;
Iterator<CTBookmark> bookmarkIter = null;
CTBookmark bookmark = null;
XWPFRun run = null;

// Get an Iterator for the XWPFParagraph object and step through them
// one at a time.
paraIter = paraList.iterator();
while (paraIter.hasNext()) {
para = paraIter.next();

// Get a List of the CTBookmark object sthat the paragraph
// 'contains' and step through these one at a time.
bookmarkList = para.getCTP().getBookmarkStartList();
bookmarkIter = bookmarkList.iterator();
while (bookmarkIter.hasNext()) {
bookmark = bookmarkIter.next();

// If the name of the CTBookmakr object matches the value
// encapsulated within the argumnet passed to the bookmarkName
// parameter then this is where the text should be inserted.
if (bookmark.getName().equals(bookmarkName)) {

// Create a new character run to hold the value encapsulated
// within the argument passed to the bookmarkValue parameter
// and then test whether this new run shouold be inserted
// into the document before or after the bookmark.
run = para.createRun();
run.setText(bookmarkValue);
switch (where) {
case DOCXTest.INSERT_AFTER:
this.insertAfterBookmark(bookmark, run, para);
break;
case DOCXTest.INSERT_BEFORE:
this.insertBeforeBookmark(bookmark, run, para);
break;
case DOCXTest.REPLACE:
this.replaceBookmark(bookmark, run, para);
break;

}
}
}
}
}
private void insertAfterBookmark(CTBookmark bookmark, XWPFRun run,
XWPFParagraph para) {
Node nextNode = null;
Node insertBeforeNode = null;
Node styleNode = null;
int bookmarkStartID = 0;
int bookmarkEndID = -1;

// Capture the id of the bookmarkStart tag. The code will step through
// the document nodes 'contained' within the start and end tags that have
// matching id numbers.
bookmarkStartID = bookmark.getId().intValue();

// Get the node for the bookmark start tag and then enter a loop that
// will step from one node to the next until the bookmarkEnd tag with
// a matching id is fouind.
nextNode = bookmark.getDomNode();
while (bookmarkStartID != bookmarkEndID) {

// Get the next node along and check to see if it is a bookmarkEnd
// tag. If it is, get its id so that the containing while loop can
// be terminated once the correct end tag is found. Note that the
// id will be obtained as a String and must be converted into an
// integer. This has been coded to fail safely so that if an error
// is encuntered converting the id to an int value, the while loop
// will still terminate.
nextNode = nextNode.getNextSibling();
if (nextNode.getNodeName().contains("bookmarkEnd")) {
try {
bookmarkEndID = Integer.parseInt(
nextNode.getAttributes().getNamedItem("w:id").getNodeValue());
} catch (NumberFormatException nfe) {
bookmarkEndID = bookmarkStartID;
}
} // If we are not dealing with a bookmarkEnd node, are we dealing
// with a run node that MAY contains styling information. If so,
// then get that style information from the run.
else {
if (nextNode.getNodeName().equals("w:r")) {
styleNode = this.getStyleNode(nextNode);
}
}
}
if (styleNode != null) {
run.getCTR().getDomNode().insertBefore(
styleNode.cloneNode(true), run.getCTR().getDomNode().getFirstChild());
}

// Finally, check to see if there was a node after the bookmarkEnd
// tag. If there was, then this code will insert the run in front of
// that tag. If there was no node following the bookmarkEnd tag then the
// run will be inserted at the end of the paragarph and this was taken
// care of at the point of creation.
if (insertBeforeNode != null) {
para.getCTP().getDomNode().insertBefore(
run.getCTR().getDomNode(), insertBeforeNode);
}
}
private void insertBeforeBookmark(CTBookmark bookmark, XWPFRun run,
XWPFParagraph para) {
Node insertBeforeNode = null;
Node childNode = null;
Node styleNode = null;

// Get the dom node from the bookmarkStart tag and look for another
// node immediately preceding it.
insertBeforeNode = bookmark.getDomNode();
childNode = insertBeforeNode.getPreviousSibling();

// If a node is found, try to get the styling from it.
if (childNode != null) {
styleNode = this.getStyleNode(childNode);

// If that previous node was styled, then apply this style to the
// text which will be inserted.
if (styleNode != null) {
run.getCTR().getDomNode().insertBefore(
styleNode.cloneNode(true), run.getCTR().getDomNode().getFirstChild());
}
}

// Insert the text into the paragraph immediately in front of the
// bookmarkStart tag.
para.getCTP().getDomNode().insertBefore(
run.getCTR().getDomNode(), insertBeforeNode);
}
private void replaceBookmark(CTBookmark bookmark, XWPFRun run,
XWPFParagraph para) {
Node nextNode = null;
Node styleNode = null;
Node lastRunNode = null;
NodeList childNodes = null;
Stack<Node> nodeStack = null;
boolean textNodeFound = false;
int bookmarkStartID = 0;
int bookmarkEndID = -1;
int numChildNodes = 0;

nodeStack = new Stack<Node>();
bookmarkStartID = bookmark.getId().intValue();
nextNode = bookmark.getDomNode();

// Loop through the nodes looking for a matching bookmarkEnd tag
while (bookmarkStartID != bookmarkEndID) {

nextNode = nextNode.getNextSibling();

// If an end tag is found, does it match the start tag? If so, end
// the while loop.
if (nextNode.getNodeName().contains("bookmarkEnd")) {
try {
bookmarkEndID = Integer.parseInt(
nextNode.getAttributes().getNamedItem("w:id").getNodeValue());
} catch (NumberFormatException nfe) {
bookmarkEndID = bookmarkStartID;
}
} else {
// If this is not a bookmark end tag, store the reference to the
// node on the stack for later deletion. This is easier that
// trying to delete the nodes as they are found.
nodeStack.push(nextNode);
}
}

// If the stack of nodes found between the bookmark tags is not empty
// then they have to be removed.
if (!nodeStack.isEmpty()) {

// Check the node at the top of the stack. If it is a run, get it's
// style - if any - and apply to the run that will be replacing it.
lastRunNode = nodeStack.pop();
if ((lastRunNode.getNodeName().equals("w:r"))) {
styleNode = this.getStyleNode(lastRunNode);
if (styleNode != null) {
run.getCTR().getDomNode().insertBefore(
styleNode.cloneNode(true), run.getCTR().getDomNode().getFirstChild());
}
}

// Delete any and all node that were found in between the start and
// end tags. This is slightly safer that trying to delete the nodes
// as they are found wile stepping through them in the loop above.
para.getCTP().getDomNode().removeChild(lastRunNode );
// Now, delete the remaing Nodes on the stack
while (!nodeStack.isEmpty()) {
para.getCTP().getDomNode().removeChild(nodeStack.p op());
}
}

// Place the text into position, between the bookmark tags.
para.getCTP().getDomNode().insertBefore(
run.getCTR().getDomNode(), nextNode);
}
private Node getStyleNode(Node parentNode) {
Node childNode = null;
Node styleNode = null;
if (parentNode != null) {

// If the node represents a run and it has child nodes then
// it can be processed further. Note, whilst testing the code, it
// was observed that although it is possible to get a list of a nodes
// children, even when a node did have children, trying to obtain this
// list would often return a null value. This is the reason why the
// technique of stepping from one node to the next is used here.
if (parentNode.getNodeName().equalsIgnoreCase("w:r")
&& parentNode.hasChildNodes()) {

// Get the first node and catch it's reference for return if
// the first child node is a style node (w:rPr).
childNode = parentNode.getFirstChild();
if (childNode.getNodeName().equals("w:rPr")) {
styleNode = childNode;
} else {
// If the first node was not a style node and there are other
// child nodes remaining to be checked, then step through
// the remaining child nodes until either a style node is
// found or until all child nodes have been processed.
while ((childNode = childNode.getNextSibling()) != null) {
if (childNode.getNodeName().equals("w:rPr")) {
styleNode = childNode;
// Note setting to null here if a style node is
// found in order order to terminate any further
// checking
childNode = null;
}
}
}
}
}
return (styleNode);
}
public String getBookmarkText(String bookmarkName) throws XmlException,
IOException {
List<XWPFTable> tableList = null;
Iterator<XWPFTable> tableIter = null;
List<XWPFTableRow> rowList = null;
Iterator<XWPFTableRow> rowIter = null;
List<XWPFTableCell> cellList = null;
Iterator<XWPFTableCell> cellIter = null;
XWPFTable table = null;
XWPFTableRow row = null;
XWPFTableCell cell = null;
String text = null;

// Firstly, deal with any paragraphs in the body of the document.
text = this.procParasForBookmarkText(this.document.getPar agraphs(),
bookmarkName);

// Then check to see if there are any bookmarks in table cells. To do this
// it is necessary to get at the list of paragraphs 'stored' within the
// individual table cell, hence this code which get the tables from the
// document, the rows from each table, the cells from each row and the
// paragraphs from each cell.
if (text == null) {
tableList = this.document.getTables();
tableIter = tableList.iterator();
while (tableIter.hasNext()) {
table = tableIter.next();
rowList = table.getRows();
rowIter = rowList.iterator();
while (rowIter.hasNext()) {
row = rowIter.next();
cellList = row.getTableCells();
cellIter = cellList.iterator();
while (cellIter.hasNext()) {
cell = cellIter.next();
text = this.procParasForBookmarkText(cell.getParagraphs() ,
bookmarkName);
if (text != null) {
return (text);
}
}
}
}
}
return (text);
}
public String procParasForBookmarkText(List<XWPFParagraph> paraList,
String bookmarkName) throws XmlException, IOException {
Iterator<XWPFParagraph> paraIter = null;
XWPFParagraph para = null;
XWPFRun run = null;
List<CTBookmark> bookmarkList = null;
Iterator<CTBookmark> bookmarkIter = null;
CTBookmark bookmark = null;
StringBuilder builder = null;

// Get an Iterator to step through the contents of the paragraph list.
paraIter = paraList.iterator();
while (paraIter.hasNext()) {

// Get the paragraph, a llist of CTBookmark objects and an Iterator
// to step through the list of CTBookmarks.
para = paraIter.next();
bookmarkList = para.getCTP().getBookmarkStartList();
bookmarkIter = bookmarkList.iterator();
while (bookmarkIter.hasNext()) {

// Get a Bookmark and check it's name. If the name of the
// bookmark matches the name the user has specified then get the
// bookmarks ID. This is required to cope with the situation where
// one bookmark is nested within another; we do not want to end
// processing until we hit the matching bookmarkEnd tag.
bookmark = bookmarkIter.next();
if (bookmark.getName().equals(bookmarkName)) {
builder = this.getTextFromBookmark(bookmark);
}
}
}
return (builder == null ? null : builder.toString());
}
private StringBuilder getTextFromBookmark(CTBookmark bookmark)
throws IOException, XmlException {
int startBookmarkID = 0;
int endBookmarkID = -1;
Node nextNode = null;
Node childNode = null;
CTText text = null;
ByteArrayOutputStream baos = null;
StringBuilder builder = null;
String rawXML = null;

// Get the ID of the bookmark from it's start tag, the DOM node from the
// bookmark (to make looping easier) and initialise the StringBuilder.
startBookmarkID = bookmark.getId().intValue();
nextNode = bookmark.getDomNode();
builder = new StringBuilder();

// Loop through the nodes held between the bookmark's start and end
// tags.
while (startBookmarkID != endBookmarkID) {

// Get the next node and, if it is a bookmarkEnd tag, get it's ID
// as matching ids will terminate the while loop..
nextNode = nextNode.getNextSibling();
if (nextNode.getNodeName().contains("bookmarkEnd")) {

// Get the ID attribute from the node. It is a String that must
// be converted into an int. An exception could be thrown and so
// the catch clause will ensure the loop ends neatly even if the
// value might be incorrect. Must inform the user.
try {
endBookmarkID = Integer.parseInt(
nextNode.getAttributes().
getNamedItem("w:id").getNodeValue());
} catch (NumberFormatException nfe) {
endBookmarkID = startBookmarkID;
}
} else {
// This is not a bookmarkEnd node and can processed it for any
// text it may contain. Note the check for both type - it must
// be a run - and contain children. Interestingly, it seems as
// though the node may contain children and yet the call to
// nextNode.getChildNodes() will still return an empty list,
// hence the need to step through the child nodes.
if (nextNode.getNodeName().equals("w:r")
&& nextNode.hasChildNodes()) {
// Get the text from the child nodes.
builder.append(this.getTextFromChildNodes(nextNode ));
}
}
}
return (builder);
}
private String getTextFromChildNodes(Node node) throws IOException,
XmlException {
NodeList childNodes = null;
Node childNode = null;
CTText text = null;
StringBuilder builder = new StringBuilder();
int numChildNodes = 0;

// Get a list of chid nodes from the node passed to the method and
// find out how many children there are in the list.
childNodes = node.getChildNodes();
numChildNodes = childNodes.getLength();

// Iterate through the children one at a time - it is possible for a
// run to ciontain zero, one or more text nodes - and recover the text
// from an text type child nodes.
for (int i = 0; i < numChildNodes; i++) {

// Get a node and check it's name. If this is 'w:t' then process as
// text type node.
childNode = childNodes.item(i);

if (childNode.getNodeName().equals("w:t")) {

// If the node reports it's type as txet, then simply call the
// getNodeValue() method to get at it's text.
if (childNode.getNodeType() == Node.TEXT_NODE) {
builder.append(childNode.getNodeValue());
} else {
// Correct the type by parsing the node's XML markup and
// creating a CTText object. Call the getStringValue()
// method on that to get the text.
text = CTText.Factory.parse(childNode);
builder.append(text.getStringValue());
}
}
}
return (builder.toString());
}

public static void main(String[] args) {
try {
// open the existing workbook and get the text of two bookmarks.
DOCXTest docxTest = new DOCXTest();
docxTest.openFile("C:/temp/BeforeBookMarkValuesReplacement.docx");
System.out.println("EDMS_Bookmark1 contains: " + docxTest.getBookmarkText("EDMS_Bookmark1"));
System.out.println("EDMS_Bookmark5 contains: " + docxTest.getBookmarkText("EDMS_Bookmark5"));
// Replace the text at those two bookmakrs and then save the file away
// using a different name
docxTest.insertAtBookmark("EDMS_Bookmark1", "This should replace the EDMS_Bookmark1", DOCXTest.REPLACE);
docxTest.insertAtBookmark("EDMS_Bookmark5", "This should replace the EDMS_Bookmark5", DOCXTest.REPLACE);
docxTest.saveAs("C:/temp/AfterBookMarkValuesReplacement.docx");
// Open the new file and demonstrate that the bookamrk text has changed.
docxTest.openFile("C:/temp/AfterBookMarkValuesReplacement.docx");
System.out.println("EDMS_Bookmark1 contains: " + docxTest.getBookmarkText("EDMS_Bookmark1"));
System.out.println("EDMS_Bookmark5 contains: " + docxTest.getBookmarkText("EDMS_Bookmark5"));
} catch (Exception ex) {
System.out.println("Caught a: " + ex.getClass().getName());
System.out.println("Message: " + ex.getMessage());
System.out.println("Stacktrace follows:.....");
ex.printStackTrace(System.out);
}
}
}

sir_djalireza
سه شنبه 06 فروردین 1392, 15:20 عصر
فقط اگه سوال داشتید بپرسید در موردش تا بهتون جواب بدم