Python docx.Document() Examples
The following are 30
code examples of docx.Document().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module docx, or try the search function.
Example #1
Source File: test_properties.py From docxcompose with MIT License | 8 votes |
def test_finds_run_nodes_in_complex_field_without_separate_correctly(self):
    """Check the runs reported for a complex field that has no separate run.

    Loads ``complex_field_without_separate.docx`` and inspects its
    ``User.FullName`` docproperty, whose field is made of the begin,
    docprop and end runs only.
    """
    doc = Document(docx_path('complex_field_without_separate.docx'))
    found = CustomProperties(doc).find_docprops_in_document()
    assert len(found) == 2, \
        'input should contain two complex field docproperties'

    # The "User.FullName" docproperty is the one without a separate run.
    full_name_fields = [
        candidate for candidate in found if candidate.name == 'User.FullName'
    ]
    assert len(full_name_fields) == 1, \
        "There should be only one User.FullName docproperty"
    field = full_name_fields[0]

    assert field.get_separate_run() is None, \
        "This complex field should not have a separate run."
    assert [] == field.get_runs_for_update(), \
        "As there is no separate run, there should be no run to update"

    # Without a separate run, dissolving the property replaces every run
    # of the field: begin, docprop and end, in that order.
    replaced = field.get_runs_to_replace_field_with_value()
    assert len(replaced) == 3
    for position, attr_name in enumerate(('begin_run', 'w_r', 'end_run')):
        assert replaced[position] == getattr(field, attr_name)
Example #2
Source File: RastLeak_1_2.py From RastLeak with GNU General Public License v3.0 | 7 votes |
def Analyze_Metadata_pdf(filename):
    """Print a PDF's document-information entries and collect key values.

    Every entry of the information dictionary is printed. Values found under
    ``/Author``, ``/Producer`` and ``/Creator`` are added, without
    duplicates, to the module-level ``meta_author_array``,
    ``meta_producer_array`` and ``meta_creator_array`` lists; those three
    lists are then appended to the module-level ``metadata_files`` list.

    Args:
        filename: Path of the PDF file to read.
    """
    # Map each tracked key to the list that collects its values.
    collectors = {
        "/Author": meta_author_array,
        "/Producer": meta_producer_array,
        "/Creator": meta_creator_array,
    }
    # open() replaces the Python 2-only file() builtin, and the with-block
    # closes the handle, which was previously never closed.
    with open(filename, 'rb') as pdf_stream:
        pdfFile = PdfFileReader(pdf_stream)
        # getDocumentInfo() may return None when the PDF carries no
        # information dictionary; treat that as "no entries".
        metadata = pdfFile.getDocumentInfo() or {}
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' - Document: ' + str(filename))
        # Entries are read while the stream is open, in case the reader
        # resolves values lazily.
        for meta in metadata:
            value = metadata[meta]
            print(' - ' + meta + ':' + value)
            bucket = collectors.get(meta)
            if bucket is not None and value not in bucket:
                bucket.append(value)
    # Group the different arrays in one with all metadata.
    metadata_files.append(meta_author_array)
    metadata_files.append(meta_producer_array)
    metadata_files.append(meta_creator_array)
Example #3
Source File: translate_word_doc.py From py-googletrans with MIT License | 7 votes |
def translate_doc(filename, destination='zh-CN', mix=True):
    """Translate a Word document and save the result as a new document.

    The text of every paragraph in ``doc.paragraphs`` and of every cell of
    every table in ``doc.tables`` is translated. The output path is the
    input path with the lower-cased destination code inserted before the
    file extension (``report.docx`` -> ``reportzh-cn.docx``).

    :param filename: path of the Word document to translate
    :param destination: target language code, passed to the translator as
        ``dest``
    :param mix: if True, keep the original text and append the translation
        on a new line; if False, replace the original text with the
        translation
    """
    import os

    # One translator instance is reused for every paragraph and cell.
    translator = Translator()

    def tx(t):
        return translator.translate(t, dest=destination).text

    def render(original):
        # With mix=False the translation replaces the text; previously the
        # translation was computed and then thrown away.
        translated = tx(original)
        return original + '\n' + translated if mix else translated

    doc = Document(filename)
    for p in doc.paragraphs:
        p.text = render(p.text)

    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                # Write back to the cell itself; the earlier code assigned to
                # the leftover paragraph variable ``p`` here.
                cell.text = render(cell.text)

    # Only touch the final extension: str.replace('.doc', ...) rewrote every
    # occurrence in the path and returned the input path unchanged (so the
    # source file was overwritten) when '.doc' did not occur at all.
    root, ext = os.path.splitext(filename)
    doc.save(root + destination.lower() + ext)
Example #4
Source File: test_properties.py From docxcompose with MIT License | 6 votes |
def test_dissolving_field_when_three_complex_docprop_in_same_paragraph(self):
    """Dissolve one docproperty out of three that share a single paragraph.

    After ``dissolve_fields("Text Property")`` the paragraph count and the
    paragraph text must be unchanged, and one complex field must remain.
    """
    doc = Document(docx_path('three_props_in_same_paragraph.docx'))
    assert len(doc.paragraphs) == 1, 'input file should contains one paragraph'
    only_paragraph = doc.paragraphs[0]

    props = CustomProperties(doc)
    assert len(props.find_docprops_in_document()) == 3, \
        'input should contain three complex field docproperties'

    # The literal mirrors the fixture text exactly, misspellings included.
    template = u'{text} / {num} mor between the fields {text} and some afte the three fields'
    expected_text = template.format(text="I was spellcecked", num=0)
    assert expected_text == only_paragraph.text

    props.dissolve_fields("Text Property")

    assert len(doc.paragraphs) == 1
    assert len(props.find_docprops_in_document()) == 1, \
        'document should contain one complex field after removal'
    assert expected_text == only_paragraph.text
Example #5
Source File: RastLeak_1_0.py From RastLeak with GNU General Public License v3.0 | 6 votes |
def Analyze_Metadata_doc(fileName):
    """Print the populated core properties of a .docx file.

    Each core property named in ``attribute`` is read; properties with a
    truthy value that is text or a ``datetime.datetime`` are printed. Other
    value types are skipped, as before.

    Args:
        fileName: Path of the .docx file to read.
    """
    # ``unicode`` only exists on Python 2; fall back to ``str`` on Python 3.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    # Print the metadata which it wants to display
    attribute = ["author", "category", "comments", "content_status",
                 "created", "identifier", "keywords", "language",
                 "last_modified_by", "last_printed", "modified",
                 "revision", "subject", "title", "version"]

    # open() replaces the Python 2-only file() builtin, and the with-block
    # closes the handle, which was previously never closed.
    with open(fileName, 'rb') as docx_stream:
        docxFile = docx.Document(docx_stream)
        # Get the structure
        docxInfo = docxFile.core_properties
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' - Document: ' + str(fileName))
        # run the list in a for loop to print the value of each metadata
        for meta in attribute:
            metadata = getattr(docxInfo, meta)
            if not metadata:
                continue
            if isinstance(metadata, text_type):
                # Concatenate text directly: str() on a Python 2 unicode
                # value forces an ASCII encode and fails on non-ASCII text.
                print(" \n\t" + str(meta) + ": " + metadata)
            elif isinstance(metadata, datetime.datetime):
                print(" \n\t" + str(meta) + ": " + str(metadata))
Example #6
Source File: test_properties.py From docxcompose with MIT License | 6 votes |
def test_complex_docprop_fields_with_multiple_textnodes_are_updated(self):
    """Updating a docproperty whose cached value spans several text nodes.

    The fixture holds one complex field whose cached value is split over
    four parts; after ``update_all()`` it must be a single cached value with
    the same text.
    """
    doc = Document(docx_path('spellchecked_docproperty.docx'))
    expected_value = 'i will be spllchecked!'

    found_paragraphs = xpath(doc.element.body, '//w:p')
    assert len(found_paragraphs) == 1, 'input file contains one paragraph'
    assert len(xpath(doc.element.body, '//w:instrText')) == 1, \
        'input contains one complex field docproperty'

    parts_before = cached_complex_field_values(found_paragraphs[0])
    assert len(parts_before) == 4, \
        'doc property value is scattered over 4 parts'
    assert expected_value == ''.join(parts_before)

    CustomProperties(doc).update_all()

    # Query the paragraph again after the update before reading its values.
    refreshed_paragraph = xpath(doc.element.body, '//w:p')[0]
    parts_after = cached_complex_field_values(refreshed_paragraph)
    assert len(parts_after) == 1, \
        'doc property value has been reset to one cached value'
    assert expected_value == parts_after[0]
Example #7
Source File: rastleak_2_0.py From RastLeak with GNU General Public License v3.0 | 6 votes |
def Analyze_Metadata_pdf(filename):
    """Print a PDF's document-information entries and collect key values.

    Every entry of the information dictionary is printed. Values found under
    ``/Author``, ``/Producer`` and ``/Creator`` are added, without
    duplicates, to the module-level ``meta_author_array``,
    ``meta_producer_array`` and ``meta_creator_array`` lists; those three
    lists are then appended to the module-level ``metadata_files`` list.

    Args:
        filename: Path of the PDF file to read.
    """
    # Map each tracked key to the list that collects its values.
    collectors = {
        "/Author": meta_author_array,
        "/Producer": meta_producer_array,
        "/Creator": meta_creator_array,
    }
    # open() replaces the Python 2-only file() builtin, and the with-block
    # closes the handle, which was previously never closed.
    with open(filename, 'rb') as pdf_stream:
        pdfFile = PdfFileReader(pdf_stream)
        # getDocumentInfo() may return None when the PDF carries no
        # information dictionary; treat that as "no entries".
        metadata = pdfFile.getDocumentInfo() or {}
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' - Document: ' + str(filename))
        # Entries are read while the stream is open, in case the reader
        # resolves values lazily.
        for meta in metadata:
            value = metadata[meta]
            print(' - ' + meta + ':' + value)
            bucket = collectors.get(meta)
            if bucket is not None and value not in bucket:
                bucket.append(value)
    # Group the different arrays in one with all metadata.
    metadata_files.append(meta_author_array)
    metadata_files.append(meta_producer_array)
    metadata_files.append(meta_creator_array)
Example #8
Source File: RastLeak_1_3.py From RastLeak with GNU General Public License v3.0 | 6 votes |
def Analyze_Metadata_pdf(filename):
    """Print a PDF's document-information entries and collect key values.

    Every entry of the information dictionary is printed. Values found under
    ``/Author``, ``/Producer`` and ``/Creator`` are added, without
    duplicates, to the module-level ``meta_author_array``,
    ``meta_producer_array`` and ``meta_creator_array`` lists; those three
    lists are then appended to the module-level ``metadata_files`` list.

    Args:
        filename: Path of the PDF file to read.
    """
    # Map each tracked key to the list that collects its values.
    collectors = {
        "/Author": meta_author_array,
        "/Producer": meta_producer_array,
        "/Creator": meta_creator_array,
    }
    # open() replaces the Python 2-only file() builtin, and the with-block
    # closes the handle, which was previously never closed.
    with open(filename, 'rb') as pdf_stream:
        pdfFile = PdfFileReader(pdf_stream)
        # getDocumentInfo() may return None when the PDF carries no
        # information dictionary; treat that as "no entries".
        metadata = pdfFile.getDocumentInfo() or {}
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' - Document: ' + str(filename))
        # Entries are read while the stream is open, in case the reader
        # resolves values lazily.
        for meta in metadata:
            value = metadata[meta]
            print(' - ' + meta + ':' + value)
            bucket = collectors.get(meta)
            if bucket is not None and value not in bucket:
                bucket.append(value)
    # Group the different arrays in one with all metadata.
    metadata_files.append(meta_author_array)
    metadata_files.append(meta_producer_array)
    metadata_files.append(meta_creator_array)
Example #9
Source File: rastleak_1_4.py From RastLeak with GNU General Public License v3.0 | 6 votes |
def Analyze_Metadata_pdf(filename):
    """Print a PDF's document-information entries and collect key values.

    Every entry of the information dictionary is printed. Values found under
    ``/Author``, ``/Producer`` and ``/Creator`` are added, without
    duplicates, to the module-level ``meta_author_array``,
    ``meta_producer_array`` and ``meta_creator_array`` lists; those three
    lists are then appended to the module-level ``metadata_files`` list.

    Args:
        filename: Path of the PDF file to read.
    """
    # Map each tracked key to the list that collects its values.
    collectors = {
        "/Author": meta_author_array,
        "/Producer": meta_producer_array,
        "/Creator": meta_creator_array,
    }
    # open() replaces the Python 2-only file() builtin, and the with-block
    # closes the handle, which was previously never closed.
    with open(filename, 'rb') as pdf_stream:
        pdfFile = PdfFileReader(pdf_stream)
        # getDocumentInfo() may return None when the PDF carries no
        # information dictionary; treat that as "no entries".
        metadata = pdfFile.getDocumentInfo() or {}
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' - Document: ' + str(filename))
        # Entries are read while the stream is open, in case the reader
        # resolves values lazily.
        for meta in metadata:
            value = metadata[meta]
            print(' - ' + meta + ':' + value)
            bucket = collectors.get(meta)
            if bucket is not None and value not in bucket:
                bucket.append(value)
    # Group the different arrays in one with all metadata.
    metadata_files.append(meta_author_array)
    metadata_files.append(meta_producer_array)
    metadata_files.append(meta_creator_array)
Example #10
Source File: downloadfiles.py From RastLeak with GNU General Public License v3.0 | 6 votes |
def Analyze_Metadata_pdf(filename):
    """Print a PDF's document-information entries and collect key values.

    Every entry of the information dictionary is printed. Values found under
    ``/Author``, ``/Producer`` and ``/Creator`` are added, without
    duplicates, to the module-level ``meta_author_array``,
    ``meta_producer_array`` and ``meta_creator_array`` lists; those three
    lists are then appended to the module-level ``metadata_files`` list.

    Args:
        filename: Path of the PDF file to read.
    """
    # Map each tracked key to the list that collects its values.
    collectors = {
        "/Author": meta_author_array,
        "/Producer": meta_producer_array,
        "/Creator": meta_creator_array,
    }
    # open() replaces the Python 2-only file() builtin, and the with-block
    # closes the handle, which was previously never closed.
    with open(filename, 'rb') as pdf_stream:
        pdfFile = PdfFileReader(pdf_stream)
        # getDocumentInfo() may return None when the PDF carries no
        # information dictionary; treat that as "no entries".
        metadata = pdfFile.getDocumentInfo() or {}
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' - Document: ' + str(filename))
        # Entries are read while the stream is open, in case the reader
        # resolves values lazily.
        for meta in metadata:
            value = metadata[meta]
            print(' - ' + meta + ':' + value)
            bucket = collectors.get(meta)
            if bucket is not None and value not in bucket:
                bucket.append(value)
    # Group the different arrays in one with all metadata.
    metadata_files.append(meta_author_array)
    metadata_files.append(meta_producer_array)
    metadata_files.append(meta_creator_array)
Example #11
Source File: downloadfiles.py From RastLeak with GNU General Public License v3.0 | 6 votes |
def Analyze_Metadata_pdf(filename):
    """Print a PDF's document-information entries and collect key values.

    Every entry of the information dictionary is printed. Values found under
    ``/Author``, ``/Producer`` and ``/Creator`` are added, without
    duplicates, to the module-level ``meta_author_array``,
    ``meta_producer_array`` and ``meta_creator_array`` lists; those three
    lists are then appended to the module-level ``metadata_files`` list.

    Args:
        filename: Path of the PDF file to read.
    """
    # Map each tracked key to the list that collects its values.
    collectors = {
        "/Author": meta_author_array,
        "/Producer": meta_producer_array,
        "/Creator": meta_creator_array,
    }
    # open() replaces the Python 2-only file() builtin, and the with-block
    # closes the handle, which was previously never closed.
    with open(filename, 'rb') as pdf_stream:
        pdfFile = PdfFileReader(pdf_stream)
        # getDocumentInfo() may return None when the PDF carries no
        # information dictionary; treat that as "no entries".
        metadata = pdfFile.getDocumentInfo() or {}
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' - Document: ' + str(filename))
        # Entries are read while the stream is open, in case the reader
        # resolves values lazily.
        for meta in metadata:
            value = metadata[meta]
            print(' - ' + meta + ':' + value)
            bucket = collectors.get(meta)
            if bucket is not None and value not in bucket:
                bucket.append(value)
    # Group the different arrays in one with all metadata.
    metadata_files.append(meta_author_array)
    metadata_files.append(meta_producer_array)
    metadata_files.append(meta_creator_array)
Example #12
Source File: test_properties.py From docxcompose with MIT License | 6 votes |
def test_complex_field_gets_updated(self):
    """Updating "Number Property" changes only the matching paragraph text."""
    doc = Document(docx_path('docproperties.docx'))

    def paragraph_texts():
        # Re-read the texts on every call so updates are observed.
        return [par.text for par in doc.paragraphs]

    assert len(doc.paragraphs) == 6, 'input file should contain 6 paragraphs'

    instr_nodes = xpath(doc.element.body, './/w:instrText')
    assert len(instr_nodes) == 5,\
        'input should contain five complex field docproperties'

    wanted = [
        u'Custom Doc Properties',
        u'Text: Foo Bar',
        u'Number: 123',
        u'Boolean: Y',
        u'Date: 11.06.2019',
        u'Float: 1.1',
    ]
    assert paragraph_texts() == wanted

    CustomProperties(doc).update("Number Property", 423)

    # Only the third paragraph is expected to change.
    wanted[2] = u'Number: 423'
    assert paragraph_texts() == wanted
Example #13
Source File: mock_osp.py From open-syllabus-project with Apache License 2.0 | 6 votes |
def _write_docx(self, path, content):
    """
    Create a .docx file holding a single paragraph.

    The document's ``created`` core property is set to the current time
    before saving.

    Args:
        path (str): Where the file is saved.
        content (str): Text of the paragraph.
    """
    document = Document()
    document.add_paragraph(content)

    core_props = document.core_properties
    core_props.created = datetime.now()

    document.save(path)
Example #14
Source File: RastLeak_1_1.py From RastLeak with GNU General Public License v3.0 | 6 votes |
def Analyze_Metadata_doc(fileName):
    """Print the populated core properties of a .docx file.

    Each core property named in ``attribute`` is read; properties with a
    truthy value that is text or a ``datetime.datetime`` are printed. Other
    value types are skipped, as before.

    Args:
        fileName: Path of the .docx file to read.
    """
    # ``unicode`` only exists on Python 2; fall back to ``str`` on Python 3.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    # Print the metadata which it wants to display
    attribute = ["author", "category", "comments", "content_status",
                 "created", "identifier", "keywords", "language",
                 "last_modified_by", "last_printed", "modified",
                 "revision", "subject", "title", "version"]

    # open() replaces the Python 2-only file() builtin, and the with-block
    # closes the handle, which was previously never closed.
    with open(fileName, 'rb') as docx_stream:
        docxFile = docx.Document(docx_stream)
        # Get the structure
        docxInfo = docxFile.core_properties
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' - Document: ' + str(fileName))
        # run the list in a for loop to print the value of each metadata
        for meta in attribute:
            metadata = getattr(docxInfo, meta)
            if not metadata:
                continue
            if isinstance(metadata, text_type):
                # Concatenate text directly: str() on a Python 2 unicode
                # value forces an ASCII encode and fails on non-ASCII text.
                print(" \n\t" + str(meta) + ": " + metadata)
            elif isinstance(metadata, datetime.datetime):
                print(" \n\t" + str(meta) + ": " + str(metadata))
Example #15
Source File: logic.py From janeway with GNU Affero General Public License v3.0 | 6 votes |
def serve_review_file(assignment):
    """
    Build a Word document for a review form and serve it.

    The document gets a title and instruction text, then one section per
    form element: heading, help text and, when the element has choices, a
    two-column table listing them.

    :param assignment: ReviewAssignment object
    :return: whatever ``files.serve_temp_file`` returns for the saved file
    """
    form_elements = assignment.form.elements.all()
    doc = Document()

    doc.add_heading('Review #{pk}'.format(pk=assignment.pk), 0)
    subtitle = 'Review of `{article_title}` by {reviewer}'.format(
        article_title=assignment.article.title,
        reviewer=assignment.reviewer.full_name(),
    )
    doc.add_heading(subtitle, level=1)
    doc.add_paragraph()
    doc.add_paragraph(
        'Complete the form below, then upload it under the "FILE UPLOAD" section on your review page'
        '. There is no need to complete the form on the web page if you are uploading this '
        'document.'
    )
    doc.add_paragraph()

    for element in form_elements:
        doc.add_heading(element.name, level=2)
        doc.add_paragraph(element.help_text)

        if element.choices:
            # NOTE(review): the rendered choices are not used below; the call
            # is kept as in the original.
            choices = render_choices(element.choices)
            table = doc.add_table(rows=1, cols=2)
            header_row = table.rows[0].cells
            for column, label in enumerate(('Choice', 'Indication')):
                header_row[column].text = label

            # One row per choice; the second column is left empty.
            for option in element.choices.split('|'):
                table.add_row().cells[0].text = str(option)

        # Empty paragraph after each element.
        doc.add_paragraph()

    filename = '{uuid}.docx'.format(uuid=uuid4())
    filepath = os.path.join(settings.BASE_DIR, 'files', 'temp', filename)
    doc.save(filepath)
    return files.serve_temp_file(filepath, filename)
Example #16
Source File: RastLeak_1_2.py From RastLeak with GNU General Public License v3.0 | 6 votes |
def Analyze_Metadata_pdf(filename):
    """Print a PDF's document-information entries and collect key values.

    Every entry of the information dictionary is printed. Values found under
    ``/Author``, ``/Producer`` and ``/Creator`` are added, without
    duplicates, to the module-level ``meta_author_array``,
    ``meta_producer_array`` and ``meta_creator_array`` lists; those three
    lists are then appended to the module-level ``metadata_files`` list.

    Args:
        filename: Path of the PDF file to read.
    """
    # Map each tracked key to the list that collects its values.
    collectors = {
        "/Author": meta_author_array,
        "/Producer": meta_producer_array,
        "/Creator": meta_creator_array,
    }
    # open() replaces the Python 2-only file() builtin, and the with-block
    # closes the handle, which was previously never closed.
    with open(filename, 'rb') as pdf_stream:
        pdfFile = PdfFileReader(pdf_stream)
        # getDocumentInfo() may return None when the PDF carries no
        # information dictionary; treat that as "no entries".
        metadata = pdfFile.getDocumentInfo() or {}
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' - Document: ' + str(filename))
        # Entries are read while the stream is open, in case the reader
        # resolves values lazily.
        for meta in metadata:
            value = metadata[meta]
            print(' - ' + meta + ':' + value)
            bucket = collectors.get(meta)
            if bucket is not None and value not in bucket:
                bucket.append(value)
    # Group the different arrays in one with all metadata.
    metadata_files.append(meta_author_array)
    metadata_files.append(meta_producer_array)
    metadata_files.append(meta_creator_array)
Example #17
Source File: test_properties.py From docxcompose with MIT License | 6 votes |
def test_removes_simple_field_but_keeps_value(self):
    """Dissolving a simple field removes the field but keeps its text."""
    doc = Document(docx_path('outdated_docproperty_with_umlauts.docx'))
    prop_name = u"F\xfc\xfc"

    def simple_fields():
        # Look the field up afresh on every call.
        return xpath(doc.element.body, simple_field_expression(prop_name))

    assert len(doc.paragraphs) == 1, 'input file should contain 1 paragraph'

    before = simple_fields()
    assert len(before) == 1, 'should contain one simple field docproperty'
    assert u'Hie chund ds property: ' == doc.paragraphs[0].text
    assert u'xxx' == before[0].text

    CustomProperties(doc).dissolve_fields(prop_name)

    after = simple_fields()
    assert len(after) == 0, 'should not contain any docproperties anymore'
    # when simple field is removed, the value is moved one up in the hierarchy
    assert u'Hie chund ds property: xxx' == doc.paragraphs[0].text
Example #18
Source File: rastleak_2_0.py From RastLeak with GNU General Public License v3.0 | 6 votes |
def Analyze_Metadata_pdf(filename):
    """Print a PDF's document-information entries and collect key values.

    Every entry of the information dictionary is printed. Values found under
    ``/Author``, ``/Producer`` and ``/Creator`` are added, without
    duplicates, to the module-level ``meta_author_array``,
    ``meta_producer_array`` and ``meta_creator_array`` lists; those three
    lists are then appended to the module-level ``metadata_files`` list.

    Args:
        filename: Path of the PDF file to read.
    """
    # Map each tracked key to the list that collects its values.
    collectors = {
        "/Author": meta_author_array,
        "/Producer": meta_producer_array,
        "/Creator": meta_creator_array,
    }
    # open() replaces the Python 2-only file() builtin, and the with-block
    # closes the handle, which was previously never closed.
    with open(filename, 'rb') as pdf_stream:
        pdfFile = PdfFileReader(pdf_stream)
        # getDocumentInfo() may return None when the PDF carries no
        # information dictionary; treat that as "no entries".
        metadata = pdfFile.getDocumentInfo() or {}
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' - Document: ' + str(filename))
        # Entries are read while the stream is open, in case the reader
        # resolves values lazily.
        for meta in metadata:
            value = metadata[meta]
            print(' - ' + meta + ':' + value)
            bucket = collectors.get(meta)
            if bucket is not None and value not in bucket:
                bucket.append(value)
    # Group the different arrays in one with all metadata.
    metadata_files.append(meta_author_array)
    metadata_files.append(meta_producer_array)
    metadata_files.append(meta_creator_array)
Example #19
Source File: wl_file_area.py From Wordless with GNU General Public License v3.0 | 6 votes |
def iter_block_items(self, parent):
    """
    Generate the paragraphs and tables directly inside *parent*, in the
    order their elements appear.

    *parent* is either a Document (its body is walked) or a _Cell (its
    ``_tc`` element is walked). Children that are CT_P elements are yielded
    as Paragraph objects and CT_Tbl elements as Table objects; any other
    child is skipped. A *parent* of another type raises ValueError.
    """
    if isinstance(parent, Document):
        container = parent.element.body
    elif isinstance(parent, _Cell):
        container = parent._tc
    else:
        raise ValueError("something's not right")

    # Element class -> wrapper used to expose it to the caller.
    wrappers = ((CT_P, Paragraph), (CT_Tbl, Table))
    for node in container.iterchildren():
        for element_type, wrapper in wrappers:
            if isinstance(node, element_type):
                yield wrapper(node, parent)
                break
Example #20
Source File: RastLeak_1_3.py From RastLeak with GNU General Public License v3.0 | 6 votes |
def Analyze_Metadata_pdf(filename):
    """Print a PDF's document-information entries and collect key values.

    Every entry of the information dictionary is printed. Values found under
    ``/Author``, ``/Producer`` and ``/Creator`` are added, without
    duplicates, to the module-level ``meta_author_array``,
    ``meta_producer_array`` and ``meta_creator_array`` lists; those three
    lists are then appended to the module-level ``metadata_files`` list.

    Args:
        filename: Path of the PDF file to read.
    """
    # Map each tracked key to the list that collects its values.
    collectors = {
        "/Author": meta_author_array,
        "/Producer": meta_producer_array,
        "/Creator": meta_creator_array,
    }
    # open() replaces the Python 2-only file() builtin, and the with-block
    # closes the handle, which was previously never closed.
    with open(filename, 'rb') as pdf_stream:
        pdfFile = PdfFileReader(pdf_stream)
        # getDocumentInfo() may return None when the PDF carries no
        # information dictionary; treat that as "no entries".
        metadata = pdfFile.getDocumentInfo() or {}
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' - Document: ' + str(filename))
        # Entries are read while the stream is open, in case the reader
        # resolves values lazily.
        for meta in metadata:
            value = metadata[meta]
            print(' - ' + meta + ':' + value)
            bucket = collectors.get(meta)
            if bucket is not None and value not in bucket:
                bucket.append(value)
    # Group the different arrays in one with all metadata.
    metadata_files.append(meta_author_array)
    metadata_files.append(meta_producer_array)
    metadata_files.append(meta_creator_array)
Example #21
Source File: test_properties.py From docxcompose with MIT License | 6 votes |
def test_dissolves_all_instances_of_given_field(self):
    """Dissolving a property removes every one of its complex fields.

    The fixture has three paragraphs, each showing the same property; the
    paragraph texts must survive the removal of the fields.
    """
    doc = Document(docx_path('multiple_identical_properties.docx'))

    def complex_field_count():
        return len(xpath(doc.element.body, './/w:instrText'))

    assert len(doc.paragraphs) == 3, 'input file should contain 3 paragraphs'
    assert complex_field_count() == 3, \
        'document should contain three complex field docproperties'
    for par in doc.paragraphs:
        assert u'Foo' == par.text

    CustomProperties(doc).dissolve_fields("Text Property")

    assert len(doc.paragraphs) == 3
    assert complex_field_count() == 0, \
        'document should not contain any complex field anymore'
    for par in doc.paragraphs:
        assert u'Foo' == par.text, "value should have been kept in document"
Example #22
Source File: rastleak_1_4.py From RastLeak with GNU General Public License v3.0 | 6 votes |
def Analyze_Metadata_pdf(filename):
    """Print a PDF's document-information entries and collect key values.

    Every entry of the information dictionary is printed. Values found under
    ``/Author``, ``/Producer`` and ``/Creator`` are added, without
    duplicates, to the module-level ``meta_author_array``,
    ``meta_producer_array`` and ``meta_creator_array`` lists; those three
    lists are then appended to the module-level ``metadata_files`` list.

    Args:
        filename: Path of the PDF file to read.
    """
    # Map each tracked key to the list that collects its values.
    collectors = {
        "/Author": meta_author_array,
        "/Producer": meta_producer_array,
        "/Creator": meta_creator_array,
    }
    # open() replaces the Python 2-only file() builtin, and the with-block
    # closes the handle, which was previously never closed.
    with open(filename, 'rb') as pdf_stream:
        pdfFile = PdfFileReader(pdf_stream)
        # getDocumentInfo() may return None when the PDF carries no
        # information dictionary; treat that as "no entries".
        metadata = pdfFile.getDocumentInfo() or {}
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' - Document: ' + str(filename))
        # Entries are read while the stream is open, in case the reader
        # resolves values lazily.
        for meta in metadata:
            value = metadata[meta]
            print(' - ' + meta + ':' + value)
            bucket = collectors.get(meta)
            if bucket is not None and value not in bucket:
                bucket.append(value)
    # Group the different arrays in one with all metadata.
    metadata_files.append(meta_author_array)
    metadata_files.append(meta_producer_array)
    metadata_files.append(meta_creator_array)
Example #23
Source File: new_hire_orientation.py From Automate-it with MIT License | 6 votes |
def generate_document(employee_data, agenda):
    """Write a new-hire orientation letter for every employee that is due.

    For each entry of *employee_data* whose ``'isDue'`` value is truthy, a
    letter is built and saved as ``orientation_<id>.docx`` in the current
    working directory.

    Args:
        employee_data: Iterable of dicts with the keys ``'isDue'``,
            ``'name'``, ``'department'`` and ``'id'``.
        agenda: Mapping from department to an iterable of session names,
            each added as a bullet paragraph.
    """
    for emp in employee_data:
        if not emp['isDue']:
            continue

        # Start from a fresh document for each employee. A single shared
        # document made every later file also contain the letters of the
        # employees processed before it.
        document = Document()
        name = emp['name']
        document.add_heading('Your New Hire Orientation\n', level=1)
        document.add_paragraph('Dear %s,' % name)
        document.add_paragraph('Welcome to Google Inc. You have been selected for our new hire orientation.')
        document.add_paragraph('Based on your department you will go through below sessions:')

        department = emp['department']
        for session in agenda[department]:
            document.add_paragraph(session, style='ListBullet')

        document.add_paragraph('Thanks,\n HR Manager')
        document.save('orientation_%s.docx' % emp['id'])
Example #24
Source File: test_properties.py From docxcompose with MIT License | 5 votes |
def test_add_utf8_property():
    """A value added as UTF-8 encoded bytes is read back as unicode text."""
    doc = Document(docx_path('docproperties.docx'))
    custom_props = CustomProperties(doc)

    encoded_value = u'f\xfc\xfc'.encode('utf-8')
    custom_props.add('My Text Property', encoded_value)

    stored = custom_props.get('My Text Property')
    assert stored == u'f\xfc\xfc'
Example #25
Source File: test_properties.py From docxcompose with MIT License | 5 votes |
def test_get_doc_properties():
    """Property values read via indexing and via ``get()`` match the fixture."""
    props = CustomProperties(Document(docx_path('docproperties.docx')))

    def by_index(key):
        return props[key]

    def by_get(key):
        return props.get(key)

    # The same expectations hold for both access styles; indexing first.
    for read in (by_index, by_get):
        assert read('Text Property') == 'Foo Bar'
        assert read('Number Property') == 123
        assert read('Boolean Property') is True
        assert read('Date Property') == datetime(2019, 6, 11, 10, 0)
Example #26
Source File: test_properties.py From docxcompose with MIT License | 5 votes |
def test_set_doc_properties():
    """Values assigned by key are returned by a following read of that key."""
    props = CustomProperties(Document(docx_path('docproperties.docx')))

    updates = [
        ('Text Property', 'baz'),
        ('Boolean Property', False),
        ('Number Property', 456),
        ('Date Property', datetime(2019, 10, 20, 12, 0)),
    ]
    for key, new_value in updates:
        props[key] = new_value
        stored = props[key]
        if isinstance(new_value, bool):
            # Booleans are checked by identity, the other values by equality.
            assert stored is new_value
        else:
            assert stored == new_value
Example #27
Source File: test_styles.py From docxcompose with MIT License | 5 votes |
def merged_styles():
    """Return a Composer built on styles_en.docx with styles_de.docx appended."""
    master = Document(docx_path("styles_en.docx"))
    composer = Composer(master)

    addition = Document(docx_path("styles_de.docx"))
    composer.append(addition)
    return composer
Example #28
Source File: test_properties.py From docxcompose with MIT License | 5 votes |
def test_doc_properties_keys():
    """``keys()`` returns the fixture's property names as an ordered list."""
    props = CustomProperties(Document(docx_path('docproperties.docx')))

    actual_keys = props.keys()
    assert actual_keys == [
        'Text Property',
        'Number Property',
        'Boolean Property',
        'Date Property',
        'Float Property',
    ]
Example #29
Source File: test_properties.py From docxcompose with MIT License | 5 votes |
def test_doc_properties_items():
    """``items()`` returns the fixture's (name, value) pairs as an ordered list."""
    props = CustomProperties(Document(docx_path('docproperties.docx')))

    actual_items = props.items()
    assert actual_items == [
        ('Text Property', 'Foo Bar'),
        ('Number Property', 123),
        ('Boolean Property', True),
        ('Date Property', datetime(2019, 6, 11, 10, 0)),
        ('Float Property', 1.1),
    ]
Example #30
Source File: RastLeak_1_2.py From RastLeak with GNU General Public License v3.0 | 5 votes |
def Analyze_Metadata_doc(fileName):
    """Print the populated core properties of a .docx file and record authors.

    Each core property named in ``attribute`` is read. A truthy ``author``
    value is added, without duplicates, to the module-level
    ``meta_author_array`` list. Properties with a truthy value that is text
    or a ``datetime.datetime`` are printed.

    Args:
        fileName: Path of the .docx file to read.
    """
    # ``unicode`` only exists on Python 2; fall back to ``str`` on Python 3.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    # Print the metadata which it wants to display
    attribute = ["author", "category", "comments", "content_status",
                 "created", "identifier", "keywords", "language",
                 "last_modified_by", "last_printed", "modified",
                 "revision", "subject", "title", "version"]

    # open() replaces the Python 2-only file() builtin, and the with-block
    # closes the handle, which was previously never closed.
    with open(fileName, 'rb') as docx_stream:
        docxFile = docx.Document(docx_stream)
        # Get the structure
        docxInfo = docxFile.core_properties
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' - Document: ' + str(fileName))
        # run the list in a for loop to print the value of each metadata
        for meta in attribute:
            # The property value is used directly. The earlier code called it
            # as ``metadata([meta])``, which raises TypeError because the
            # value is not callable.
            metadata = getattr(docxInfo, meta)
            if not metadata:
                continue
            # The earlier comparisons against "/Author", "/Producer" and
            # "/Creator" could never match a name from ``attribute``.
            # NOTE(review): only "author" is mapped here; confirm whether any
            # core property should feed the producer/creator lists.
            if meta == "author" and metadata not in meta_author_array:
                meta_author_array.append(metadata)
            if isinstance(metadata, text_type):
                # Concatenate text directly: str() on a Python 2 unicode
                # value forces an ASCII encode and fails on non-ASCII text.
                print(" \n\t" + str(meta) + ": " + metadata)
            elif isinstance(metadata, datetime.datetime):
                print(" \n\t" + str(meta) + ": " + str(metadata))