libreoffice python 操作word及excel文檔的方法
1、開始、關閉libreoffice服務;
開始之前同步字體文件時間,是因為創建soffice服務時,服務會檢查所需加載的文件的時間,如果其認為時間不符,則其可能會重新加載,耗時較長,因此需事先統一時間。
使用時如果需要多次調用,最好每次調用均開啟後關閉,否則libreoffice會創建一個緩存文檔並越用越大,處理時間會增加。
class OfficeProcess(object):
    """Start and stop a local soffice process that serves UNO on port 2002.

    The process is launched with ``--pidfile=sof.pid`` (relative to the
    current working directory); ``stop_office`` reads that file to find the
    process to terminate.
    """

    def __init__(self):
        # Handle of the launcher process; 0 until start_office() is called.
        self.p = 0
        # Give every font file the same fixed mtime. soffice checks the
        # timestamps of the files it loads at startup and may reload them
        # (slow) when it thinks they changed.
        subprocess.Popen('find /usr/share/fonts | xargs touch -m -t 201801010000.00', shell=True)

    def start_office(self):
        """Launch soffice and block until its UNO socket can be resolved.

        Retries once per second without an upper bound. KeyboardInterrupt
        and SystemExit are no longer swallowed by the retry loop, so the
        wait can be aborted.
        """
        self.p = subprocess.Popen('soffice --pidfile=sof.pid --invisible --accept="socket,host=localhost,port=2002;urp;"', shell=True)
        while True:
            try:
                local_context = uno.getComponentContext()
                resolver = local_context.getServiceManager().createInstanceWithContext('com.sun.star.bridge.UnoUrlResolver', local_context)
                resolver.resolve('uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext')
                return
            except Exception:
                print(ts(), "wait for connecting soffice...")
                time.sleep(1)

    def stop_office(self):
        """Terminate the soffice process named in ``sof.pid`` (best effort).

        Does nothing when the pid file is missing or unreadable, or when the
        process cannot be signalled. After a successful SIGTERM it waits for
        the launcher process started by ``start_office`` (if any) to exit.
        """
        try:
            with open("sof.pid", "rb") as f:
                pid = int(f.read())
        except (OSError, ValueError):
            # No pid file (office never started / already cleaned up) or
            # garbage content: nothing we can signal.
            return
        try:
            os.kill(pid, signal.SIGTERM)
        except OSError:
            # Process already gone or not ours; do not block in wait().
            return
        if self.p:
            self.p.wait()
2、init service manager
# Connect to the soffice instance listening on localhost:2002 and keep the
# handles the rest of the class uses (self.ctx, self.smgr, self.desktop).
local_context = uno.getComponentContext()
service_manager = local_context.getServiceManager()
resolver = service_manager.createInstanceWithContext('com.sun.star.bridge.UnoUrlResolver', local_context)
# Component context resolved through the socket; later services are created with it.
self.ctx = resolver.resolve('uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext')
self.smgr = self.ctx.ServiceManager
# Desktop service: used elsewhere in this file to load and create documents.
self.desktop = self.smgr.createInstanceWithContext('com.sun.star.frame.Desktop', self.ctx)
3、從二進制數據中讀取doc文檔
def ImportFromMemory(self, data):
    """Load a Writer document from in-memory bytes.

    Args:
        data: raw bytes of the document file.

    Resets ``self.doc`` to an empty result. On success sets
    ``self.document`` and ``self.text``; if loading fails ``self.text`` is
    set to None. Only ``Exception`` subclasses are treated as load failures,
    so KeyboardInterrupt/SystemExit propagate.
    """
    istream = self.smgr.createInstanceWithContext('com.sun.star.io.SequenceInputStream', self.ctx)
    istream.initialize((uno.ByteSequence(data), ))
    pv = PropertyValue()
    pv.Name = 'InputStream'
    pv.Value = istream
    self.doc = {'doc': []}
    try:
        # 'private:stream' makes the loader read from the InputStream property.
        self.document = self.desktop.loadComponentFromURL('private:stream/swriter', '_blank', 0, (pv, ))
        self.text = self.document.getText()
    except Exception:
        self.text = None
4、讀取doc文檔中的數據
def ExportToJson(self):
    """Parse the loaded document text and return the result as a JSON string.

    On any parsing error (including ``self.text`` being None after a failed
    import) the result is ``{"doc": [], "length": 0}``. Only ``Exception``
    subclasses are caught, so KeyboardInterrupt/SystemExit propagate.
    """
    try:
        length = self.__ParseText(self.text, self.__Callback(self.doc['doc']))
        self.doc['length'] = length
    except Exception:
        self.doc = {'doc': [], 'length': 0}
    return json.dumps(self.doc)

@staticmethod
def __Callback(alist):
    """Return a one-argument callable that appends its argument to *alist*."""
    return alist.append
def __ParseText(self, text, func):
    """Walk the elements of *text* and parse paragraphs and tables.

    *func* is handed to the paragraph/table parsers, which call it with each
    parsed item. Returns the sum of the lengths they report; elements that
    are neither a paragraph nor a text table are skipped.
    """
    total = 0
    enumeration = text.createEnumeration()
    while enumeration.hasMoreElements():
        item = enumeration.nextElement()
        if item.supportsService('com.sun.star.text.Paragraph'):
            total += self.__ParseParagraph(item, func)
            continue
        if item.supportsService('com.sun.star.text.TextTable'):
            total += self.__ParseTable(item, func)
    return total
def __ParseParagraph(self, paragraph, func):
    """Parse one paragraph and pass ``{'paragraph': [...], 'length': n}`` to *func*.

    Returns the accumulated text length of the paragraph.
    """
    p = {'paragraph': []}
    append = self.__Callback(p['paragraph'])
    l = 0
    paragraph_it = paragraph.createEnumeration()
    while paragraph_it.hasMoreElements():
        portion = paragraph_it.nextElement()
        portion_type = portion.TextPortionType
        if portion_type in ('Text', 'TextField'):
            l += self.__ParsePortionText(portion, append)
        elif portion_type == 'SoftPageBreak':
            pass
        else:
            # Any other portion type: look for graphics/frames/shapes in it.
            l += self.__ParseTextContent(portion, append)
    if hasattr(paragraph, 'createContentEnumeration'):
        l += self.__ParseTextContent(paragraph, append)
    p['length'] = l
    func(p)
    return l

def __ParseTextContent(self, textcontent, func):
    """Parse graphics, text frames and group shapes attached to *textcontent*.

    Embedded objects and unknown content kinds are skipped. Returns the
    accumulated text length.
    """
    l = 0
    content_it = textcontent.createContentEnumeration('com.sun.star.text.TextContent')
    while content_it.hasMoreElements():
        element = content_it.nextElement()
        if element.supportsService('com.sun.star.text.TextGraphicObject'):
            l += self.__ParsePortionGraphic(element, func)
        elif element.supportsService('com.sun.star.text.TextEmbeddedObject'):
            pass
        elif element.supportsService('com.sun.star.text.TextFrame'):
            l += self.__ParseFrame(element, func)
        elif element.supportsService('com.sun.star.drawing.GroupShape'):
            l += self.__ParseGroup(element, func)
    return l

def __ParseFrame(self, frame, func):
    """Parse the text of *frame* and pass ``{'frame': [...], 'length': n}`` to *func*."""
    f = {'frame': []}
    l = self.__ParseText(frame.getText(), self.__Callback(f['frame']))
    f['length'] = l
    func(f)
    return l

def __ParseGroup(self, group, func):
    """Parse every member of *group* that supports the drawing Text service."""
    l = 0
    for i in range(group.getCount()):
        it = group.getByIndex(i)
        if it.supportsService('com.sun.star.drawing.Text'):
            l += self.__ParseFrame(it, func)
    return l

def __ParsePortionText(self, portion_text, func):
    """Pass ``{'portion': text, 'length': len(text)}`` to *func*; return the length."""
    string = portion_text.String
    func({'portion': string, 'length': len(string)})
    return len(string)

def __ParsePortionGraphic(self, portion_graphic, func):
    """Export a graphic as base64-encoded PNG plus its size and crop values.

    The resulting dict is passed to *func*. Images contribute 0 to the text
    length, so this always returns 0.
    """
    gp = self.smgr.createInstanceWithContext('com.sun.star.graphic.GraphicProvider', self.ctx)
    stream = self.smgr.createInstanceWithContext('com.sun.star.io.TempFile', self.ctx)
    pv1 = PropertyValue()
    pv1.Name = 'OutputStream'
    pv1.Value = stream
    pv2 = PropertyValue()
    pv2.Name = 'MimeType'
    pv2.Value = 'image/png'
    gp.storeGraphic(portion_graphic.Graphic, (pv1, pv2))
    stream.getOutputStream().flush()
    # Rewind and read back everything that was written.
    stream.seek(0)
    l = stream.getInputStream().available()
    b = uno.ByteSequence(b'')
    stream.seek(0)
    l, b = stream.getInputStream().readBytes(b, l)
    img = {'image': base64.b64encode(b.value).decode('ascii')}
    img['height'] = portion_graphic.Height
    img['width'] = portion_graphic.Width
    img['actualheight'] = portion_graphic.ActualSize.Height
    img['actualwidth'] = portion_graphic.ActualSize.Width
    img['croptop'] = portion_graphic.GraphicCrop.Top
    img['cropbottom'] = portion_graphic.GraphicCrop.Bottom
    img['cropleft'] = portion_graphic.GraphicCrop.Left
    img['cropright'] = portion_graphic.GraphicCrop.Right
    img['length'] = 0
    func(img)
    return 0

def __ParseTable(self, table, func):
    """Parse a text table into a row/column dict and pass it to *func*.

    If the table cannot be mapped onto a regular grid (or anything else goes
    wrong) the table is discarded, a message is printed and 0 is returned.
    Only ``Exception`` subclasses are treated as "wrong table", so
    KeyboardInterrupt/SystemExit propagate.
    """
    l = 0
    try:
        matrix = self.__GetTableMatrix(table)
        seps = self.__GetTableSeparators(table)
        t = {}
        count = 0
        for ri in matrix:
            t[ri] = {}
            for ci in matrix[ri]:
                # Copy the cell metadata but replace the UNO cell object
                # with its parsed content.
                t[ri][ci] = dict(matrix[ri][ci])
                del t[ri][ci]['cell']
                t[ri][ci]['content'] = []
                l += self.__ParseText(matrix[ri][ci]['cell'], self.__Callback(t[ri][ci]['content']))
                count += t[ri][ci]['rowspan'] * t[ri][ci]['colspan']
        # The spans of all cells must tile the rows x columns grid exactly.
        if count != len(t) * len(seps):
            raise ValueError('count of cells error')
        func({'table': t, 'row': len(t), 'column': len(seps), 'length': l, 'tableid': self.table_id})
        self.table_id += 1
    except Exception:
        l = 0
        print('discard wrong table')
    return l

@staticmethod
def __GetTableSeparators(table):
    """Return the sorted distinct column boundary positions of *table*.

    Collects the separator positions of every row plus the table's
    ``TableColumnRelativeSum``. A position that is exactly 1 below its
    successor is merged into it (presumably rounding noise - confirm).
    """
    rows = table.getRows()
    result = [table.TableColumnRelativeSum]
    for ri in range(rows.getCount()):
        result += [s.Position for s in rows.getByIndex(ri).TableColumnSeparators]
    result = sorted(set(result))
    for i in range(len(result) - 1):
        if result[i] + 1 == result[i + 1]:
            result[i] += 1
    return sorted(set(result))

@staticmethod
def __NameToRC(name):
    """Convert a cell name such as ``'B3'`` to zero-based ``(row, column)``.

    Column letters use 52 symbols: ``A``-``Z`` are 0-25 and ``a``-``z`` are
    26-51. Multi-letter names continue after the single letters
    (``'AA'`` is column 52) instead of colliding with them.
    """
    r = int(re.sub('[A-Za-z]', '', name)) - 1
    cstr = re.sub('[0-9]', '', name)
    c = 0
    last = len(cstr) - 1
    for i, ch in enumerate(cstr):
        c *= 52
        if i < last:
            # Leading letters are 1-based so that 'AA' follows 'z'.
            c += 1
        if 'A' <= ch <= 'Z':
            c += ord(ch) - ord('A')
        else:
            c += 26 + ord(ch) - ord('a')
    return r, c

@staticmethod
def __GetTableMatrix(table):
    """Return ``{row: {col: {'cell', 'rowspan', 'name', 'colspan'}}}`` for *table*.

    ``colspan`` is derived by locating the cell's left and right separators
    of its own row within the table-wide separator list.
    """
    result = {}
    for name in table.getCellNames():
        ri, ci = WordToJson.__NameToRC(name)
        cell = table.getCellByName(name)
        if ri not in result:
            result[ri] = {}
        result[ri][ci] = {'cell': cell, 'rowspan': cell.RowSpan, 'name': name}
    seps = WordToJson.__GetTableSeparators(table)
    for ri in result:
        sep = [s.Position for s in table.getRows().getByIndex(ri).TableColumnSeparators] + [table.TableColumnRelativeSum]
        sep = sorted(set(sep))
        for ci in result[ri]:
            # A row position missing from the table-wide list was merged
            # into position + 1 by __GetTableSeparators.
            right = seps.index(sep[ci]) if sep[ci] in seps else seps.index(sep[ci] + 1)
            if ci == 0:
                left = -1
            elif sep[ci - 1] in seps:
                left = seps.index(sep[ci - 1])
            else:
                left = seps.index(sep[ci - 1] + 1)
            result[ri][ci]['colspan'] = right - left
    return result
5、寫doc文檔
# Create a new, empty Writer document to write into and keep its text and a
# text cursor; the write helpers insert at this cursor.
self.doco = self.desktop.loadComponentFromURL('private:factory/swriter', '_blank', 0, ())
self.texto = self.doco.getText()
self.cursoro = self.texto.createTextCursor()
# Spacing below each paragraph; NOTE(review): unit is presumably 1/100 mm - confirm.
self.cursoro.ParaBottomMargin = 500
def __WriteText(self, text, texto, cursoro):
    """Write each item of *text* (paragraph, image or table dict) at *cursoro*."""
    for it in text:
        if 'paragraph' in it:
            self.__WriteParagraph(it, texto, cursoro)
        elif 'image' in it:
            self.__WritePortionGraphic(it, texto, cursoro)
        elif 'table' in it:
            self.__WriteTable(it, texto, cursoro)

def __WriteParagraph(self, paragraph, texto, cursoro):
    """Insert a non-empty paragraph followed by a paragraph break.

    When the paragraph carries a ``'result'`` list, the ``'trans_sen'`` of
    every entry is inserted in order; otherwise ``paragraph['paragraph']``
    is inserted as-is.
    """
    if paragraph['length'] > 0:
        if 'result' in paragraph:
            for it in paragraph['result']:
                texto.insertString(cursoro, it['trans_sen'], False)
        else:
            # NOTE(review): the exporter in this file emits
            # paragraph['paragraph'] as a list of portion dicts, while
            # insertString needs a string - confirm the caller flattens it.
            texto.insertString(cursoro, paragraph['paragraph'], False)
        texto.insertControlCharacter(cursoro, ControlCharacter.PARAGRAPH_BREAK, False)

def __WritePortionGraphic(self, portion_graphic, texto, cursoro):
    """Insert a base64-encoded image with its size and crop, then a paragraph break.

    ``actualheight``/``actualwidth`` fall back to ``height``/``width`` and
    the crop values fall back to 0 when absent from *portion_graphic*.
    """
    png_base64 = portion_graphic['image']
    png = base64.b64decode(png_base64)
    gp = self.smgr.createInstanceWithContext('com.sun.star.graphic.GraphicProvider', self.ctx)
    istream = self.smgr.createInstanceWithContext('com.sun.star.io.SequenceInputStream', self.ctx)
    istream.initialize((uno.ByteSequence(png), ))
    pv = PropertyValue()
    pv.Name = 'InputStream'
    pv.Value = istream
    actualsize = uno.createUnoStruct('com.sun.star.awt.Size')
    actualsize.Height = portion_graphic['actualheight'] if 'actualheight' in portion_graphic else portion_graphic['height']
    actualsize.Width = portion_graphic['actualwidth'] if 'actualwidth' in portion_graphic else portion_graphic['width']
    graphiccrop = uno.createUnoStruct('com.sun.star.text.GraphicCrop')
    graphiccrop.Top = portion_graphic.get('croptop', 0)
    graphiccrop.Bottom = portion_graphic.get('cropbottom', 0)
    graphiccrop.Left = portion_graphic.get('cropleft', 0)
    graphiccrop.Right = portion_graphic.get('cropright', 0)
    image = self.doco.createInstance('com.sun.star.text.TextGraphicObject')
    image.Surround = NONE
    image.Graphic = gp.queryGraphic((pv, ))
    image.Height = portion_graphic['height']
    image.Width = portion_graphic['width']
    image.setPropertyValue('ActualSize', actualsize)
    image.setPropertyValue('GraphicCrop', graphiccrop)
    texto.insertTextContent(cursoro, image, False)
    texto.insertControlCharacter(cursoro, ControlCharacter.PARAGRAPH_BREAK, False)

def __WriteTable(self, table, texto, cursoro):
    """Insert a table, merge spanned cells and write every cell's content.

    Row and column keys of ``table['table']`` are strings holding integers
    and are processed in numeric order.
    """
    tableo = self.doco.createInstance('com.sun.star.text.TextTable')
    tableo.initialize(table['row'], table['column'])
    texto.insertTextContent(cursoro, tableo, False)
    tcursoro = tableo.createCursorByCellName("A1")
    # Probe: extend the selection one row down. If the range name is still
    # 'A1' the selection did not grow, so merging is skipped for this table.
    hitbug = False
    if table['row'] > 1:
        tcursoro.goDown(1, True)
        hitbug = tcursoro.getRangeName() == 'A1'
    for ri in sorted(int(r) for r in table['table']):
        rs = table['table'][str(ri)]
        for ci in sorted(int(c) for c in rs):
            cell = rs[str(ci)]
            if not hitbug and (cell['rowspan'] > 1 or cell['colspan'] > 1):
                tcursoro.gotoCellByName(cell['name'], False)
                if cell['rowspan'] > 1:
                    tcursoro.goDown(cell['rowspan'] - 1, True)
                if cell['colspan'] > 1:
                    tcursoro.goRight(cell['colspan'] - 1, True)
                tcursoro.mergeRange()
            ctexto = tableo.getCellByName(cell['name'])
            if ctexto is None:
                # No cell under that name in the new table: skip it.
                continue
            ccursoro = ctexto.createTextCursor()
            ccursoro.CharWeight = FontWeight.NORMAL
            ccursoro.CharWeightAsian = FontWeight.NORMAL
            ccursoro.ParaAdjust = LEFT
            self.__WriteText(cell['content'], ctexto, ccursoro)
6、生成二進制的doc文檔數據
# Store the output document into an in-memory UNO Pipe using the
# 'MS Word 2007 XML' export filter, then read the bytes back.
streamo = self.smgr.createInstanceWithContext('com.sun.star.io.Pipe', self.ctx)
self.doco.storeToURL('private:stream', (PropertyValue('FilterName', 0, 'MS Word 2007 XML', 0), PropertyValue('OutputStream', 0, streamo, 0)))
streamo.flush()
# readBytes returns a pair; the second item (the data read) is kept in datao.
_, datao = streamo.readBytes(None, streamo.available())
7、從doc文檔數據生成pdf的二進制數據
# Store the output document into an in-memory UNO Pipe using the
# 'writer_pdf_Export' filter, then read the PDF bytes back.
streamo = self.smgr.createInstanceWithContext('com.sun.star.io.Pipe', self.ctx)
self.doco.storeToURL('private:stream', (PropertyValue('FilterName', 0, 'writer_pdf_Export', 0), PropertyValue('OutputStream', 0, streamo, 0)))
streamo.flush()
# readBytes returns a pair; the second item (the data read) is kept in datap.
_, datap = streamo.readBytes(None, streamo.available())
8、讀取excel二進制數據
def ImportFromMemory(self, data):
    """Load a Calc spreadsheet document from in-memory bytes.

    Args:
        data: raw bytes of the spreadsheet file.

    Resets ``self.doc`` to an empty result. On success sets
    ``self.document`` and ``self.sheets``; if loading fails ``self.sheets``
    is set to None. Only ``Exception`` subclasses are treated as load
    failures, so KeyboardInterrupt/SystemExit propagate.
    """
    istream = self.smgr.createInstanceWithContext('com.sun.star.io.SequenceInputStream', self.ctx)
    istream.initialize((uno.ByteSequence(data), ))
    pv = PropertyValue()
    pv.Name = 'InputStream'
    pv.Value = istream
    self.doc = {'doc': []}
    try:
        print("before loadComponentFromURL")
        # 'private:stream' makes the loader read from the InputStream property.
        self.document = self.desktop.loadComponentFromURL('private:stream/scalc', '_blank', 0, (pv, ))
        self.sheets = self.document.getSheets()
        print("ImportFromMemory done")
    except Exception:
        print("ImportFromMemory failed")
        self.sheets = None
9、讀取excel的文本數據
def ExportToJson(self):
    """Parse the loaded sheets and return the result as a JSON string.

    On any parsing error (including ``self.sheets`` being None after a
    failed import) the result is ``{"doc": [], "length": 0}``. Only
    ``Exception`` subclasses are caught, so KeyboardInterrupt/SystemExit
    propagate.
    """
    try:
        length = self.__ParseText(self.sheets, self.__Callback(self.doc['doc']))
        self.doc['length'] = length
    except Exception:
        self.doc = {'doc': [], 'length': 0}
    return json.dumps(self.doc)
def __ParseText(self, sheets, func):
    """Parse every spreadsheet in *sheets*; return the total text length."""
    l = 0
    sheets_it = sheets.createEnumeration()
    while sheets_it.hasMoreElements():
        element = sheets_it.nextElement()
        if element.supportsService('com.sun.star.sheet.Spreadsheet'):
            l += self.__ParseSpreadsheet(element, func)
    return l

def __ParseSpreadsheet(self, spreadsheet, func):
    """Collect the TEXT cells among the visible cells of one sheet.

    Passes ``{'spreadsheet': [...], 'length': n}`` to *func* and returns n.
    EMPTY, VALUE and FORMULA cells are only logged, not exported.
    """
    l = 0
    p = {'spreadsheet': []}
    visible_cells_it = spreadsheet.queryVisibleCells().getCells().createEnumeration()
    while visible_cells_it.hasMoreElements():
        cell = visible_cells_it.nextElement()
        # Named cell_type so the builtin `type` is not shadowed.
        cell_type = cell.getType()
        if cell_type == self.EMPTY:
            print("cell.type==empty")
        elif cell_type == self.VALUE:
            print("cell.type==VALUE", "value=", cell.getValue(), cell.getCellAddress ())
        elif cell_type == self.TEXT:
            print("cell.type==TEXT","content=", cell.getString().encode("UTF-8"), cell.getCellAddress ())
            l += self.__ParseCellText(spreadsheet, cell, self.__Callback(p['spreadsheet']))
            print("__ParseCellText=", p)
        elif cell_type == self.FORMULA:
            print("cell.type==FORMULA", "formula=", cell.getValue())
    p['length'] = l
    func(p)
    return l

def __ParseCellText(self, sheet, cell, func):
    """Pass the cell's text, position and sheet name to *func*; return the text length.

    If the address or sheet name cannot be read, x and y are reported as -1
    and the sheet name as None.
    """
    try:
        x = cell.getCellAddress().Column
        y = cell.getCellAddress().Row
        sheetname = sheet.getName()
    except Exception:
        x = -1
        y = -1
        sheetname = None
    celltext = cell.getString()
    func({'celltext': celltext, 'x': x, 'y': y, 'sheetname': sheetname, 'length': len(celltext)})
    return len(celltext)
# CellContentType enum values that __ParseSpreadsheet compares against cell.getType().
self.EMPTY = uno.Enum("com.sun.star.table.CellContentType", "EMPTY")
self.TEXT = uno.Enum("com.sun.star.table.CellContentType", "TEXT")
self.FORMULA = uno.Enum("com.sun.star.table.CellContentType", "FORMULA")
self.VALUE = uno.Enum("com.sun.star.table.CellContentType", "VALUE")
10、替換excel的文本信息
def ImportFromJson(self, data):
    """Write the texts described by the JSON string *data* back into the sheets.

    Invalid JSON raises from ``json.loads``. Failures while writing remain
    best-effort (not raised) but are now reported instead of being silently
    dropped; KeyboardInterrupt/SystemExit propagate.
    """
    doc = json.loads(data)
    try:
        self.__WriteText(doc['doc'])
    except Exception as exc:
        print("ImportFromJson failed:", exc)
def __WriteText(self, text):
    """Write every item of *text* that has 'paragraph' and 'sheetname' keys.

    The sheet is looked up by name and reused while consecutive items name
    the same sheet; items whose sheet cannot be found are skipped.
    """
    print("__WriteText begin:", text)
    sheet = None
    for it in text:
        if 'paragraph' in it and 'sheetname' in it:
            if sheet is None or sheet.getName() != it['sheetname']:
                try:
                    sheet = self.sheets.getByName(it['sheetname'])
                    print("getsheet:", it['sheetname'], "=", sheet.getName())
                except Exception:
                    sheet = None
                    continue
            self.__WriteParagraph(it, sheet)

def __WriteParagraph(self, paragraph, sheet):
    """Replace the text of the cell at (paragraph['x'], paragraph['y']).

    The ``'trans_sen'`` strings of all ``paragraph['result']`` entries are
    concatenated in order and written once; calling ``setString`` per entry
    would leave only the last sentence in the cell. Nothing is written when
    the paragraph is empty, the cell cannot be resolved, or there is no
    non-empty ``'result'`` list.
    """
    print("__WriteParagraph")
    if paragraph['length'] > 0:
        try:
            x = paragraph['x']
            y = paragraph['y']
            print("getcell:", x, y)
            cell = sheet.getCellByPosition(x, y)
            print("getcell done")
        except Exception:
            return
        results = paragraph.get('result')
        if results:
            print("cell=", cell.getString())
            cell.setString(''.join(it['trans_sen'] for it in results))
            print("cell,", cell.getString(), ",done")
11、生成excel文檔二進制數據
# Store the spreadsheet document into an in-memory UNO Pipe using the
# 'Calc MS Excel 2007 XML' export filter, then read the bytes back.
streamo = self.smgr.createInstanceWithContext('com.sun.star.io.Pipe', self.ctx)
self.document.storeToURL('private:stream', (PropertyValue('FilterName', 0, 'Calc MS Excel 2007 XML', 0), PropertyValue('OutputStream', 0, streamo, 0)))
streamo.flush()
# readBytes returns a pair; the second item (the data read) is kept in datao.
_, datao = streamo.readBytes(None, streamo.available())
12、生成excel的pdf文檔
# Store the spreadsheet document into an in-memory UNO Pipe using the
# 'calc_pdf_Export' filter, then read the PDF bytes back.
streamo = self.smgr.createInstanceWithContext('com.sun.star.io.Pipe', self.ctx)
self.document.storeToURL('private:stream', (PropertyValue('FilterName', 0, 'calc_pdf_Export', 0), PropertyValue('OutputStream', 0, streamo, 0)))
streamo.flush()
# readBytes returns a pair; the second item (the data read) is kept in datap.
_, datap = streamo.readBytes(None, streamo.available())
以上就是本文的全部內容,希望對大家的學習有所幫助,也希望大家多多支持腳本之家。
相關文章
Python實現定時監測網站運行狀態的示例代碼
這篇文章主要介紹了Python實現定時監測網站狀態的示例代碼,幫助大家更好的管理自己的網站,感興趣的朋友可以了解下 2020-09-09 Python+OpenCV之形態學操作詳解
這篇文章主要為大家詳細介紹了Python OpenCV中的形態學操作(開運算、閉運算)的實現,文中的示例代碼講解詳細,感興趣的小伙伴可以了解一下 2022-09-09 用 Python 爬了爬自己的微信朋友(實例講解)
下面小編就為大家帶來一篇用 Python 爬了爬自己的微信朋友(實例講解)。小編覺得挺不錯的,現在就分享給大家,也給大家做個參考。一起跟隨小編過來看看吧 2017-08-08 python使用PIL實現多張圖片垂直合并
這篇文章主要為大家詳細介紹了python使用PIL實現多張圖片垂直合并,具有一定的參考價值,感興趣的小伙伴們可以參考一下 2019-01-01 解決python中顯示圖片的plt.imshow plt.show()內存泄漏問題
這篇文章主要介紹了解決python中顯示圖片的plt.imshow plt.show()內存泄漏問題,具有很好的參考價值,希望對大家有所幫助。一起跟隨小編過來看看吧 2020-04-04 Pandas時間序列:時期(period)及其算術運算詳解
今天小編就為大家分享一篇Pandas時間序列:時期(period)及其算術運算詳解,具有很好的參考價值,希望對大家有所幫助。一起跟隨小編過來看看吧 2020-02-02 更改Ubuntu默認python版本的兩種方法python-> Anaconda
當你安裝 Debian Linux 時,安裝過程有可能同時為你提供多個可用的 Python 版本,因此系統中會存在多個 Python 的可執行二進制文件。一般Ubuntu默認的Python版本都為2.x, 如何改變Python的默認版本呢?下面來一起看看吧。 2016-12-12