// Copyright 2016 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #ifndef CORE_FPDFTEXT_CPDF_TEXTPAGE_H_ #define CORE_FPDFTEXT_CPDF_TEXTPAGE_H_ #include #include #include #include "core/fpdfapi/page/cpdf_pageobjectholder.h" #include "core/fxcrt/cfx_widetextbuf.h" #include "core/fxcrt/fx_coordinates.h" #include "core/fxcrt/fx_string.h" #include "core/fxcrt/unowned_ptr.h" #include "third_party/base/optional.h" class CPDF_Font; class CPDF_FormObject; class CPDF_Page; class CPDF_TextObject; struct PDFTEXT_Obj { PDFTEXT_Obj(); PDFTEXT_Obj(const PDFTEXT_Obj& that); ~PDFTEXT_Obj(); UnownedPtr m_pTextObj; CFX_Matrix m_formMatrix; }; class CPDF_TextPage { public: enum class CharType : uint8_t { kNormal, kGenerated, kNotUnicode, kHyphen, kPiece, }; class CharInfo { public: CharInfo(); CharInfo(const CharInfo&); ~CharInfo(); int m_Index = 0; uint32_t m_CharCode = 0; wchar_t m_Unicode = 0; CharType m_CharType = CharType::kNormal; CFX_PointF m_Origin; CFX_FloatRect m_CharBox; UnownedPtr m_pTextObj; CFX_Matrix m_Matrix; }; CPDF_TextPage(const CPDF_Page* pPage, bool rtl); ~CPDF_TextPage(); int CharIndexFromTextIndex(int text_index) const; int TextIndexFromCharIndex(int char_index) const; size_t size() const { return m_CharList.size(); } int CountChars() const; // These methods CHECK() to make sure |index| is within bounds. const CharInfo& GetCharInfo(size_t index) const; float GetCharFontSize(size_t index) const; std::vector GetRectArray(int start, int nCount) const; int GetIndexAtPos(const CFX_PointF& point, const CFX_SizeF& tolerance) const; WideString GetTextByRect(const CFX_FloatRect& rect) const; WideString GetTextByObject(const CPDF_TextObject* pTextObj) const; // Returns string with the text from |m_TextBuf| that are covered by the input // range. |start| and |count| are in terms of the |m_CharIndices|, so the // range will be converted into appropriate indices. WideString GetPageText(int start, int count) const; WideString GetAllPageText() const { return GetPageText(0, CountChars()); } int CountRects(int start, int nCount); bool GetRect(int rectIndex, CFX_FloatRect* pRect) const; private: enum class TextOrientation { kUnknown, kHorizontal, kVertical, }; enum class GenerateCharacter { kNone, kSpace, kLineBreak, kHyphen, }; enum class MarkedContentState { kPass = 0, kDone, kDelay }; void Init(); bool IsHyphen(wchar_t curChar) const; void ProcessObject(); void ProcessFormObject(CPDF_FormObject* pFormObj, const CFX_Matrix& formMatrix); void ProcessTextObject(PDFTEXT_Obj pObj); void ProcessTextObject(CPDF_TextObject* pTextObj, const CFX_Matrix& formMatrix, const CPDF_PageObjectHolder* pObjList, CPDF_PageObjectHolder::const_iterator ObjPos); GenerateCharacter ProcessInsertObject(const CPDF_TextObject* pObj, const CFX_Matrix& formMatrix); const CharInfo* GetPrevCharInfo() const; Optional GenerateCharInfo(wchar_t unicode); bool IsSameAsPreTextObject(CPDF_TextObject* pTextObj, const CPDF_PageObjectHolder* pObjList, CPDF_PageObjectHolder::const_iterator iter) const; bool IsSameTextObject(CPDF_TextObject* pTextObj1, CPDF_TextObject* pTextObj2) const; void CloseTempLine(); MarkedContentState PreMarkedContent(PDFTEXT_Obj pObj); void ProcessMarkedContent(PDFTEXT_Obj pObj); void FindPreviousTextObject(); void AddCharInfoByLRDirection(wchar_t wChar, const CharInfo& info); void AddCharInfoByRLDirection(wchar_t wChar, const CharInfo& info); TextOrientation GetTextObjectWritingMode( const CPDF_TextObject* pTextObj) const; TextOrientation FindTextlineFlowOrientation() const; void AppendGeneratedCharacter(wchar_t unicode, const CFX_Matrix& formMatrix); void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend); WideString GetTextByPredicate( const std::function& predicate) const; UnownedPtr const m_pPage; std::vector m_CharIndices; std::deque m_CharList; std::deque m_TempCharList; CFX_WideTextBuf m_TextBuf; CFX_WideTextBuf m_TempTextBuf; UnownedPtr m_pPrevTextObj; CFX_Matrix m_PrevMatrix; const bool m_rtl; const CFX_Matrix m_DisplayMatrix; std::vector m_SelRects; std::vector m_LineObj; TextOrientation m_TextlineDir = TextOrientation::kUnknown; CFX_FloatRect m_CurlineRect; }; #endif // CORE_FPDFTEXT_CPDF_TEXTPAGE_H_