编程知识 cdmana.com

Core data structure of PE file

A PE file (Portable Executable) is the executable file format used on the Windows platform. Common PE files include .exe, .sys and .dll files. Understanding the PE format deepens your understanding of the operating system and teaches you the data structures of executable files and how the machine runs them. It is very important for anyone working in reverse engineering or security, for example when unpacking protected executables.

Notes:

  1. Because the PE format contains so many structures, the fields marked with * are the ones to focus on.
  2. Understand how to convert between RVA and FOA; this is essential for parsing PE files.
  3. Be sure to work through it by hand and write your own parsing code; otherwise you will never really master PE.

Composition of a PE file: DOS header, NT headers (PE signature + standard PE header + optional PE header), section table, sections, resources and so on.

1. DOS header

// DOS header: the first structure in a PE file.
// The hex digits at the start of each trailing comment appear to be the values
// from the author's sample file (TODO confirm); fields marked with * are the
// ones the article asks the reader to focus on.
struct _IMAGE_DOS_HEADER {
    WORD e_magic;// 5a 4d * # "MZ", the magic number of the DOS header
    WORD e_cblp;//00 90
    WORD e_cp;//00 03
    WORD e_crlc;//00 00
    WORD e_cparhdr;//00 04
    WORD e_minalloc;//00 00
    WORD e_maxalloc;//ff ff
    WORD e_ss;//00 00
    WORD e_sp;//00 b8
    WORD e_csum;//00 00
    WORD e_ip;//00 00
    WORD e_cs;//00 00
    WORD e_lfarlc;//00 40
    WORD e_ovno;//00 00
    WORD e_res[4];//00 00 00 00 00 00 00 00
    WORD e_oemid;//00 00
    WORD e_oeminfo;//00 00
    WORD e_res2[10];//00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
    DWORD e_lfanew;//00 00 00 f8 * # Offset of the PE header from the start of the file, used to locate the PE header (the value depends on the compiler, so it is not fixed)
};

2. NT headers

// NT headers: the PE signature followed by the standard PE header and the
// optional PE header.
struct _IMAGE_NT_HEADERS {
    DWORD Signature;//00 00 45 50 # PE identification (signature)
    _IMAGE_FILE_HEADER FileHeader; // standard PE header
    _IMAGE_OPTIONAL_HEADER OptionalHeader; // optional PE header
};

2.1 Standard PE header

// Standard PE header (file header). Fields marked with * are the ones to focus on;
// the leading hex digits appear to be the author's sample values (TODO confirm).
struct _IMAGE_FILE_HEADER {
    WORD Machine;//01 4c * # CPU model the program runs on: 0x0 any processor / 0x14C 386 and later processors
    WORD NumberOfSections;//00 06 * # Number of sections in the PE file; this value must be updated when a section is added or sections are merged.
    DWORD TimeDateStamp;//5f d2 c6 a7 * # Timestamp: file creation time (unrelated to the creation time shown by the operating system), filled in by the compiler.
    DWORD PointerToSymbolTable;//00 00 00 00 #
    DWORD NumberOfSymbols;//00 00 00 00 #
    WORD SizeOfOptionalHeader;//00 e0 * # Size of the optional PE header: E0h by default for 32-bit PE files, F0h by default for 64-bit PE files; the size can be customized.
    WORD Characteristics;//01 02 * # Bit flags, each bit has its own meaning; an executable typically has the value 10F, i.e. bits 0, 1, 2, 3 and 8 set to 1
};

2.2 Optional PE header (variable size)

// Optional PE header (32-bit layout as shown in the article). Its size is not
// fixed; it is given by SizeOfOptionalHeader in the standard PE header.
// Fields marked with * are the ones to focus on; the leading hex digits appear
// to be the author's sample values (TODO confirm).
struct _IMAGE_OPTIONAL_HEADER {
    WORD Magic;//01 0b * # File type: 10B = 32-bit PE file, 20B = 64-bit PE file
    BYTE MajorLinkerVersion;//0e #
    BYTE MinorLinkerVersion;//00 #
    DWORD SizeOfCode;//00 00 0c 00 * # Total size of all code sections; must be a multiple of FileAlignment; filled in by the compiler; informational only
    DWORD SizeOfInitializedData;//00 00 16 00* # Total size of initialized data; must be a multiple of FileAlignment; filled in by the compiler; informational only
    DWORD SizeOfUninitializedData;//00 00 00 00 * # Total size of uninitialized data; must be a multiple of FileAlignment; filled in by the compiler; informational only
    DWORD AddressOfEntryPoint;//00 00 12 57 * # Program entry point; ImageBase+AddressOfEntryPoint is the real entry address
    DWORD BaseOfCode;//00 00 10 00 * # Base address of the start of the code; filled in by the compiler; informational only
    DWORD BaseOfData;//00 00 20 00 * # Base address of the start of the data; filled in by the compiler; informational only
    DWORD ImageBase;//00 40 00 00 * # Base address of the image in memory; ImageBase+AddressOfEntryPoint is the real entry address
    DWORD SectionAlignment;//00 00 10 00 * # Alignment in memory
    DWORD FileAlignment;//00 00 02 00 * # Alignment in the file
    WORD MajorOperatingSystemVersion;//00 06 #
    WORD MinorOperatingSystemVersion;//00 00 #
    WORD MajorImageVersion;//00 00 #
    WORD MinorImageVersion;//00 00 #
    WORD MajorSubsystemVersion;//00 06 #
    WORD MinorSubsystemVersion;//00 00 #
    DWORD Win32VersionValue;//00 00 00 00 #
    DWORD SizeOfImage;//00 00 70 00 * # Size of the whole PE image once mapped into memory (already aligned for memory); may be larger than the actual size but must be a multiple of SectionAlignment
    DWORD SizeOfHeaders;//00 00 04 00 * # Size of all headers plus the section table after file alignment; loading fails if this is wrong
    DWORD CheckSum;//00 00 00 00 * # Checksum, required for some system files; used to detect whether the file has been modified.
    WORD Subsystem;//00 03 #
    WORD DllCharacteristics;//81 40 #
    DWORD SizeOfStackReserve;//00 10 00 00 * # Stack size reserved at initialization
    DWORD SizeOfStackCommit;//00 00 10 00 * # Stack size actually committed at initialization
    DWORD SizeOfHeapReserve;//00 10 00 00 * # Heap size reserved at initialization
    DWORD SizeOfHeapCommit;//00 10 00 00 * # Heap size actually committed at initialization
    DWORD LoaderFlags;//00 00 00 00 #
    DWORD NumberOfRvaAndSizes;//00 00 00 10 # Number of directory entries
    _IMAGE_DATA_DIRECTORY DataDirectory[16];// # Directory entries (count given by NumberOfRvaAndSizes)
};

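3. Section table (offset from the start of the file: it follows the DOS header, the PE signature, the standard PE header and the optional PE header, i.e. e_lfanew + 4 + sizeof(_IMAGE_FILE_HEADER) + SizeOfOptionalHeader)

The number of section table entries is given by the NumberOfSections field of the standard PE header (_IMAGE_FILE_HEADER).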


#define IMAGE_SIZEOF_SHORT_NAME 8
/*
 * One entry of the section table. The number of entries is given by
 * NumberOfSections in the standard PE header.
 *
 * V(...) members describe the section in memory, R(...) members describe it
 * in the file.
 */
typedef struct _IMAGE_SECTION_HEADER {
    BYTE Name[IMAGE_SIZEOF_SHORT_NAME]; // Section name: 8 bytes of ASCII, e.g. .text, .data
    union {
        DWORD PhysicalAddress;
        DWORD VirtualSize;
    } Misc; // V(VS): size in memory, i.e. the true length of the section data before alignment (e.g. with 0x200 alignment, 0x192 bytes of data are zero-padded to 0x200). This value may be inaccurate (it can be modified by others).
    DWORD VirtualAddress; // V(VO): offset in memory (the RVA of the section), relative to ImageBase; only meaningful once the image is in memory
    DWORD SizeOfRawData; // R(RS): size of the section in the file, after alignment
    DWORD PointerToRawData; // R(RO): offset of the section in the file, after alignment
    DWORD PointerToRelocations; // Relocation offset; used in OBJ files, meaningless for EXE files
    DWORD PointerToLinenumbers; // Offset of the line-number table; used for debugging
    WORD NumberOfRelocations; // Number of relocations; used in OBJ files, meaningless for EXE files
    WORD NumberOfLinenumbers; // Number of entries in the line-number table; used for debugging
    DWORD Characteristics; // Section attributes
} IMAGE_SECTION_HEADER, *PIMAGE_SECTION_HEADER; // typedef names added: the original typedef declared none

Difference between VirtualAddress (in memory) and PointerToRawData (in the file):

RVA: relative virtual address. It is the offset of an item from the image base address after the file has been loaded (stretched) into virtual memory.
FOA: file offset address. It is the offset of an item from the beginning of the file as stored on disk.
RVA = VA (virtual address) - ImageBase (base address). To convert an RVA that lies inside a section to an FOA: FOA = RVA - VirtualAddress + PointerToRawData, using the header of the section that contains the RVA.

4. Export table

// Export table directory. Fields marked with * are the ones to focus on.
typedef struct _IMAGE_EXPORT_DIRECTORY {
    DWORD   Characteristics;                //  Not used
    DWORD   TimeDateStamp;                  //  Timestamp
    WORD    MajorVersion;                   //  Not used
    WORD    MinorVersion;                   //  Not used
    DWORD   Name;                           // * Points to the file-name string of the export table
    DWORD   Base;                           // * Starting ordinal of the exported functions
    DWORD   NumberOfFunctions;              // * Number of all exported functions
    DWORD   NumberOfNames;                  // * Number of functions exported by name
    DWORD   AddressOfFunctions;             // * RVA of the exported function address table
    DWORD   AddressOfNames;                 // * RVA of the exported function name table
    DWORD   AddressOfNameOrdinals;          // * RVA of the exported function ordinal table
} IMAGE_EXPORT_DIRECTORY, *PIMAGE_EXPORT_DIRECTORY;

5. Relocation table

When a program is loaded, and especially for DLLs, several modules may ask for the same ImageBase, so a module cannot always be loaded at its preferred address. It then has to be loaded at a different ImageBase, which means that many addresses stored in the image must be fixed up. That is the purpose of the relocation table.

// Header of one base-relocation block; the relocation entries for the block
// follow it directly.
typedef struct _IMAGE_BASE_RELOCATION {
    DWORD VirtualAddress; // Base address (RVA) of the addresses to be modified
    DWORD SizeOfBlock; // Size of the whole block: this IMAGE_BASE_RELOCATION structure plus everything after it, up to the next IMAGE_BASE_RELOCATION structure
} IMAGE_BASE_RELOCATION;
typedef IMAGE_BASE_RELOCATION UNALIGNED *PIMAGE_BASE_RELOCATION;

Explanation of the structure: the structure itself is 8 bytes, and it is followed by SizeOfBlock-8 bytes of entries, each 2 bytes (one WORD) wide. For each entry, VirtualAddress + the low 12 bits of the WORD is the RVA of the address to be modified; the address needs to be fixed up only when the high 4 bits are 0x3.

Memory is generally aligned to 0x1000, i.e. one memory page, which is why a 12-bit offset is enough within each block.
When does the list of IMAGE_BASE_RELOCATION blocks end? Answer: at an all-zero block, i.e. VirtualAddress=0 and SizeOfBlock=0.

Number of entries to examine in a block: (SizeOfBlock-8)/2; then check the high 4 bits of each entry to decide whether it needs to be modified.

6. Import table

// Import table descriptor. Fields marked with * are the ones to focus on.
typedef struct _IMAGE_IMPORT_DESCRIPTOR {
    union {
        DWORD   Characteristics;
        DWORD   OriginalFirstThunk;      //* Use this member: RVA pointing to an array of IMAGE_THUNK_DATA structures
    };
    DWORD   TimeDateStamp;               //* Timestamp. 0: the IAT is not bound yet; FFFFFFFF: the IAT is bound (the program starts faster)
    DWORD   ForwarderChain;
    DWORD   Name;                        // RVA pointing to the DLL name; the name is 0-terminated
    DWORD   FirstThunk;                  //* RVA pointing to an array of IMAGE_THUNK_DATA structures
} IMAGE_IMPORT_DESCRIPTOR;
typedef IMAGE_IMPORT_DESCRIPTOR UNALIGNED *PIMAGE_IMPORT_DESCRIPTOR;

// One element of the thunk arrays referenced by OriginalFirstThunk / FirstThunk
// of IMAGE_IMPORT_DESCRIPTOR.
// NOTE(review): the pointer-typed members are 4 bytes wide only on a 32-bit
// build; on a 64-bit build this union would be 8 bytes and no longer match a
// 32-bit thunk. Current winnt.h appears to declare these members as DWORD —
// confirm the intended target.
typedef struct _IMAGE_THUNK_DATA32 {
    union {
        PBYTE  ForwarderString;
        PDWORD Function;
        DWORD Ordinal;                             // Ordinal (sequence number)
        PIMAGE_IMPORT_BY_NAME  AddressOfData;      //* Points to an IMAGE_IMPORT_BY_NAME
    } u1;
} IMAGE_THUNK_DATA32;
typedef IMAGE_THUNK_DATA32 * PIMAGE_THUNK_DATA32;

// Hint/name record for a function imported by name.
typedef struct _IMAGE_IMPORT_BY_NAME {
    WORD    Hint;                                  // May be empty (decided by the compiler); if not empty, it is the index of the function in the export table
    BYTE    Name[1];                               //* Function name, 0-terminated; only the first byte is declared here
} IMAGE_IMPORT_BY_NAME, *PIMAGE_IMPORT_BY_NAME;

7. Bound import table

When the PE loader loads the DLLs that an EXE depends on, it first checks the TimeDateStamp of the IMAGE_IMPORT_DESCRIPTOR structure to decide whether the addresses in the IAT need to be recalculated.
TimeDateStamp == 0: not bound
TimeDateStamp == -1: bound; the real binding time is the TimeDateStamp of IMAGE_BOUND_IMPORT_DESCRIPTOR

Some applications, such as Windows notepad.exe, bind the addresses of the DLL functions directly into the EXE file in order to start faster. This is the bound import table.
Benefit: faster startup. However, if the DLL changes, the addresses still have to be resolved again.

// Bound import descriptor. Fields marked with * are the ones to focus on.
typedef struct _IMAGE_BOUND_IMPORT_DESCRIPTOR {
    DWORD   TimeDateStamp;                        //* The real timestamp; used to check that the bound DLL is the same version, i.e. whether it equals the timestamp in the DLL's PE header
    WORD    OffsetModuleName;                     //* DLL name, i.e. the name of the PE file (NOTE(review): the field name suggests an offset to the name rather than the string itself — confirm)
    WORD    NumberOfModuleForwarderRefs;          //* Number of other DLLs this one depends on
// Array of zero or more IMAGE_BOUND_FORWARDER_REF follows
} IMAGE_BOUND_IMPORT_DESCRIPTOR,  *PIMAGE_BOUND_IMPORT_DESCRIPTOR;

// Forwarder reference record that follows an IMAGE_BOUND_IMPORT_DESCRIPTOR.
typedef struct _IMAGE_BOUND_FORWARDER_REF {
    DWORD   TimeDateStamp;                  //* Timestamp
    WORD    OffsetModuleName;               //* Neither an RVA nor an FOA: the address of the first bound import descriptor + this value is a pointer to where the real file name is stored. Whichever entry is being read, the base is always the address of the first bound import descriptor.
    WORD    Reserved;                       // Reserved, unused
} IMAGE_BOUND_FORWARDER_REF, *PIMAGE_BOUND_FORWARDER_REF;

When the TimeDateStamp of the IMAGE_BOUND_IMPORT_DESCRIPTOR structure does not match the TimeDateStamp in the DLL's standard PE header, or when the DLL has been relocated, the values in the IAT are recalculated.

8. Resource table

The resource table is the most complex table in a PE file; it has three levels.
Note: the strings in the resource table are stored as Unicode.

8.1 Resource directory (located by converting the resource RVA from the optional PE header to an FOA)

// Resource directory header; the directory entries follow it directly.
// Fields marked with * are the ones to focus on.
typedef struct _IMAGE_RESOURCE_DIRECTORY {
    DWORD   Characteristics;                        // Resource attributes; reserved, 0
    DWORD   TimeDateStamp;                          // Time the resource was created
    WORD    MajorVersion;                           // Resource version number; not used, 0
    WORD    MinorVersion;                           // Resource version number; not used, 0
    WORD    NumberOfNamedEntries;                   //* Number of resources identified by name
    WORD    NumberOfIdEntries;                      //* Number of resources identified by ID
//  IMAGE_RESOURCE_DIRECTORY_ENTRY DirectoryEntries[];
} IMAGE_RESOURCE_DIRECTORY, *PIMAGE_RESOURCE_DIRECTORY;

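8.2 Resource directory entry (first level; the entries immediately follow the resource directory. At this level they identify the resource type: cursor (1), bitmap (2), icon (3), ... 16 types in total)

Name field: if the highest bit is 1, the low 31 bits + the resource base address (the address of the first IMAGE_RESOURCE_DIRECTORY) point to an IMAGE_RESOURCE_DIR_STRING_U; if it is 0, the field holds an ID.
OffsetToData field: if the highest bit is 1, the low 31 bits + the resource base address give the starting position of the next-level directory node; if it is 0, the field points to an IMAGE_RESOURCE_DATA_ENTRY.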

// One resource directory entry: a name-or-ID union followed by a
// data-or-subdirectory offset union.
typedef struct _IMAGE_RESOURCE_DIRECTORY_ENTRY {
    union {                     // Name of the directory entry, or its ID
        struct {
            DWORD NameOffset:31;                // Offset of the resource name
            DWORD NameIsString:1;               // Highest bit. 1: NameOffset is in effect, and NameOffset + the resource base address (the IMAGE_RESOURCE_DIRECTORY address) gives the target position; 0: the entry is identified by ID. NOTE(review): the article describes the target as the next-level directory node, but the field names suggest it is the name string — confirm.
        };
        DWORD   Name;               // Resource / language type
        WORD    Id;                 // Numeric resource ID
    };
    union {
        DWORD   OffsetToData;                       // Directory entry pointer
        struct {
            DWORD   OffsetToDirectory:31;           // Low 31 bits of OffsetToData
            DWORD   DataIsDirectory:1;              // Highest bit of OffsetToData
        };
    };
} IMAGE_RESOURCE_DIRECTORY_ENTRY, *PIMAGE_RESOURCE_DIRECTORY_ENTRY;

8.3 Resource name string

Starting at NameString, read Length Unicode characters.

// Length-prefixed Unicode string used for resource names.
typedef struct _IMAGE_RESOURCE_DIR_STRING_U {
    WORD    Length;             // Length, in Unicode characters
    WCHAR   NameString[ 1 ];    // First character; the remaining Length-1 characters follow
} IMAGE_RESOURCE_DIR_STRING_U, *PIMAGE_RESOURCE_DIR_STRING_U;

8.4 Resource data entry

// Describes where the data of one resource is located and how large it is.
typedef struct _IMAGE_RESOURCE_DATA_ENTRY {
    DWORD   OffsetToData;// RVA of the resource data
    DWORD   Size;// Length of the resource data
    DWORD   CodePage;// Code page
    DWORD   Reserved;// Reserved field
} IMAGE_RESOURCE_DATA_ENTRY, *PIMAGE_RESOURCE_DATA_ENTRY;

版权声明
本文为[osc_ ayj0crwi]所创,转载请带上原文链接,感谢
https://cdmana.com/2020/12/20201225134628296d.html

Scroll to Top