summaryrefslogtreecommitdiff
path: root/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp')
-rw-r--r--lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp329
1 files changed, 205 insertions, 124 deletions
diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index 777b20e9bb0f..492b44186720 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -71,6 +71,47 @@ GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
return std::string(toStringRef(llvm::ArrayRef(str_storage)));
}
+/// An "init expr" refers to a constant expression used to determine the initial
+/// value of certain elements within a module during instantiation. These
+/// expressions are restricted to operations that can be evaluated at module
+/// instantiation time. Currently we only support simple constant opcodes.
+///
+/// \param[in] data
+///     Extractor over the bytes that contain the init expression.
+///
+/// \param[in,out] offset
+///     Position of the first opcode on entry. On return it points past the
+///     terminating `end` opcode, or at the position where the data ran out if
+///     the expression is truncated.
+///
+/// \return
+///     The decoded immediate of a single i32.const, i64.const or global.get
+///     instruction (for global.get this is the global index, returned
+///     unchanged), or LLDB_INVALID_OFFSET for every other expression.
+static lldb::offset_t GetWasmOffsetFromInitExpr(DataExtractor &data,
+                                                lldb::offset_t &offset) {
+  lldb::offset_t init_expr_offset = LLDB_INVALID_OFFSET;
+
+  uint8_t opcode = data.GetU8(&offset);
+  switch (opcode) {
+  case llvm::wasm::WASM_OPCODE_I32_CONST:
+  case llvm::wasm::WASM_OPCODE_I64_CONST:
+    init_expr_offset = data.GetSLEB128(&offset);
+    break;
+  case llvm::wasm::WASM_OPCODE_GLOBAL_GET:
+    init_expr_offset = data.GetULEB128(&offset);
+    break;
+  case llvm::wasm::WASM_OPCODE_F32_CONST:
+    // Not a meaningful offset; skip the 4-byte immediate.
+    data.GetFloat(&offset);
+    break;
+  case llvm::wasm::WASM_OPCODE_F64_CONST:
+    // Not a meaningful offset; skip the 8-byte immediate. Reading it as a
+    // float would leave half of the payload to be misread as opcodes.
+    data.GetDouble(&offset);
+    break;
+  case llvm::wasm::WASM_OPCODE_REF_NULL:
+    // Not a meaningful offset.
+    data.GetULEB128(&offset);
+    break;
+  }
+
+  // Make sure the opcodes we read aren't part of an extended init expr.
+  opcode = data.GetU8(&offset);
+  if (opcode == llvm::wasm::WASM_OPCODE_END)
+    return init_expr_offset;
+
+  // Extended init expressions are not supported, but we still have to parse
+  // them to skip over them and read the next segment. Stop once the data is
+  // exhausted: GetU8 returns 0 without advancing the offset at that point, so
+  // an unterminated expression would otherwise keep this loop spinning.
+  while (opcode != llvm::wasm::WASM_OPCODE_END && data.ValidOffset(offset))
+    opcode = data.GetU8(&offset);
+  return LLDB_INVALID_OFFSET;
+}
+
/// Checks whether the data buffer starts with a valid Wasm module header.
static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize)
@@ -261,17 +302,20 @@ bool ObjectFileWasm::ParseHeader() {
return true;
}
-static llvm::Expected<std::vector<AddressRange>>
-ParseFunctions(SectionSP code_section_sp) {
- DataExtractor data;
- code_section_sp->GetSectionData(data);
+/// Location of one function body, as recorded by ParseFunctions.
+struct WasmFunction {
+  /// Offset of the function within the data that was handed to
+  /// ParseFunctions; LLDB_INVALID_OFFSET until it is filled in.
+  lldb::offset_t section_offset{LLDB_INVALID_OFFSET};
+  /// Size of the function in bytes, as read from its size prefix.
+  uint32_t size{0};
+};
+
+static llvm::Expected<std::vector<WasmFunction>>
+ParseFunctions(DataExtractor &data) {
lldb::offset_t offset = 0;
llvm::Expected<uint32_t> function_count = GetULEB32(data, offset);
if (!function_count)
return function_count.takeError();
- std::vector<AddressRange> functions;
+ std::vector<WasmFunction> functions;
functions.reserve(*function_count);
for (uint32_t i = 0; i < *function_count; ++i) {
@@ -281,7 +325,7 @@ ParseFunctions(SectionSP code_section_sp) {
// llvm-objdump considers the ULEB with the function size to be part of the
// function. We can't do that here because that would break symbolic
// breakpoints, as that address is never executed.
- functions.emplace_back(code_section_sp, offset, *function_size);
+ functions.push_back({offset, *function_size});
std::optional<lldb::offset_t> next_offset =
llvm::checkedAddUnsigned<lldb::offset_t>(offset, *function_size);
@@ -294,17 +338,22 @@ ParseFunctions(SectionSP code_section_sp) {
}
struct WasmSegment {
- WasmSegment(SectionSP section_sp, lldb::offset_t offset, uint32_t size)
- : address_range(section_sp, offset, size) {};
+ enum SegmentType {
+ Active,
+ Passive,
+ };
+
std::string name;
- AddressRange address_range;
-};
+ SegmentType type = Passive;
+ lldb::offset_t section_offset = LLDB_INVALID_OFFSET;
+ uint32_t size = 0;
+ uint32_t memory_index = 0;
+ lldb::offset_t init_expr_offset = 0;
-static llvm::Expected<std::vector<WasmSegment>>
-ParseData(SectionSP data_section_sp) {
- DataExtractor data;
- data_section_sp->GetSectionData(data);
+ lldb::offset_t GetFileOffset() const { return section_offset & 0xffffffff; }
+};
+static llvm::Expected<std::vector<WasmSegment>> ParseData(DataExtractor &data) {
lldb::offset_t offset = 0;
llvm::Expected<uint32_t> segment_count = GetULEB32(data, offset);
@@ -319,27 +368,34 @@ ParseData(SectionSP data_section_sp) {
if (!flags)
return flags.takeError();
+ WasmSegment segment;
+
// Data segments have a mode that identifies them as either passive or
// active. An active data segment copies its contents into a memory during
// instantiation, as specified by a memory index and a constant expression
// defining an offset into that memory.
+ segment.type = (*flags & llvm::wasm::WASM_DATA_SEGMENT_IS_PASSIVE)
+ ? WasmSegment::Passive
+ : WasmSegment::Active;
+
if (*flags & llvm::wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) {
+ assert(segment.type == WasmSegment::Active);
llvm::Expected<uint32_t> memidx = GetULEB32(data, offset);
if (!memidx)
return memidx.takeError();
+ segment.memory_index = *memidx;
}
- if ((*flags & llvm::wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) {
- // Skip over the constant expression.
- for (uint8_t b = 0; b != llvm::wasm::WASM_OPCODE_END;)
- b = data.GetU8(&offset);
- }
+ if (segment.type == WasmSegment::Active)
+ segment.init_expr_offset = GetWasmOffsetFromInitExpr(data, offset);
llvm::Expected<uint32_t> segment_size = GetULEB32(data, offset);
if (!segment_size)
return segment_size.takeError();
- segments.emplace_back(data_section_sp, offset, *segment_size);
+ segment.section_offset = offset;
+ segment.size = *segment_size;
+ segments.push_back(segment);
std::optional<lldb::offset_t> next_offset =
llvm::checkedAddUnsigned<lldb::offset_t>(offset, *segment_size);
@@ -352,13 +408,11 @@ ParseData(SectionSP data_section_sp) {
}
static llvm::Expected<std::vector<Symbol>>
-ParseNames(SectionSP name_section_sp,
- const std::vector<AddressRange> &function_ranges,
+ParseNames(SectionSP code_section_sp, DataExtractor &name_data,
+ const std::vector<WasmFunction> &functions,
std::vector<WasmSegment> &segments) {
- DataExtractor name_section_data;
- name_section_sp->GetSectionData(name_section_data);
- llvm::DataExtractor data = name_section_data.GetAsLLVM();
+ llvm::DataExtractor data = name_data.GetAsLLVM();
llvm::DataExtractor::Cursor c(0);
std::vector<Symbol> symbols;
while (c && c.tell() < data.size()) {
@@ -380,12 +434,13 @@ ParseNames(SectionSP name_section_sp,
llvm::Expected<std::string> name = GetWasmString(data, c);
if (!name)
return name.takeError();
- if (*idx >= function_ranges.size())
+ if (*idx >= functions.size())
continue;
symbols.emplace_back(
- symbols.size(), Mangled(*name), lldb::eSymbolTypeCode,
+ symbols.size(), *name, lldb::eSymbolTypeCode,
/*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
- /*is_artificial=*/false, function_ranges[*idx],
+ /*is_artificial=*/false, code_section_sp,
+ functions[i].section_offset, functions[i].size,
/*size_is_valid=*/true, /*contains_linker_annotations=*/false,
/*flags=*/0);
}
@@ -405,12 +460,6 @@ ParseNames(SectionSP name_section_sp,
continue;
// Update the segment name.
segments[i].name = *name;
- symbols.emplace_back(
- symbols.size(), Mangled(*name), lldb::eSymbolTypeData,
- /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
- /*is_artificial=*/false, segments[i].address_range,
- /*size_is_valid=*/true, /*contains_linker_annotations=*/false,
- /*flags=*/0);
}
} break;
@@ -432,80 +481,11 @@ ParseNames(SectionSP name_section_sp,
}
void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
- assert(m_sections_up && "sections must be parsed");
- Log *log = GetLog(LLDBLog::Object);
-
- // The name section contains names and indexes. First parse the data from the
- // relevant sections so we can access it by its index.
- std::vector<AddressRange> functions;
- std::vector<WasmSegment> segments;
-
- // Parse the code section.
- if (SectionSP code_section_sp =
- m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false)) {
- llvm::Expected<std::vector<AddressRange>> maybe_functions =
- ParseFunctions(code_section_sp);
- if (!maybe_functions) {
- LLDB_LOG_ERROR(log, maybe_functions.takeError(),
- "Failed to parse Wasm code section: {0}");
- return;
- }
- functions = *maybe_functions;
- }
-
- // Parse the data section.
- SectionSP data_section_sp =
- m_sections_up->FindSectionByType(lldb::eSectionTypeData, false);
- if (data_section_sp) {
- llvm::Expected<std::vector<WasmSegment>> maybe_segments =
- ParseData(data_section_sp);
- if (!maybe_segments) {
- LLDB_LOG_ERROR(log, maybe_segments.takeError(),
- "Failed to parse Wasm data section: {0}");
- return;
- }
- segments = *maybe_segments;
- }
-
- // Parse the name section.
- SectionSP name_section_sp =
- m_sections_up->FindSectionByType(lldb::eSectionTypeWasmName, false);
- if (!name_section_sp) {
- LLDB_LOG(log, "Failed to parse Wasm symbol table: no names section");
- return;
- }
-
- llvm::Expected<std::vector<Symbol>> symbols =
- ParseNames(name_section_sp, functions, segments);
- if (!symbols) {
- LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}");
- return;
- }
-
- for (const Symbol &symbol : *symbols)
+ for (const Symbol &symbol : m_symbols)
symtab.AddSymbol(symbol);
- lldb::user_id_t segment_id = 0;
- for (const WasmSegment &segment : segments) {
- const lldb::addr_t segment_addr =
- segment.address_range.GetBaseAddress().GetFileAddress();
- const size_t segment_size = segment.address_range.GetByteSize();
- SectionSP segment_sp = std::make_shared<Section>(
- /*parent_section_sp=*/data_section_sp, GetModule(),
- /*obj_file=*/this,
- ++segment_id << 8, // 1-based segment index, shifted by 8 bits to avoid
- // collision with section IDs.
- ConstString(segment.name), eSectionTypeData,
- /*file_vm_addr=*/segment_addr,
- /*vm_size=*/segment_size,
- /*file_offset=*/segment_addr,
- /*file_size=*/segment_size,
- /*log2align=*/0, /*flags=*/0);
- m_sections_up->AddSection(segment_sp);
- GetModule()->GetSectionList()->AddSection(segment_sp);
- }
-
symtab.Finalize();
+ m_symbols.clear();
}
static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
@@ -516,7 +496,27 @@ static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
return eSectionTypeOther;
}
+/// Looks up the first parsed section record whose id equals \p section_id.
+///
+/// \return
+///     A copy of the matching record, or std::nullopt if no record in
+///     m_sect_infos has that id.
+std::optional<ObjectFileWasm::section_info>
+ObjectFileWasm::GetSectionInfo(uint32_t section_id) {
+  std::optional<section_info> match;
+  for (const section_info &candidate : m_sect_infos) {
+    if (candidate.id == section_id) {
+      match = candidate;
+      break;
+    }
+  }
+  return match;
+}
+
+/// Looks up the first parsed section record whose name equals
+/// \p section_name.
+///
+/// \return
+///     A copy of the matching record, or std::nullopt if no record in
+///     m_sect_infos carries that name.
+std::optional<ObjectFileWasm::section_info>
+ObjectFileWasm::GetSectionInfo(llvm::StringRef section_name) {
+  std::optional<section_info> match;
+  for (const section_info &candidate : m_sect_infos) {
+    if (candidate.name == section_name) {
+      match = candidate;
+      break;
+    }
+  }
+  return match;
+}
+
void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
+ Log *log = GetLog(LLDBLog::Object);
+
if (m_sections_up)
return;
@@ -530,7 +530,7 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
SectionType section_type = eSectionTypeOther;
ConstString section_name;
offset_t file_offset = sect_info.offset & 0xffffffff;
- addr_t vm_addr = file_offset;
+ addr_t vm_addr = sect_info.offset;
size_t vm_size = sect_info.size;
if (llvm::wasm::WASM_SEC_CODE == sect_info.id) {
@@ -542,9 +542,6 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
// For this reason Section::GetFileAddress() must return zero for the
// Code section.
vm_addr = 0;
- } else if (llvm::wasm::WASM_SEC_DATA == sect_info.id) {
- section_type = eSectionTypeData;
- section_name = ConstString("data");
} else {
section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
if (section_type == eSectionTypeOther)
@@ -556,23 +553,107 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
}
}
- SectionSP section_sp(
- new Section(GetModule(), // Module to which this section belongs.
- this, // ObjectFile to which this section belongs and
- // should read section data from.
- section_type, // Section ID.
- section_name, // Section name.
- section_type, // Section type.
- vm_addr, // VM address.
- vm_size, // VM size in bytes of this section.
- file_offset, // Offset of this section in the file.
- sect_info.size, // Size of the section as found in the file.
- 0, // Alignment of the section
- 0, // Flags for this section.
- 1)); // Number of host bytes per target byte
+ SectionSP section_sp = std::make_shared<Section>(
+ GetModule(), // Module to which this section belongs.
+ this, // ObjectFile to which this section belongs and
+ // should read section data from.
+ section_type, // Section ID.
+ section_name, // Section name.
+ section_type, // Section type.
+ vm_addr, // VM address.
+ vm_size, // VM size in bytes of this section.
+ file_offset, // Offset of this section in the file.
+ sect_info.size, // Size of the section as found in the file.
+ 0, // Alignment of the section
+ 0, // Flags for this section.
+ 1); // Number of host bytes per target byte
m_sections_up->AddSection(section_sp);
unified_section_list.AddSection(section_sp);
}
+
+ // The name section contains names and indexes. First parse the data from the
+ // relevant sections so we can access it by its index.
+ std::vector<WasmFunction> functions;
+ std::vector<WasmSegment> segments;
+
+ // Parse the code section.
+ if (std::optional<section_info> info =
+ GetSectionInfo(llvm::wasm::WASM_SEC_CODE)) {
+ DataExtractor code_data = ReadImageData(info->offset, info->size);
+ llvm::Expected<std::vector<WasmFunction>> maybe_functions =
+ ParseFunctions(code_data);
+ if (!maybe_functions) {
+ LLDB_LOG_ERROR(log, maybe_functions.takeError(),
+ "Failed to parse Wasm code section: {0}");
+ } else {
+ functions = *maybe_functions;
+ }
+ }
+
+ // Parse the data section.
+ std::optional<section_info> data_info =
+ GetSectionInfo(llvm::wasm::WASM_SEC_DATA);
+ if (data_info) {
+ DataExtractor data_data = ReadImageData(data_info->offset, data_info->size);
+ llvm::Expected<std::vector<WasmSegment>> maybe_segments =
+ ParseData(data_data);
+ if (!maybe_segments) {
+ LLDB_LOG_ERROR(log, maybe_segments.takeError(),
+ "Failed to parse Wasm data section: {0}");
+ } else {
+ segments = *maybe_segments;
+ }
+ }
+
+ if (std::optional<section_info> info = GetSectionInfo("name")) {
+ DataExtractor names_data = ReadImageData(info->offset, info->size);
+ llvm::Expected<std::vector<Symbol>> symbols = ParseNames(
+ m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false),
+ names_data, functions, segments);
+ if (!symbols) {
+ LLDB_LOG_ERROR(log, symbols.takeError(),
+ "Failed to parse Wasm names: {0}");
+ } else {
+ m_symbols = *symbols;
+ }
+ }
+
+ lldb::user_id_t segment_id = 0;
+ for (const WasmSegment &segment : segments) {
+ if (segment.type == WasmSegment::Active) {
+ // FIXME: Support segments with a memory index.
+ if (segment.memory_index != 0) {
+ LLDB_LOG(log, "Skipping segment {0}: non-zero memory index is "
+ "currently unsupported");
+ continue;
+ }
+
+ if (segment.init_expr_offset == LLDB_INVALID_OFFSET) {
+ LLDB_LOG(log, "Skipping segment {0}: unsupported init expression");
+ continue;
+ }
+ }
+
+ const lldb::addr_t file_vm_addr =
+ segment.type == WasmSegment::Active
+ ? segment.init_expr_offset
+ : data_info->offset + segment.section_offset;
+ const lldb::offset_t file_offset =
+ data_info->GetFileOffset() + segment.GetFileOffset();
+ SectionSP segment_sp = std::make_shared<Section>(
+ GetModule(),
+ /*obj_file=*/this,
+ ++segment_id << 8, // 1-based segment index, shifted by 8 bits to avoid
+ // collision with section IDs.
+ ConstString(segment.name), eSectionTypeData,
+ /*file_vm_addr=*/file_vm_addr,
+ /*vm_size=*/segment.size,
+ /*file_offset=*/file_offset,
+ /*file_size=*/segment.size,
+ /*log2align=*/0, /*flags=*/0);
+ m_sections_up->AddSection(segment_sp);
+ GetModule()->GetSectionList()->AddSection(segment_sp);
+ }
}
bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address,
@@ -697,7 +778,7 @@ void ObjectFileWasm::Dump(Stream *s) {
}
void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream,
- const section_info_t &sh) {
+ const section_info &sh) {
ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " "
<< llvm::format_hex(sh.offset, 10) << " "
<< llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6)