Parse Quoted Strings and Detect Character Types with Utility Functions 7/7

Go’s strconv package provides a comprehensive set of utilities for parsing quoted strings and analyzing character properties. These functions handle the intricate details of escape sequences, quote characters, and Unicode classification that would otherwise require complex custom implementations.
Understanding String Unquoting with Unquote()
The Unquote() function interprets a quoted string literal and returns its actual string value. It handles Go’s standard quoting conventions including escape sequences, Unicode literals, and different quote types.
package main
import (
"fmt"
"strconv"
)
// main demonstrates strconv.Unquote on a plain double-quoted literal and on a
// literal containing \n and \t escapes. It prints the error and returns as
// soon as either call fails.
func main() {
// Basic string unquoting
quoted := `"Hello, World!"`
unquoted, err := strconv.Unquote(quoted)
if err != nil {
fmt.Printf("Error: %v\n", err)
return
}
fmt.Printf("Unquoted: %s\n", unquoted) // Prints: Unquoted: Hello, World!
// Handling escape sequences
escaped := `"Line 1\nLine 2\tTabbed"`
result, err := strconv.Unquote(escaped)
if err != nil {
fmt.Printf("Error: %v\n", err)
return
}
fmt.Printf("Result:\n%s\n", result)
// Prints "Result:" on its own line, then the decoded text, in which \n is a
// real line break and \t is a real tab:
// Line 1
// Line 2<TAB>Tabbed
}The function recognizes various quote styles and processes escape sequences accordingly:
// demonstrateQuoteTypes passes one literal of each supported form to
// strconv.Unquote and prints either the decoded value or the error.
func demonstrateQuoteTypes() {
examples := []string{
`"double quoted"`,
// A single-quoted input is a character literal; Unquote returns it as a
// one-character string.
`'A'`,
"`backtick quoted`",
`"Unicode: \u0048\u0065\u006C\u006C\u006F"`,
`"Hex escapes: \x48\x65\x6C\x6C\x6F"`,
}
for _, example := range examples {
if result, err := strconv.Unquote(example); err == nil {
fmt.Printf("Input: %s → Output: %s\n", example, result)
} else {
fmt.Printf("Failed to unquote: %s (Error: %v)\n", example, err)
}
}
}Error handling becomes crucial when dealing with malformed quoted strings:
// handleUnquoteErrors feeds malformed literals to strconv.Unquote and prints
// the error for every input that is rejected. Inputs that happen to parse
// produce no output.
func handleUnquoteErrors() {
invalidInputs := []string{
`"unterminated string`,
`"invalid escape \z"`,
`"invalid unicode \u123"`,
`single word without quotes`,
}
for _, input := range invalidInputs {
_, err := strconv.Unquote(input)
if err != nil {
fmt.Printf("Input: %s → Error: %v\n", input, err)
}
}
}Character-Level Parsing with UnquoteChar()
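UnquoteChar() decodes exactly one character (or one escape sequence) from the start of a string and returns the decoded rune plus the remaining tail. The quote argument controls which quote escape is permitted and which raw quote is rejected: if quote is '"', the sequence \" is permitted and an unescaped " is an error; if quote is '\'', the sequence \' is permitted and an unescaped ' is an error; if quote is 0, neither escape is permitted and both quote characters may appear unescaped. multibyte reports whether the decoded rune requires a multi-byte UTF-8 representation.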
// demonstrateUnquoteChar walks the body of a double-quoted literal one
// character at a time with strconv.UnquoteChar, printing each decoded rune.
// The loop stops at the closing quote, at the end of the string, or on the
// first decoding error.
func demonstrateUnquoteChar() {
input := `"Hello\nWorld\x21"`
// Skip the opening quote
s := input[1:]
for len(s) > 0 && s[0] != '"' {
value, multibyte, tail, err := strconv.UnquoteChar(s, '"')
if err != nil {
fmt.Printf("Error parsing character: %v\n", err)
break
}
fmt.Printf("Character: %c (U+%04X), multibyte UTF-8: %t\n",
value, value, multibyte)
// Continue with whatever UnquoteChar left unconsumed.
s = tail
}
}The function returns four values:
- value: The actual rune that was parsed
- multibyte: Whether the decoded character requires a multi-byte UTF-8 representation
- tail: The remaining string after this character
- err: Any parsing error encountered
Here’s a practical example parsing different character types:
func parseCharacterSequence() {
sequences := []string{
`H`, // Simple ASCII
`\n`, // Escape sequence
`\u0041`, // Unicode escape
`\x42`, // Hex escape
`\101`, // Octal escape
`\'`, // Escaped quote
}
for _, seq := range sequences {
quote := byte('"')
if strings.HasPrefix(seq, `\'`) { quote = byte('\'') }
value, multibyte, _, err := strconv.UnquoteChar(seq, quote)
if err != nil {
fmt.Printf("Sequence: %s → Error: %v\n", seq, err)
continue
}
fmt.Printf("Sequence: %s → Character: %c, Multibyte: %t\n",
seq, value, multibyte)
}
}The quote parameter determines which escaped quote is accepted and which unescaped quote is rejected:
// demonstrateQuoteParameter calls strconv.UnquoteChar with different
// combinations of input and quote byte and prints every returned value,
// including err, so that accepted and rejected cases can be compared.
func demonstrateQuoteParameter() {
// s is the input, quote is the byte passed to UnquoteChar, and note is the
// expectation printed next to the result.
samples := []struct{
s string
quote byte
note string
}{
{`\"`, '"', `\" allowed because quote = '"'`},
{`\'`, '\'', `\' allowed because quote = '\''`},
{`\'`, '"', `\' NOT allowed because quote = '"'`},
{`"`, '"', `raw quote " is not allowed unless escaped when quote = '"'`},
{`'`, '\'', `raw quote ' is not allowed unless escaped when quote = '\''`},
}
for _, x := range samples {
// err is printed rather than checked: failures are part of the demonstration.
r, mb, tail, err := strconv.UnquoteChar(x.s, x.quote)
fmt.Printf("Input=%q quote=%q → r=%q mb=%t tail=%q err=%v // %s\n",
x.s, x.quote, r, mb, tail, err, x.note)
}
}Error handling with UnquoteChar() helps identify specific parsing issues:
// handleCharParsingErrors passes incomplete or invalid escape sequences to
// strconv.UnquoteChar (with quote = '"') and prints the error for each input
// that is rejected.
func handleCharParsingErrors() {
problematicInputs := []string{
`\`, // Incomplete escape
`\z`, // Invalid escape character
`\u123`, // Incomplete Unicode escape
`\x1`, // Incomplete hex escape
}
for _, input := range problematicInputs {
_, _, _, err := strconv.UnquoteChar(input, '"')
if err != nil {
fmt.Printf("Input: %s → Error: %v\n", input, err)
}
}
}Extracting Quoted Content with QuotedPrefix()
QuotedPrefix() identifies and extracts a valid quoted literal at the start of a string and returns the raw quoted text. It does not convert escapes, but it still validates the literal and will error on malformed quoting or bad escapes.
// demonstrateQuotedPrefix extracts the leading quoted literal from each
// example with strconv.QuotedPrefix and prints the literal together with the
// text that follows it. Inputs that are rejected are reported and skipped.
func demonstrateQuotedPrefix() {
examples := []string{
`"quoted text" followed by more`,
"`backtick quoted` with trailing data",
`"escaped \"quotes\" inside" plus extra`,
`"multiline\nstring" remainder`,
}
for _, example := range examples {
quoted, err := strconv.QuotedPrefix(example)
if err != nil {
fmt.Printf("Input: %s → Error: %v\n", example, err)
continue
}
// quoted still contains its quotes and escapes, so its length is the
// number of bytes of example it occupies.
remainder := example[len(quoted):]
fmt.Printf("Input: %s\n", example)
fmt.Printf("Quoted: %s\n", quoted)
fmt.Printf("Remainder: %s\n\n", remainder)
}
}This function proves particularly useful when parsing configuration files or command-line arguments where quoted strings might be mixed with other content:
// parseConfigLine parses a line of the form `"key" = "value"`, where both
// sides are Go quoted literals, and returns the unquoted key and value.
//
// It returns an error when the line does not start with a valid quoted
// literal, when the key is not followed by '=', or when the text after '='
// does not start with a valid quoted literal. Any text after the value
// literal is ignored. Errors coming from strconv are wrapped with %w so
// callers can match them with errors.Is.
func parseConfigLine(line string) (string, string, error) {
	// QuotedPrefix returns the literal with quotes and escapes intact, so
	// len(key) is exactly the number of bytes of line the key occupies.
	key, err := strconv.QuotedPrefix(line)
	if err != nil {
		return "", "", fmt.Errorf("invalid quoted key: %w", err)
	}

	// Skip past the key and any whitespace, then require the assignment.
	remainder := strings.TrimSpace(line[len(key):])
	if !strings.HasPrefix(remainder, "=") {
		return "", "", fmt.Errorf("expected '=' after key")
	}

	value, err := strconv.QuotedPrefix(strings.TrimSpace(remainder[1:]))
	if err != nil {
		return "", "", fmt.Errorf("invalid quoted value: %w", err)
	}

	// Both literals were validated above, but the Unquote errors are still
	// checked so that a failure can never yield a silently empty key or value.
	unquotedKey, err := strconv.Unquote(key)
	if err != nil {
		return "", "", fmt.Errorf("invalid quoted key: %w", err)
	}
	unquotedValue, err := strconv.Unquote(value)
	if err != nil {
		return "", "", fmt.Errorf("invalid quoted value: %w", err)
	}
	return unquotedKey, unquotedValue, nil
}
// testConfigParsing runs parseConfigLine over a few sample lines and prints
// the parsed key/value pair, or the error for lines that are rejected.
func testConfigParsing() {
configLines := []string{
`"database_host" = "localhost"`,
`"port" = "5432"`,
// The value contains an escaped double quote.
`"password" = "secret\"value"`,
}
for _, line := range configLines {
key, value, err := parseConfigLine(line)
if err != nil {
fmt.Printf("Error parsing: %s → %v\n", line, err)
continue
}
fmt.Printf("Key: %s, Value: %s\n", key, value)
}
}The function handles nested quotes and complex escape sequences correctly:
// handleComplexQuoting extracts the leading literal from inputs that contain
// escaped quotes, escaped backslashes and \u escapes, then prints the raw
// literal, its unquoted form (when Unquote succeeds) and the trailing text.
func handleComplexQuoting() {
complexExamples := []string{
`"outer \"nested quotes\" content" trailing`,
`"JSON-like: {\"key\": \"value\"}" more data`,
`"path\\to\\file.txt" additional info`,
`"unicode: \u0048\u0065\u006C\u006C\u006F" suffix`,
}
for _, example := range complexExamples {
quoted, err := strconv.QuotedPrefix(example)
if err != nil {
fmt.Printf("Failed: %s → %v\n", example, err)
continue
}
// Show both the raw quoted string and its unquoted content
unquoted, unquoteErr := strconv.Unquote(quoted)
fmt.Printf("Raw quoted: %s\n", quoted)
// An Unquote failure is not reported; the "Unquoted" line is simply omitted.
if unquoteErr == nil {
fmt.Printf("Unquoted: %s\n", unquoted)
}
fmt.Printf("Remaining: %s\n\n", example[len(quoted):])
}
}Error conditions occur when the input doesn’t start with a valid quoted string:
// handleQuotedPrefixErrors passes inputs that do not begin with a valid
// quoted literal to strconv.QuotedPrefix and prints the error for each one
// that is rejected. %q is used for the input so the empty string is visible.
func handleQuotedPrefixErrors() {
invalidInputs := []string{
`unquoted text`,
`"unterminated quote`,
`"invalid escape \z"`,
``, // empty string
}
for _, input := range invalidInputs {
_, err := strconv.QuotedPrefix(input)
if err != nil {
fmt.Printf("Input: %q → Error: %v\n", input, err)
}
}
}Backquote Compatibility with CanBackquote()
The CanBackquote() function determines whether a string can be safely represented using Go’s raw string literals (backticks) without requiring escape sequences. This is particularly useful when generating Go code or when you need to choose the most readable string representation.
// demonstrateCanBackquote prints, for each sample string, the result of
// strconv.CanBackquote and the literal form chosen from it: a backquoted
// literal when the result is true, otherwise the %q (double-quoted) form.
func demonstrateCanBackquote() {
testStrings := []string{
"Simple ASCII text",
"Text with\nnewlines",
"Text with\ttabs",
"Text with `backticks` inside",
"Text with \"quotes\"",
"Text with 'apostrophes'",
"Unicode: héllo wörld",
"Control chars: \x00\x01\x02",
"Mixed: normal\x7Ftext",
}
for _, s := range testStrings {
canBackquote := strconv.CanBackquote(s)
fmt.Printf("String: %q\n", s)
fmt.Printf("Can use backticks: %t\n", canBackquote)
if canBackquote {
fmt.Printf("Raw literal: `%s`\n", s)
} else {
fmt.Printf("Needs quotes: %q\n", s)
}
fmt.Println()
}
}A practical application involves code generation where you want to produce the most readable string literals:
// generateStringLiteral renders value as Go source text. Strings that
// strconv.CanBackquote accepts are wrapped in backquotes; everything else is
// rendered with strconv.Quote as an escaped double-quoted literal.
func generateStringLiteral(value string) string {
	if !strconv.CanBackquote(value) {
		return strconv.Quote(value)
	}
	return fmt.Sprintf("`%s`", value)
}
// testCodeGeneration prints a numbered const declaration for each sample
// value, using generateStringLiteral to choose the literal form.
func testCodeGeneration() {
values := []string{
"Hello, World!",
"Multi\nline\nstring",
"String with `backticks`",
"String with\x00null bytes",
"Unicode: 你好世界",
"Path\\to\\file",
}
fmt.Println("Generated Go string literals:")
for i, value := range values {
literal := generateStringLiteral(value)
// Constants are numbered from 1, hence i+1.
fmt.Printf("const str%d = %s\n", i+1, literal)
}
}The function considers several factors when determining backquote compatibility:
// explainBackquoteRules prints, for each sample string, the ✓/✗ status
// computed by strconv.CanBackquote followed by the hand-written explanation
// stored as the map value and then the sample text itself.
//
// NOTE(review): examples is a map, so the lines are printed in an unspecified
// order that can change between runs. The leading status is computed; the
// ✓/✗ inside each explanation is hard-coded text.
func explainBackquoteRules() {
examples := map[string]string{
"Regular text": "✓ Safe for backticks",
"Text with\nnewlines": "✗ Contains control characters (newlines not allowed)",
"Text with `backticks`": "✗ Contains backticks",
"Text with\x00null": "✗ Contains null bytes",
"Text with\x7Fdel": "✗ Contains DEL character",
"Unicode: 世界": "✓ Unicode is fine",
"Text with\rcarriage returns": "✗ Contains carriage returns",
"Tabs\tare\tokay": "✓ Tabs are preserved",
}
for text, explanation := range examples {
canUse := strconv.CanBackquote(text)
status := "✗"
if canUse {
status = "✓"
}
fmt.Printf("%s %s → %s\n", status, explanation, text)
}
}When working with configuration or template systems, this function helps choose appropriate quoting:
// ConfigValue is a single key/value configuration entry, as consumed by
// formatConfig.
type ConfigValue struct {
// Key is the entry's name.
Key string
// Value is the entry's content.
Value string
}
// formatConfig prints one `config[<key>] = <value>` line per entry, rendering
// both the key and the value with generateStringLiteral.
func formatConfig(configs []ConfigValue) {
	for i := range configs {
		entry := configs[i]
		fmt.Printf(
			"config[%s] = %s\n",
			generateStringLiteral(entry.Key),
			generateStringLiteral(entry.Value),
		)
	}
}
// testConfigFormatting builds a few sample entries, some containing newlines
// or backquotes, and prints them with formatConfig.
func testConfigFormatting() {
configs := []ConfigValue{
{"database_url", "postgresql://localhost:5432/mydb"},
{"api_key", "abc123\ndef456"},
{"template", "Hello `{{.Name}}`!"},
{"multiline", "Line 1\nLine 2\nLine 3"},
}
fmt.Println("Formatted configuration:")
formatConfig(configs)
}The function also helps in debugging and logging scenarios:
// smartStringDisplay prints s in %q form, then prints either a backquoted
// literal or a strconv.Quote literal together with a recommendation.
func smartStringDisplay(s string) {
fmt.Printf("Original string: %q\n", s)
// NOTE(review): strconv.CanBackquote already reports false for a string that
// contains a backquote, so the strings.Contains check looks redundant.
if strconv.CanBackquote(s) && !strings.Contains(s, "`") {
fmt.Printf("Raw string literal: `%s`\n", s)
fmt.Printf("Recommendation: Use backticks for better readability\n")
} else {
fmt.Printf("Quoted literal: %s\n", strconv.Quote(s))
fmt.Printf("Recommendation: Use quoted string due to special characters\n")
}
fmt.Println()
}Character Classification with IsGraphic() and IsPrint()
Go’s strconv package provides two essential functions for character classification: IsGraphic() and IsPrint(). These functions help determine whether Unicode characters are suitable for display and text processing operations.
// demonstrateCharacterClassification prints a table with one row per sample
// rune: the rune itself, the results of strconv.IsGraphic and strconv.IsPrint,
// and the label produced by getCharDescription.
func demonstrateCharacterClassification() {
	const (
		headerFormat = "%-10s %-10s %-10s %s\n"
		rowFormat    = "%-10s %-10t %-10t %s\n"
	)
	samples := []rune{
		'A',      // Regular letter
		'5',      // Digit
		' ',      // Space
		'\t',     // Tab
		'\n',     // Newline
		'\x00',   // Null character
		'\x7F',   // DEL character
		'世',      // Chinese character
		'🌟',      // Emoji
		'\u200B', // Zero-width space
		'\u0301', // Combining acute accent
	}

	fmt.Printf(headerFormat, "Character", "IsGraphic", "IsPrint", "Description")
	fmt.Println(strings.Repeat("-", 50))
	for i := range samples {
		ch := samples[i]
		fmt.Printf(rowFormat,
			string(ch), strconv.IsGraphic(ch), strconv.IsPrint(ch), getCharDescription(ch))
	}
}
// getCharDescription returns a short human-readable label for r.
//
// Cases are tried in order, so the specifically named characters (space, tab,
// newline, NUL, DEL) are matched before the generic "r < 32" control-character
// case, and the unicode category checks only see runes that none of the
// earlier cases claimed. Anything left over is labelled with its code point.
func getCharDescription(r rune) string {
switch {
case r == ' ':
return "Space"
case r == '\t':
return "Tab"
case r == '\n':
return "Newline"
case r == '\x00':
return "Null"
case r == '\x7F':
return "DEL"
case r < 32:
return fmt.Sprintf("Control (U+%04X)", r)
case unicode.IsLetter(r):
return "Letter"
case unicode.IsDigit(r):
return "Digit"
case unicode.IsSymbol(r):
return "Symbol"
default:
return fmt.Sprintf("Unicode (U+%04X)", r)
}
}The key difference between these functions is their treatment of space characters:
// compareGraphicVsPrint prints a table comparing strconv.IsGraphic and
// strconv.IsPrint for a set of whitespace and separator runes. Each row is
// labelled with the name returned by getSpaceName.
func compareGraphicVsPrint() {
	whitespace := []rune{
		' ',      // Regular space
		'\t',     // Tab
		'\n',     // Newline
		'\r',     // Carriage return
		'\v',     // Vertical tab
		'\f',     // Form feed
		'\u00A0', // Non-breaking space
		'\u2000', // En quad
		'\u2028', // Line separator
	}

	fmt.Println("Space Character Analysis:")
	fmt.Printf("%-15s %-10s %-10s\n", "Character", "IsGraphic", "IsPrint")
	fmt.Println(strings.Repeat("-", 35))
	for i := 0; i < len(whitespace); i++ {
		ch := whitespace[i]
		graphic, printable := strconv.IsGraphic(ch), strconv.IsPrint(ch)
		fmt.Printf("%-15s %-10t %-10t\n", getSpaceName(ch), graphic, printable)
	}
}
// getSpaceName returns a display name for the whitespace rune r. Runes that
// are not in the table are rendered as their code point ("U+XXXX").
// The lookup table is rebuilt on every call.
func getSpaceName(r rune) string {
names := map[rune]string{
' ': "Space",
'\t': "Tab",
'\n': "Newline",
'\r': "Carriage Return",
'\v': "Vertical Tab",
'\f': "Form Feed",
'\u00A0': "Non-breaking Space",
'\u2000': "En Quad",
'\u2028': "Line Separator",
}
if name, exists := names[r]; exists {
return name
}
return fmt.Sprintf("U+%04X", r)
}These functions are particularly useful for text validation and sanitization:
// validateDisplayableText reports whether every rune in input satisfies
// strconv.IsGraphic. The second result holds one message per offending rune,
// giving its byte offset in input and its code point; it is nil when the
// first result is true.
func validateDisplayableText(input string) (bool, []string) {
	var problems []string
	for offset, ch := range input {
		if strconv.IsGraphic(ch) {
			continue
		}
		problems = append(problems,
			fmt.Sprintf("Non-graphic character at position %d: U+%04X", offset, ch))
	}
	return len(problems) == 0, problems
}
// sanitizeForDisplay returns input with every rune that fails
// strconv.IsPrint replaced by a Go-style Unicode escape.
//
// Runes up to U+FFFF are written as \uXXXX. Runes above U+FFFF are written as
// \UXXXXXXXX, because a \u escape is defined as exactly four hex digits and a
// longer digit run after \u would be ambiguous.
func sanitizeForDisplay(input string) string {
	var builder strings.Builder
	// The output is at least as long as the input whenever nothing is escaped.
	builder.Grow(len(input))
	for _, r := range input {
		switch {
		case strconv.IsPrint(r):
			builder.WriteRune(r)
		case r > 0xFFFF:
			fmt.Fprintf(&builder, "\\U%08X", r)
		default:
			fmt.Fprintf(&builder, "\\u%04X", r)
		}
	}
	return builder.String()
}
// testTextProcessing runs validateDisplayableText over sample inputs and, for
// each input that contains a non-graphic rune, also prints the reported
// issues and the output of sanitizeForDisplay.
func testTextProcessing() {
testInputs := []string{
"Normal text",
"Text with\ttabs and\nnewlines",
"Text with\x00null\x01characters",
"Unicode: 世界 🌟",
"Mixed: good\x7Fbad\x08text",
}
for _, input := range testInputs {
fmt.Printf("Input: %q\n", input)
isValid, issues := validateDisplayableText(input)
fmt.Printf("All graphic: %t\n", isValid)
// Sanitizing is only demonstrated for inputs that failed validation.
if !isValid {
fmt.Printf("Issues: %v\n", issues)
sanitized := sanitizeForDisplay(input)
fmt.Printf("Sanitized: %q\n", sanitized)
}
fmt.Println()
}
}Practical applications include log processing and user input validation:
// processLogEntry returns entry with every rune that fails strconv.IsPrint
// replaced by a visible escape, so that a single entry cannot span several
// log lines or carry invisible control characters.
//
// Newline and tab become the two-character sequences `\n` and `\t`. Other
// non-printable runes become \xXX when they fit in one byte, \uXXXX up to
// U+FFFF, and \UXXXXXXXX above that.
func processLogEntry(entry string) string {
	var cleaned strings.Builder
	cleaned.Grow(len(entry))
	for _, r := range entry {
		switch {
		case strconv.IsPrint(r):
			cleaned.WriteRune(r)
		case r == '\n':
			// Write the escape text, not the raw character: emitting a
			// backslash followed by a real newline would still break the line.
			cleaned.WriteString(`\n`)
		case r == '\t':
			cleaned.WriteString(`\t`)
		case r <= 0xFF:
			fmt.Fprintf(&cleaned, "\\x%02X", r)
		case r <= 0xFFFF:
			// \x is a two-digit escape, so wider runes need \u or \U.
			fmt.Fprintf(&cleaned, "\\u%04X", r)
		default:
			fmt.Fprintf(&cleaned, "\\U%08X", r)
		}
	}
	return cleaned.String()
}
// validateUserInput returns nil when every rune in input is either an ASCII
// space or accepted by strconv.IsGraphic. Otherwise it returns an error that
// names the first offending rune by byte offset and code point.
func validateUserInput(input string) error {
	for offset, ch := range input {
		if ch == ' ' || strconv.IsGraphic(ch) {
			continue
		}
		return fmt.Errorf("invalid character at position %d: non-graphic character U+%04X", offset, ch)
	}
	return nil
}
// testInputValidation runs validateUserInput over sample inputs. For inputs
// that are rejected it prints the error and the processLogEntry rendering of
// the input; accepted inputs only print a confirmation.
func testInputValidation() {
inputs := []string{
"Valid user input",
"Input with\ttab",
"Input with\x00null",
"Input with 世界",
"Input with\nnewline",
}
for _, input := range inputs {
fmt.Printf("Testing: %q\n", input)
if err := validateUserInput(input); err != nil {
fmt.Printf(" Validation failed: %v\n", err)
fmt.Printf(" Processed: %q\n", processLogEntry(input))
} else {
fmt.Printf(" Validation passed\n")
}
fmt.Println()
}
}Numeric Parsing Errors with NumError
The strconv package uses the NumError struct to provide detailed information about parsing failures. Understanding this error type and implementing robust error handling patterns ensures your applications can gracefully handle malformed input and provide meaningful feedback to users.
// demonstrateNumError parses each sample with strconv.ParseFloat and, for the
// inputs that fail, prints the fields of the returned *strconv.NumError.
// Inputs that parse successfully print nothing: "12.34" is an ordinary float
// and ParseFloat accepts the string "NaN" as the NaN value, so only "1.2.3"
// and the overflowing "9e9999999" are expected to produce output.
func demonstrateNumError() {
tests := []string{
"12.34",
"NaN",
"1.2.3",
"9e9999999", // overflow
}
for _, s := range tests {
if _, err := strconv.ParseFloat(s, 64); err != nil {
// The type assertion exposes the structured fields; any other error type
// falls through to the generic message below.
if numErr, ok := err.(*strconv.NumError); ok {
fmt.Printf("Input: %q\n", s)
fmt.Printf(" Func: %s\n", numErr.Func) // "ParseFloat"
fmt.Printf(" Num: %s\n", numErr.Num)
fmt.Printf(" Err: %v\n\n", numErr.Err)
continue
}
fmt.Printf("Input: %q → Other error: %v\n\n", s, err)
}
}
}Building robust parsers requires comprehensive error handling strategies:
// ParseResult records the outcome of one parse attempt made by
// parseQuotedStrings.
type ParseResult struct {
	Value    string // unquoted content; empty when Error is non-nil
	Error    error  // nil when a literal was parsed successfully
	Position int    // byte offset in the input where the attempt started
}

// parseQuotedStrings scans input for whitespace-separated Go quoted literals
// and returns one ParseResult per attempt, in input order.
//
// When the text at the current position is not a valid literal, a single
// error result is recorded for that position and scanning resumes at the next
// quote byte (", ' or `), so a run of unquoted text produces one error rather
// than one error per byte. The recorded error wraps the strconv error with
// %w. An empty or all-whitespace input yields a nil slice.
func parseQuotedStrings(input string) []ParseResult {
	var results []ParseResult
	position := 0
	for position < len(input) {
		// Skip whitespace. The check is per byte, which is enough for the
		// ASCII separators this parser expects between literals.
		for position < len(input) && unicode.IsSpace(rune(input[position])) {
			position++
		}
		if position >= len(input) {
			break
		}

		quoted, err := strconv.QuotedPrefix(input[position:])
		if err != nil {
			results = append(results, ParseResult{
				Error:    fmt.Errorf("position %d: %w", position, err),
				Position: position,
			})
			// Resynchronise: step past the offending byte, then advance to
			// the next byte that could open a literal.
			position++
			for position < len(input) && !isQuoteByte(input[position]) {
				position++
			}
			continue
		}

		unquoted, unquoteErr := strconv.Unquote(quoted)
		results = append(results, ParseResult{
			Value:    unquoted,
			Error:    unquoteErr,
			Position: position,
		})
		// quoted still includes its quotes and escapes, so its length is the
		// number of input bytes consumed.
		position += len(quoted)
	}
	return results
}

// isQuoteByte reports whether b can open a Go quoted literal.
func isQuoteByte(b byte) bool {
	return b == '"' || b == '\'' || b == '`'
}
// testComprehensiveParsing runs parseQuotedStrings over an input that mixes
// valid literals, an invalid escape, a multi-character single-quoted span and
// an unterminated literal, then prints every result with its position.
func testComprehensiveParsing() {
testInput := `"valid" "another valid" "invalid escape \z" 'single quoted' "unterminated`
results := parseQuotedStrings(testInput)
fmt.Printf("Parsing input: %s\n\n", testInput)
for i, result := range results {
// Results are numbered from 1 for display.
fmt.Printf("Result %d (position %d):\n", i+1, result.Position)
if result.Error != nil {
fmt.Printf(" Error: %v\n", result.Error)
} else {
fmt.Printf(" Value: %q\n", result.Value)
}
fmt.Println()
}
}Advanced error recovery techniques help maintain parsing continuity:
// recoveringParser parses input line by line. Blank lines are skipped. Each
// remaining line is trimmed and passed to attemptParsing; on failure the
// error is recorded with its 1-based line number and attemptRecovery is
// tried. A non-empty recovery result is appended to the values, and a second
// entry noting the recovery is appended to the errors.
//
// It returns the collected values and the collected errors, both in line
// order.
func recoveringParser(input string) ([]string, []error) {
	var (
		parsed   []string
		problems []error
	)
	for idx, raw := range strings.Split(input, "\n") {
		text := strings.TrimSpace(raw)
		if text == "" {
			continue
		}
		lineNo := idx + 1

		value, err := attemptParsing(text)
		if err == nil {
			parsed = append(parsed, value)
			continue
		}

		problems = append(problems, fmt.Errorf("line %d: %v", lineNo, err))
		// Attempt recovery by salvaging whatever content is usable.
		if salvaged := attemptRecovery(text); salvaged != "" {
			parsed = append(parsed, salvaged)
			problems = append(problems, fmt.Errorf("line %d: recovered using fallback parsing", lineNo))
		}
	}
	return parsed, problems
}
// attemptParsing interprets line in one of two ways. If line begins with a
// valid quoted literal, the result of unquoting that literal is returned and
// any text after it is ignored. Otherwise, if strconv.CanBackquote accepts
// line, line is returned unchanged. If neither applies an error is returned.
func attemptParsing(line string) (string, error) {
	literal, prefixErr := strconv.QuotedPrefix(line)
	switch {
	case prefixErr == nil:
		return strconv.Unquote(literal)
	case strconv.CanBackquote(line):
		return line, nil
	default:
		return "", fmt.Errorf("cannot parse as quoted or unquoted string")
	}
}
// attemptRecovery tries to salvage line by dropping every rune that is
// neither accepted by strconv.IsGraphic nor an ASCII space. It returns the
// cleaned text when strconv.CanBackquote accepts it, and "" otherwise.
// Callers cannot tell a failed recovery from a line that cleaned down to
// nothing, since both return "".
func attemptRecovery(line string) string {
// Try to salvage content by removing problematic characters
cleaned := strings.Map(func(r rune) rune {
if strconv.IsGraphic(r) || r == ' ' {
return r
}
// A negative return value makes strings.Map drop the rune.
return -1 // Remove character
}, line)
// If the cleaned version can be backquoted, use it
if strconv.CanBackquote(cleaned) {
return cleaned
}
return ""
}Production-ready error handling includes logging and user-friendly messages:
// QuoteParser parses KEY=VALUE configuration text whose keys and values may
// be Go quoted literals, reporting progress through a caller-supplied logger.
type QuoteParser struct {
	// logger receives a printf-style format and its arguments. A nil logger
	// disables logging.
	logger func(string, ...interface{})
}

// NewQuoteParser returns a QuoteParser that reports through logger. A nil
// logger is allowed and silences all log output.
func NewQuoteParser(logger func(string, ...interface{})) *QuoteParser {
	return &QuoteParser{logger: logger}
}

// logf forwards to the configured logger and does nothing when none is set,
// so a QuoteParser built with a nil logger does not panic.
func (qp *QuoteParser) logf(format string, args ...interface{}) {
	if qp.logger != nil {
		qp.logger(format, args...)
	}
}

// ParseConfig parses content line by line. Blank lines and lines starting
// with '#' are skipped. Every other line must have the form KEY=VALUE.
//
// Parsing continues past bad lines. The returned map always holds the entries
// that parsed successfully; when at least one line failed, the returned error
// lists every failure as "filename:line: message", one per line. Each parsed
// entry and each failure is also reported to the logger.
func (qp *QuoteParser) ParseConfig(filename string, content string) (map[string]string, error) {
	config := make(map[string]string)
	var parseErrors []string
	for lineNum, line := range strings.Split(content, "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		key, value, err := qp.parseConfigLine(line, lineNum+1)
		if err != nil {
			errorMsg := fmt.Sprintf("%s:%d: %v", filename, lineNum+1, err)
			parseErrors = append(parseErrors, errorMsg)
			qp.logf("Parse error: %s", errorMsg)
			continue
		}
		config[key] = value
		qp.logf("Parsed config: %s = %s", key, value)
	}
	if len(parseErrors) > 0 {
		return config, fmt.Errorf("parsing failed with %d errors:\n%s",
			len(parseErrors), strings.Join(parseErrors, "\n"))
	}
	return config, nil
}

// parseConfigLine splits line into key and value and decodes both with
// parseValue. lineNum is accepted for call-site symmetry but is not used
// here; ParseConfig adds the line number to the error itself.
func (qp *QuoteParser) parseConfigLine(line string, lineNum int) (string, string, error) {
	keyPart, valuePart, ok := splitKeyValue(line)
	if !ok {
		return "", "", fmt.Errorf("expected KEY=VALUE format")
	}
	key, err := qp.parseValue(keyPart, "key")
	if err != nil {
		return "", "", err
	}
	value, err := qp.parseValue(valuePart, "value")
	if err != nil {
		return "", "", err
	}
	return key, value, nil
}

// splitKeyValue separates line into trimmed key and value parts around the
// assignment '='. ok is false when line contains no '='.
//
// When line starts with a quoted literal that is directly followed by '=',
// that literal is taken as the key, so a '=' inside a quoted key is not
// mistaken for the assignment. In every other case the line is split at its
// first '='.
func splitKeyValue(line string) (keyPart, valuePart string, ok bool) {
	trimmed := strings.TrimSpace(line)
	if quoted, err := strconv.QuotedPrefix(trimmed); err == nil {
		rest := strings.TrimSpace(trimmed[len(quoted):])
		if strings.HasPrefix(rest, "=") {
			return quoted, strings.TrimSpace(rest[1:]), true
		}
	}
	parts := strings.SplitN(line, "=", 2)
	if len(parts) != 2 {
		return "", "", false
	}
	return strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]), true
}

// parseValue decodes one side of a KEY=VALUE pair. context ("key" or "value")
// is used only in error messages.
//
// If input begins with a valid quoted literal, the unquoted literal is
// returned and any text after it is ignored. Otherwise input is returned
// verbatim when strconv.CanBackquote accepts it, and rejected when it does
// not.
func (qp *QuoteParser) parseValue(input, context string) (string, error) {
	if quoted, err := strconv.QuotedPrefix(input); err == nil {
		unquoted, unquoteErr := strconv.Unquote(quoted)
		if unquoteErr != nil {
			return "", fmt.Errorf("invalid %s quoted string: %w", context, unquoteErr)
		}
		return unquoted, nil
	}
	if strconv.CanBackquote(input) {
		return input, nil
	}
	return "", fmt.Errorf("invalid %s: contains non-printable characters", context)
}
// testProductionParser parses a sample configuration with a QuoteParser whose
// logger prints "[LOG] "-prefixed lines, then prints the parsed entries and
// any aggregated error. The sample deliberately contains one line without
// '=' so that the error path is exercised.
func testProductionParser() {
logger := func(format string, args ...interface{}) {
fmt.Printf("[LOG] "+format+"\n", args...)
}
parser := NewQuoteParser(logger)
configContent := `
# Configuration file
name = "My Application"
version = "1.0.0"
debug = true
database_url = "postgresql://user:pass@localhost/db"
invalid_line_missing_equals
api_key = "secret\nkey"
path = "/home/user/app"
`
// The map is printed even when err is non-nil.
config, err := parser.ParseConfig("config.txt", configContent)
fmt.Println("Parsed configuration:")
// Map iteration order is unspecified, so the entries may print in any order.
for key, value := range config {
fmt.Printf(" %s: %q\n", key, value)
}
if err != nil {
fmt.Printf("\nErrors encountered:\n%v\n", err)
}
}This completes our comprehensive guide to Go’s string unquoting and character utility functions. The strconv package provides robust tools for parsing quoted strings, analyzing characters, and handling errors gracefully. By combining these utilities with proper error handling patterns, you can build reliable text processing applications that handle edge cases and provide meaningful feedback when parsing fails.
